diff options
Diffstat (limited to 'bolt')
38 files changed, 793 insertions, 176 deletions
diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h index 9a9d7b8..b4f31cf 100644 --- a/bolt/include/bolt/Core/BinaryBasicBlock.h +++ b/bolt/include/bolt/Core/BinaryBasicBlock.h @@ -19,6 +19,7 @@ #include "bolt/Core/MCPlus.h" #include "llvm/ADT/GraphTraits.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorOr.h" diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 5fb32a1..08ce892 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -71,14 +71,15 @@ struct SegmentInfo { uint64_t FileOffset; /// Offset in the file. uint64_t FileSize; /// Size in file. uint64_t Alignment; /// Alignment of the segment. + bool IsExecutable; /// Is the executable bit set on the Segment? void print(raw_ostream &OS) const { - OS << "SegmentInfo { Address: 0x" - << Twine::utohexstr(Address) << ", Size: 0x" - << Twine::utohexstr(Size) << ", FileOffset: 0x" + OS << "SegmentInfo { Address: 0x" << Twine::utohexstr(Address) + << ", Size: 0x" << Twine::utohexstr(Size) << ", FileOffset: 0x" << Twine::utohexstr(FileOffset) << ", FileSize: 0x" << Twine::utohexstr(FileSize) << ", Alignment: 0x" - << Twine::utohexstr(Alignment) << "}"; + << Twine::utohexstr(Alignment) << ", " << (IsExecutable ? "x" : " ") + << "}"; }; }; diff --git a/bolt/include/bolt/Core/BinaryData.h b/bolt/include/bolt/Core/BinaryData.h index 8a67b3e..6a773c4 100644 --- a/bolt/include/bolt/Core/BinaryData.h +++ b/bolt/include/bolt/Core/BinaryData.h @@ -226,7 +226,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, Sep = ",\n "; TotalCount += AccessInfo.Count; } - SS.flush(); OS << TotalCount << " total counts : " << TempString; return OS; diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 24c7db2..fc0375b 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -117,7 +117,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, TotalCount += CSP.Count; TotalMispreds += CSP.Mispreds; } - SS.flush(); OS << TotalCount << " (" << TotalMispreds << " misses) :" << TempString; return OS; @@ -1692,6 +1691,8 @@ public: void setPseudo(bool Pseudo) { IsPseudo = Pseudo; } + void setPreserveNops(bool Value) { PreserveNops = Value; } + BinaryFunction &setUsesGnuArgsSize(bool Uses = true) { UsesGnuArgsSize = Uses; return *this; diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h index 2a0514d..9865118 100644 --- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h +++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h @@ -95,24 +95,29 @@ template <> struct MappingTraits<bolt::SuccessorInfo> { namespace bolt { struct PseudoProbeInfo { - llvm::yaml::Hex64 GUID; - uint64_t Index; - uint8_t Type; + uint32_t InlineTreeIndex = 0; + uint64_t BlockMask = 0; // bitset with probe indices from 1 to 64 + std::vector<uint64_t> BlockProbes; // block probes with indices above 64 + std::vector<uint64_t> CallProbes; + std::vector<uint64_t> IndCallProbes; + std::vector<uint32_t> InlineTreeNodes; bool operator==(const PseudoProbeInfo &Other) const { - return GUID == Other.GUID && Index == Other.Index; - } - bool operator!=(const PseudoProbeInfo &Other) const { - return !(*this == Other); + return InlineTreeIndex == Other.InlineTreeIndex && + BlockProbes == Other.BlockProbes && CallProbes == Other.CallProbes && + IndCallProbes == Other.IndCallProbes; } }; } // end namespace bolt template <> struct MappingTraits<bolt::PseudoProbeInfo> { static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) { - YamlIO.mapRequired("guid", PI.GUID); - YamlIO.mapRequired("id", PI.Index); - YamlIO.mapRequired("type", PI.Type); + YamlIO.mapOptional("blx", PI.BlockMask, 0); + YamlIO.mapOptional("blk", PI.BlockProbes, std::vector<uint64_t>()); + YamlIO.mapOptional("call", PI.CallProbes, std::vector<uint64_t>()); + YamlIO.mapOptional("icall", PI.IndCallProbes, std::vector<uint64_t>()); + YamlIO.mapOptional("id", PI.InlineTreeIndex, 0); + YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector<uint32_t>()); } static const bool flow = true; @@ -158,15 +163,35 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> { std::vector<bolt::CallSiteInfo>()); YamlIO.mapOptional("succ", BBP.Successors, std::vector<bolt::SuccessorInfo>()); - YamlIO.mapOptional("pseudo_probes", BBP.PseudoProbes, + YamlIO.mapOptional("probes", BBP.PseudoProbes, std::vector<bolt::PseudoProbeInfo>()); } }; +namespace bolt { +struct InlineTreeNode { + uint32_t ParentIndexDelta; + uint32_t CallSiteProbe; + // Index in PseudoProbeDesc.GUID, UINT32_MAX for same as previous (omitted) + uint32_t GUIDIndex; + bool operator==(const InlineTreeNode &) const { return false; } +}; +} // end namespace bolt + +template <> struct MappingTraits<bolt::InlineTreeNode> { + static void mapping(IO &YamlIO, bolt::InlineTreeNode &ITI) { + YamlIO.mapOptional("g", ITI.GUIDIndex, UINT32_MAX); + YamlIO.mapOptional("p", ITI.ParentIndexDelta, 0); + YamlIO.mapOptional("cs", ITI.CallSiteProbe, 0); + } + + static const bool flow = true; +}; } // end namespace yaml } // end namespace llvm LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryBasicBlockProfile) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::InlineTreeNode) namespace llvm { namespace yaml { @@ -179,8 +204,7 @@ struct BinaryFunctionProfile { llvm::yaml::Hex64 Hash{0}; uint64_t ExecCount{0}; std::vector<BinaryBasicBlockProfile> Blocks; - llvm::yaml::Hex64 GUID{0}; - llvm::yaml::Hex64 PseudoProbeDescHash{0}; + std::vector<InlineTreeNode> InlineTree; bool Used{false}; }; } // end namespace bolt @@ -194,9 +218,8 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> { YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks); YamlIO.mapOptional("blocks", BFP.Blocks, std::vector<bolt::BinaryBasicBlockProfile>()); - YamlIO.mapOptional("guid", BFP.GUID, (uint64_t)0); - YamlIO.mapOptional("pseudo_probe_desc_hash", BFP.PseudoProbeDescHash, - (uint64_t)0); + YamlIO.mapOptional("inline_tree", BFP.InlineTree, + std::vector<bolt::InlineTreeNode>()); } }; @@ -246,10 +269,33 @@ template <> struct MappingTraits<bolt::BinaryProfileHeader> { } }; +namespace bolt { +struct ProfilePseudoProbeDesc { + std::vector<Hex64> GUID; + std::vector<Hex64> Hash; + std::vector<uint32_t> GUIDHashIdx; // Index of hash for that GUID in Hash + + bool operator==(const ProfilePseudoProbeDesc &Other) const { + // Only treat empty Desc as equal + return GUID.empty() && Other.GUID.empty() && Hash.empty() && + Other.Hash.empty() && GUIDHashIdx.empty() && + Other.GUIDHashIdx.empty(); + } +}; +} // end namespace bolt + +template <> struct MappingTraits<bolt::ProfilePseudoProbeDesc> { + static void mapping(IO &YamlIO, bolt::ProfilePseudoProbeDesc &PD) { + YamlIO.mapRequired("gs", PD.GUID); + YamlIO.mapRequired("gh", PD.GUIDHashIdx); + YamlIO.mapRequired("hs", PD.Hash); + } +}; } // end namespace yaml } // end namespace llvm LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryFunctionProfile) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::ProfilePseudoProbeDesc) namespace llvm { namespace yaml { @@ -258,6 +304,7 @@ namespace bolt { struct BinaryProfile { BinaryProfileHeader Header; std::vector<BinaryFunctionProfile> Functions; + ProfilePseudoProbeDesc PseudoProbeDesc; }; } // namespace bolt @@ -265,6 +312,8 @@ template <> struct MappingTraits<bolt::BinaryProfile> { static void mapping(IO &YamlIO, bolt::BinaryProfile &BP) { YamlIO.mapRequired("header", BP.Header); YamlIO.mapRequired("functions", BP.Functions); + YamlIO.mapOptional("pseudo_probe_desc", BP.PseudoProbeDesc, + bolt::ProfilePseudoProbeDesc()); } }; diff --git a/bolt/include/bolt/Profile/YAMLProfileWriter.h b/bolt/include/bolt/Profile/YAMLProfileWriter.h index 4a9355d..d4d7217 100644 --- a/bolt/include/bolt/Profile/YAMLProfileWriter.h +++ b/bolt/include/bolt/Profile/YAMLProfileWriter.h @@ -32,8 +32,27 @@ public: /// Save execution profile for that instance. std::error_code writeProfile(const RewriteInstance &RI); + using InlineTreeMapTy = + DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>; + struct InlineTreeDesc { + template <typename T> using GUIDMapTy = std::unordered_map<uint64_t, T>; + using GUIDNodeMap = GUIDMapTy<const MCDecodedPseudoProbeInlineTree *>; + using GUIDNumMap = GUIDMapTy<uint32_t>; + GUIDNodeMap TopLevelGUIDToInlineTree; + GUIDNumMap GUIDIdxMap; + GUIDNumMap HashIdxMap; + }; + + static std::tuple<std::vector<yaml::bolt::InlineTreeNode>, InlineTreeMapTy> + convertBFInlineTree(const MCPseudoProbeDecoder &Decoder, + const InlineTreeDesc &InlineTree, uint64_t GUID); + + static std::tuple<yaml::bolt::ProfilePseudoProbeDesc, InlineTreeDesc> + convertPseudoProbeDesc(const MCPseudoProbeDecoder &PseudoProbeDecoder); + static yaml::bolt::BinaryFunctionProfile convert(const BinaryFunction &BF, bool UseDFS, + const InlineTreeDesc &InlineTree, const BoltAddressTranslation *BAT = nullptr); /// Set CallSiteInfo destination fields from \p Symbol and return a target @@ -42,8 +61,39 @@ public: setCSIDestination(const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI, const MCSymbol *Symbol, const BoltAddressTranslation *BAT, uint32_t Offset = 0); -}; +private: + struct InlineTreeNode { + const MCDecodedPseudoProbeInlineTree *InlineTree; + uint64_t GUID; + uint64_t Hash; + uint32_t ParentId; + uint32_t InlineSite; + }; + static std::vector<InlineTreeNode> + collectInlineTree(const MCPseudoProbeDecoder &Decoder, + const MCDecodedPseudoProbeInlineTree &Root); + + // 0 - block probe, 1 - indirect call, 2 - direct call + using ProbeList = std::array<SmallVector<uint64_t, 0>, 3>; + using NodeIdToProbes = DenseMap<uint32_t, ProbeList>; + static std::vector<yaml::bolt::PseudoProbeInfo> + convertNodeProbes(NodeIdToProbes &NodeProbes); + +public: + template <typename T> + static std::vector<yaml::bolt::PseudoProbeInfo> + writeBlockProbes(T Probes, const InlineTreeMapTy &InlineTreeNodeId) { + NodeIdToProbes NodeProbes; + for (const MCDecodedPseudoProbe &Probe : Probes) { + auto It = InlineTreeNodeId.find(Probe.getInlineTreeNode()); + if (It == InlineTreeNodeId.end()) + continue; + NodeProbes[It->second][Probe.getType()].emplace_back(Probe.getIndex()); + } + return convertNodeProbes(NodeProbes); + } +}; } // namespace bolt } // namespace llvm diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h index 16a82d5..e5b7ad6 100644 --- a/bolt/include/bolt/Rewrite/RewriteInstance.h +++ b/bolt/include/bolt/Rewrite/RewriteInstance.h @@ -510,12 +510,11 @@ private: }; /// Different types of X86-64 PLT sections. - const PLTSectionInfo X86_64_PLTSections[4] = { - { ".plt", 16 }, - { ".plt.got", 8 }, - { ".plt.sec", 8 }, - { nullptr, 0 } - }; + const PLTSectionInfo X86_64_PLTSections[5] = {{".plt", 16}, + {".plt.got", 8}, + {".plt.sec", 8}, + {".iplt", 16}, + {nullptr, 0}}; /// AArch64 PLT sections. const PLTSectionInfo AArch64_PLTSections[4] = { diff --git a/bolt/include/bolt/Utils/Utils.h b/bolt/include/bolt/Utils/Utils.h index 3886c5f..9baee7d 100644 --- a/bolt/include/bolt/Utils/Utils.h +++ b/bolt/include/bolt/Utils/Utils.h @@ -41,6 +41,11 @@ std::string getEscapedName(const StringRef &Name); /// Return the unescaped name std::string getUnescapedName(const StringRef &Name); +/// Return a common part for a given \p Name wrt a given \p Suffixes list. +/// Preserve the suffix if \p KeepSuffix is set, only dropping characters +/// following it, otherwise drop the suffix as well. +std::optional<StringRef> getCommonName(const StringRef Name, bool KeepSuffix, + ArrayRef<StringRef> Suffixes); /// LTO-generated function names take a form: /// /// <function_name>.lto_priv.<decimal_number>/... diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index cd137f4..1347047 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -2021,6 +2021,9 @@ BinaryContext::getBaseAddressForMapping(uint64_t MMapAddress, // Find a segment with a matching file offset. for (auto &KV : SegmentMapInfo) { const SegmentInfo &SegInfo = KV.second; + // Only consider executable segments. + if (!SegInfo.IsExecutable) + continue; // FileOffset is got from perf event, // and it is equal to alignDown(SegInfo.FileOffset, pagesize). // If the pagesize is not equal to SegInfo.Alignment. diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index af982fd..36c42fc 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -165,6 +165,12 @@ bool shouldPrint(const BinaryFunction &Function) { } } + std::optional<StringRef> Origin = Function.getOriginSectionName(); + if (Origin && llvm::any_of(opts::PrintOnly, [&](const std::string &Name) { + return Name == *Origin; + })) + return true; + return false; } @@ -1339,22 +1345,10 @@ Error BinaryFunction::disassemble() { BC.getBinaryFunctionContainingAddress(TargetAddress)) TargetFunc->setIgnored(); - if (IsCall && containsAddress(TargetAddress)) { - if (TargetAddress == getAddress()) { - // Recursive call. - TargetSymbol = getSymbol(); - } else { - if (BC.isX86()) { - // Dangerous old-style x86 PIC code. We may need to freeze this - // function, so preserve the function as is for now. - PreserveNops = true; - } else { - BC.errs() << "BOLT-WARNING: internal call detected at 0x" - << Twine::utohexstr(AbsoluteInstrAddr) - << " in function " << *this << ". Skipping.\n"; - IsSimple = false; - } - } + if (IsCall && TargetAddress == getAddress()) { + // A recursive call. Calls to internal blocks are handled by + // ValidateInternalCalls pass. + TargetSymbol = getSymbol(); } if (!TargetSymbol) { diff --git a/bolt/lib/Passes/ADRRelaxationPass.cpp b/bolt/lib/Passes/ADRRelaxationPass.cpp index 24fddbc..256034a 100644 --- a/bolt/lib/Passes/ADRRelaxationPass.cpp +++ b/bolt/lib/Passes/ADRRelaxationPass.cpp @@ -59,10 +59,15 @@ void ADRRelaxationPass::runOnFunction(BinaryFunction &BF) { // Don't relax adr if it points to the same function and it is not split // and BF initial size is < 1MB. const unsigned OneMB = 0x100000; - if (!BF.isSplit() && BF.getSize() < OneMB) { + if (BF.getSize() < OneMB) { BinaryFunction *TargetBF = BC.getFunctionForSymbol(Symbol); - if (TargetBF && TargetBF == &BF) + if (TargetBF == &BF && !BF.isSplit()) continue; + // No relaxation needed if ADR references a basic block in the same + // fragment. + if (BinaryBasicBlock *TargetBB = BF.getBasicBlockForLabel(Symbol)) + if (BB.getFragmentNum() == TargetBB->getFragmentNum()) + continue; } MCPhysReg Reg; diff --git a/bolt/lib/Passes/RetpolineInsertion.cpp b/bolt/lib/Passes/RetpolineInsertion.cpp index 2808575..171177d 100644 --- a/bolt/lib/Passes/RetpolineInsertion.cpp +++ b/bolt/lib/Passes/RetpolineInsertion.cpp @@ -181,7 +181,6 @@ std::string createRetpolineFunctionTag(BinaryContext &BC, if (BrInfo.isReg()) { BC.InstPrinter->printRegName(TagOS, BrInfo.BranchReg); TagOS << "_"; - TagOS.flush(); return Tag; } @@ -212,7 +211,6 @@ std::string createRetpolineFunctionTag(BinaryContext &BC, BC.InstPrinter->printRegName(TagOS, MemRef.SegRegNum); } - TagOS.flush(); return Tag; } diff --git a/bolt/lib/Passes/ValidateInternalCalls.cpp b/bolt/lib/Passes/ValidateInternalCalls.cpp index 88df2e5..bdab895 100644 --- a/bolt/lib/Passes/ValidateInternalCalls.cpp +++ b/bolt/lib/Passes/ValidateInternalCalls.cpp @@ -302,9 +302,6 @@ bool ValidateInternalCalls::analyzeFunction(BinaryFunction &Function) const { } Error ValidateInternalCalls::runOnFunctions(BinaryContext &BC) { - if (!BC.isX86()) - return Error::success(); - // Look for functions that need validation. This should be pretty rare. std::set<BinaryFunction *> NeedsValidation; for (auto &BFI : BC.getBinaryFunctions()) { @@ -312,14 +309,20 @@ Error ValidateInternalCalls::runOnFunctions(BinaryContext &BC) { for (BinaryBasicBlock &BB : Function) { for (MCInst &Inst : BB) { if (getInternalCallTarget(Function, Inst)) { + BC.errs() << "BOLT-WARNING: internal call detected in function " + << Function << '\n'; NeedsValidation.insert(&Function); Function.setSimple(false); + Function.setPreserveNops(true); break; } } } } + if (!BC.isX86()) + return Error::success(); + // Skip validation for non-relocation mode if (!BC.HasRelocations) return Error::success(); diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 813d825..0a63148 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -88,7 +88,7 @@ MaxSamples("max-samples", cl::cat(AggregatorCategory)); extern cl::opt<opts::ProfileFormatKind> ProfileFormat; -extern cl::opt<bool> ProfileUsePseudoProbes; +extern cl::opt<bool> ProfileWritePseudoProbes; extern cl::opt<std::string> SaveProfile; cl::opt<bool> ReadPreAggregated( @@ -2043,7 +2043,8 @@ std::error_code DataAggregator::parseMMapEvents() { // size of the mapping, but we know it should not exceed the segment // alignment value. Hence we are performing an approximate check. return SegInfo.Address >= MMapInfo.MMapAddress && - SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment; + SegInfo.Address - MMapInfo.MMapAddress < SegInfo.Alignment && + SegInfo.IsExecutable; }); if (!MatchFound) { errs() << "PERF2BOLT-WARNING: ignoring mapping of " << NameToUse @@ -2300,7 +2301,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, yaml::bolt::BinaryProfile BP; const MCPseudoProbeDecoder *PseudoProbeDecoder = - opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr; + opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr; // Fill out the header info. BP.Header.Version = 1; @@ -2321,6 +2322,12 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE : BinaryFunction::PF_LBR; + // Add probe inline tree nodes. + YAMLProfileWriter::InlineTreeDesc InlineTree; + if (PseudoProbeDecoder) + std::tie(BP.PseudoProbeDesc, InlineTree) = + YAMLProfileWriter::convertPseudoProbeDesc(*PseudoProbeDecoder); + if (!opts::BasicAggregation) { // Convert profile for functions not covered by BAT for (auto &BFI : BC.getBinaryFunctions()) { @@ -2329,8 +2336,8 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, continue; if (BAT->isBATFunction(Function.getAddress())) continue; - BP.Functions.emplace_back( - YAMLProfileWriter::convert(Function, /*UseDFS=*/false, BAT)); + BP.Functions.emplace_back(YAMLProfileWriter::convert( + Function, /*UseDFS=*/false, InlineTree, BAT)); } for (const auto &KV : NamesToBranches) { @@ -2403,16 +2410,22 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches; } if (PseudoProbeDecoder) { - if ((YamlBF.GUID = BF->getGUID())) { - const MCPseudoProbeFuncDesc *FuncDesc = - PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID); - YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash; + DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> + InlineTreeNodeId; + if (BF->getGUID()) { + std::tie(YamlBF.InlineTree, InlineTreeNodeId) = + YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder, + InlineTree, BF->getGUID()); } // Fetch probes belonging to all fragments const AddressProbesMap &ProbeMap = PseudoProbeDecoder->getAddress2ProbesMap(); BinaryFunction::FragmentsSetTy Fragments(BF->Fragments); Fragments.insert(BF); + DenseMap< + uint32_t, + std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>> + BlockProbes; for (const BinaryFunction *F : Fragments) { const uint64_t FuncAddr = F->getAddress(); for (const MCDecodedPseudoProbe &Probe : @@ -2421,17 +2434,24 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, const uint32_t InputOffset = BAT->translate( FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true); const unsigned BlockIndex = getBlock(InputOffset).second; - YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back( - yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(), - Probe.getType()}); + BlockProbes[BlockIndex].emplace_back(Probe); } } + + for (auto &[Block, Probes] : BlockProbes) { + YamlBF.Blocks[Block].PseudoProbes = + YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId); + } } - // Drop blocks without a hash, won't be useful for stale matching. - llvm::erase_if(YamlBF.Blocks, - [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) { - return YamlBB.Hash == (yaml::Hex64)0; - }); + // Skip printing if there's no profile data + llvm::erase_if( + YamlBF.Blocks, [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) { + auto HasCount = [](const auto &SI) { return SI.Count; }; + bool HasAnyCount = YamlBB.ExecCount || + llvm::any_of(YamlBB.Successors, HasCount) || + llvm::any_of(YamlBB.CallSites, HasCount); + return !HasAnyCount; + }); BP.Functions.emplace_back(YamlBF); } } diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp index 3eca5e9..67ed320 100644 --- a/bolt/lib/Profile/YAMLProfileReader.cpp +++ b/bolt/lib/Profile/YAMLProfileReader.cpp @@ -49,11 +49,6 @@ llvm::cl::opt<bool> llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs", cl::desc("use DFS order for YAML profile"), cl::Hidden, cl::cat(BoltOptCategory)); - -llvm::cl::opt<bool> ProfileUsePseudoProbes( - "profile-use-pseudo-probes", - cl::desc("Use pseudo probes for profile generation and matching"), - cl::Hidden, cl::cat(BoltOptCategory)); } // namespace opts namespace llvm { @@ -373,6 +368,7 @@ Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) { return errorCodeToError(EC); } yaml::Input YamlInput(MB.get()->getBuffer()); + YamlInput.setAllowUnknownKeys(true); // Consume YAML file. YamlInput >> YamlBP; diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp index f74cf60..4437be4 100644 --- a/bolt/lib/Profile/YAMLProfileWriter.cpp +++ b/bolt/lib/Profile/YAMLProfileWriter.cpp @@ -13,6 +13,8 @@ #include "bolt/Profile/DataAggregator.h" #include "bolt/Profile/ProfileReaderBase.h" #include "bolt/Rewrite/RewriteInstance.h" +#include "bolt/Utils/CommandLineOpts.h" +#include "llvm/MC/MCPseudoProbe.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/raw_ostream.h" @@ -21,8 +23,12 @@ #define DEBUG_TYPE "bolt-prof" namespace opts { -extern llvm::cl::opt<bool> ProfileUseDFS; -extern llvm::cl::opt<bool> ProfileUsePseudoProbes; +using namespace llvm; +extern cl::opt<bool> ProfileUseDFS; +cl::opt<bool> ProfileWritePseudoProbes( + "profile-write-pseudo-probes", + cl::desc("Use pseudo probes in profile generation"), cl::Hidden, + cl::cat(BoltOptCategory)); } // namespace opts namespace llvm { @@ -53,13 +59,164 @@ const BinaryFunction *YAMLProfileWriter::setCSIDestination( return nullptr; } +std::vector<YAMLProfileWriter::InlineTreeNode> +YAMLProfileWriter::collectInlineTree( + const MCPseudoProbeDecoder &Decoder, + const MCDecodedPseudoProbeInlineTree &Root) { + auto getHash = [&](const MCDecodedPseudoProbeInlineTree &Node) { + return Decoder.getFuncDescForGUID(Node.Guid)->FuncHash; + }; + std::vector<InlineTreeNode> InlineTree( + {InlineTreeNode{&Root, Root.Guid, getHash(Root), 0, 0}}); + uint32_t ParentId = 0; + while (ParentId != InlineTree.size()) { + const MCDecodedPseudoProbeInlineTree *Cur = InlineTree[ParentId].InlineTree; + for (const MCDecodedPseudoProbeInlineTree &Child : Cur->getChildren()) + InlineTree.emplace_back( + InlineTreeNode{&Child, Child.Guid, getHash(Child), ParentId, + std::get<1>(Child.getInlineSite())}); + ++ParentId; + } + + return InlineTree; +} + +std::tuple<yaml::bolt::ProfilePseudoProbeDesc, + YAMLProfileWriter::InlineTreeDesc> +YAMLProfileWriter::convertPseudoProbeDesc(const MCPseudoProbeDecoder &Decoder) { + yaml::bolt::ProfilePseudoProbeDesc Desc; + InlineTreeDesc InlineTree; + + for (const MCDecodedPseudoProbeInlineTree &TopLev : + Decoder.getDummyInlineRoot().getChildren()) + InlineTree.TopLevelGUIDToInlineTree[TopLev.Guid] = &TopLev; + + for (const auto &FuncDesc : Decoder.getGUID2FuncDescMap()) + ++InlineTree.HashIdxMap[FuncDesc.FuncHash]; + + InlineTree.GUIDIdxMap.reserve(Decoder.getGUID2FuncDescMap().size()); + for (const auto &Node : Decoder.getInlineTreeVec()) + ++InlineTree.GUIDIdxMap[Node.Guid]; + + std::vector<std::pair<uint32_t, uint64_t>> GUIDFreqVec; + GUIDFreqVec.reserve(InlineTree.GUIDIdxMap.size()); + for (const auto [GUID, Cnt] : InlineTree.GUIDIdxMap) + GUIDFreqVec.emplace_back(Cnt, GUID); + llvm::sort(GUIDFreqVec); + + std::vector<std::pair<uint32_t, uint64_t>> HashFreqVec; + HashFreqVec.reserve(InlineTree.HashIdxMap.size()); + for (const auto [Hash, Cnt] : InlineTree.HashIdxMap) + HashFreqVec.emplace_back(Cnt, Hash); + llvm::sort(HashFreqVec); + + uint32_t Index = 0; + Desc.Hash.reserve(HashFreqVec.size()); + for (uint64_t Hash : llvm::make_second_range(llvm::reverse(HashFreqVec))) { + Desc.Hash.emplace_back(Hash); + InlineTree.HashIdxMap[Hash] = Index++; + } + + Index = 0; + Desc.GUID.reserve(GUIDFreqVec.size()); + for (uint64_t GUID : llvm::make_second_range(llvm::reverse(GUIDFreqVec))) { + Desc.GUID.emplace_back(GUID); + InlineTree.GUIDIdxMap[GUID] = Index++; + uint64_t Hash = Decoder.getFuncDescForGUID(GUID)->FuncHash; + Desc.GUIDHashIdx.emplace_back(InlineTree.HashIdxMap[Hash]); + } + + return {Desc, InlineTree}; +} + +std::vector<yaml::bolt::PseudoProbeInfo> +YAMLProfileWriter::convertNodeProbes(NodeIdToProbes &NodeProbes) { + struct BlockProbeInfoHasher { + size_t operator()(const yaml::bolt::PseudoProbeInfo &BPI) const { + auto HashCombine = [](auto &Range) { + return llvm::hash_combine_range(Range.begin(), Range.end()); + }; + return llvm::hash_combine(HashCombine(BPI.BlockProbes), + HashCombine(BPI.CallProbes), + HashCombine(BPI.IndCallProbes)); + } + }; + + // Check identical BlockProbeInfo structs and merge them + std::unordered_map<yaml::bolt::PseudoProbeInfo, std::vector<uint32_t>, + BlockProbeInfoHasher> + BPIToNodes; + for (auto &[NodeId, Probes] : NodeProbes) { + yaml::bolt::PseudoProbeInfo BPI; + BPI.BlockProbes = std::vector(Probes[0].begin(), Probes[0].end()); + BPI.IndCallProbes = std::vector(Probes[1].begin(), Probes[1].end()); + BPI.CallProbes = std::vector(Probes[2].begin(), Probes[2].end()); + BPIToNodes[BPI].push_back(NodeId); + } + + auto handleMask = [](const auto &Ids, auto &Vec, auto &Mask) { + for (auto Id : Ids) + if (Id > 64) + Vec.emplace_back(Id); + else + Mask |= 1ull << (Id - 1); + }; + + // Add to YAML with merged nodes/block mask optimizations + std::vector<yaml::bolt::PseudoProbeInfo> YamlProbes; + YamlProbes.reserve(BPIToNodes.size()); + for (const auto &[BPI, Nodes] : BPIToNodes) { + auto &YamlBPI = YamlProbes.emplace_back(yaml::bolt::PseudoProbeInfo()); + YamlBPI.CallProbes = BPI.CallProbes; + YamlBPI.IndCallProbes = BPI.IndCallProbes; + if (Nodes.size() == 1) + YamlBPI.InlineTreeIndex = Nodes.front(); + else + YamlBPI.InlineTreeNodes = Nodes; + handleMask(BPI.BlockProbes, YamlBPI.BlockProbes, YamlBPI.BlockMask); + } + return YamlProbes; +} + +std::tuple<std::vector<yaml::bolt::InlineTreeNode>, + YAMLProfileWriter::InlineTreeMapTy> +YAMLProfileWriter::convertBFInlineTree(const MCPseudoProbeDecoder &Decoder, + const InlineTreeDesc &InlineTree, + uint64_t GUID) { + DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId; + std::vector<yaml::bolt::InlineTreeNode> YamlInlineTree; + auto It = InlineTree.TopLevelGUIDToInlineTree.find(GUID); + if (It == InlineTree.TopLevelGUIDToInlineTree.end()) + return {YamlInlineTree, InlineTreeNodeId}; + const MCDecodedPseudoProbeInlineTree *Root = It->second; + assert(Root && "Malformed TopLevelGUIDToInlineTree"); + uint32_t Index = 0; + uint32_t PrevParent = 0; + uint32_t PrevGUIDIdx = 0; + for (const auto &Node : collectInlineTree(Decoder, *Root)) { + InlineTreeNodeId[Node.InlineTree] = Index++; + auto GUIDIdxIt = InlineTree.GUIDIdxMap.find(Node.GUID); + assert(GUIDIdxIt != InlineTree.GUIDIdxMap.end() && "Malformed GUIDIdxMap"); + uint32_t GUIDIdx = GUIDIdxIt->second; + if (GUIDIdx == PrevGUIDIdx) + GUIDIdx = UINT32_MAX; + else + PrevGUIDIdx = GUIDIdx; + YamlInlineTree.emplace_back(yaml::bolt::InlineTreeNode{ + Node.ParentId - PrevParent, Node.InlineSite, GUIDIdx}); + PrevParent = Node.ParentId; + } + return {YamlInlineTree, InlineTreeNodeId}; +} + yaml::bolt::BinaryFunctionProfile YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS, + const InlineTreeDesc &InlineTree, const BoltAddressTranslation *BAT) { yaml::bolt::BinaryFunctionProfile YamlBF; const BinaryContext &BC = BF.getBinaryContext(); const MCPseudoProbeDecoder *PseudoProbeDecoder = - opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr; + opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr; const uint16_t LBRProfile = BF.getProfileFlags() & BinaryFunction::PF_LBR; @@ -72,12 +229,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS, YamlBF.Hash = BF.getHash(); YamlBF.NumBasicBlocks = BF.size(); YamlBF.ExecCount = BF.getKnownExecutionCount(); - if (PseudoProbeDecoder) { - if ((YamlBF.GUID = BF.getGUID())) { - const MCPseudoProbeFuncDesc *FuncDesc = - PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID); - YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash; - } + DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t> InlineTreeNodeId; + if (PseudoProbeDecoder && BF.getGUID()) { + std::tie(YamlBF.InlineTree, InlineTreeNodeId) = + convertBFInlineTree(*PseudoProbeDecoder, InlineTree, BF.getGUID()); } BinaryFunction::BasicBlockOrderType Order; @@ -193,10 +348,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS, const uint64_t FuncAddr = BF.getAddress(); const std::pair<uint64_t, uint64_t> &BlockRange = BB->getInputAddressRange(); - for (const MCDecodedPseudoProbe &Probe : ProbeMap.find( - FuncAddr + BlockRange.first, FuncAddr + BlockRange.second)) - YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{ - Probe.getGuid(), Probe.getIndex(), Probe.getType()}); + const std::pair<uint64_t, uint64_t> BlockAddrRange = { + FuncAddr + BlockRange.first, FuncAddr + BlockRange.second}; + auto Probes = ProbeMap.find(BlockAddrRange.first, BlockAddrRange.second); + YamlBB.PseudoProbes = writeBlockProbes(Probes, InlineTreeNodeId); } YamlBF.Blocks.emplace_back(YamlBB); @@ -251,6 +406,12 @@ std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) { } BP.Header.Flags = ProfileFlags; + // Add probe inline tree nodes. + InlineTreeDesc InlineTree; + if (const MCPseudoProbeDecoder *Decoder = + opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr) + std::tie(BP.PseudoProbeDesc, InlineTree) = convertPseudoProbeDesc(*Decoder); + // Add all function objects. for (const auto &BFI : Functions) { const BinaryFunction &BF = BFI.second; @@ -258,7 +419,7 @@ std::error_code YAMLProfileWriter::writeProfile(const RewriteInstance &RI) { if (!BF.hasValidProfile() && !RI.getProfileReader()->isTrustedSource()) continue; - BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS)); + BP.Functions.emplace_back(convert(BF, opts::ProfileUseDFS, InlineTree)); } } diff --git a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp index 4925b4b..8647df4 100644 --- a/bolt/lib/Rewrite/PseudoProbeRewriter.cpp +++ b/bolt/lib/Rewrite/PseudoProbeRewriter.cpp @@ -14,6 +14,7 @@ #include "bolt/Rewrite/MetadataRewriter.h" #include "bolt/Rewrite/MetadataRewriters.h" #include "bolt/Utils/CommandLineOpts.h" +#include "bolt/Utils/Utils.h" #include "llvm/IR/Function.h" #include "llvm/MC/MCPseudoProbe.h" #include "llvm/Support/CommandLine.h" @@ -49,7 +50,7 @@ static cl::opt<PrintPseudoProbesOptions> PrintPseudoProbes( clEnumValN(PPP_All, "all", "enable all debugging printout")), cl::Hidden, cl::cat(BoltCategory)); -extern cl::opt<bool> ProfileUsePseudoProbes; +extern cl::opt<bool> ProfileWritePseudoProbes; } // namespace opts namespace { @@ -71,7 +72,8 @@ class PseudoProbeRewriter final : public MetadataRewriter { /// Parse .pseudo_probe_desc section and .pseudo_probe section /// Setup Pseudo probe decoder - void parsePseudoProbe(); + /// If \p ProfiledOnly is set, only parse records for functions with profile. + void parsePseudoProbe(bool ProfiledOnly = false); /// PseudoProbe decoder std::shared_ptr<MCPseudoProbeDecoder> ProbeDecoderPtr; @@ -90,21 +92,21 @@ public: }; Error PseudoProbeRewriter::preCFGInitializer() { - if (opts::ProfileUsePseudoProbes) - parsePseudoProbe(); + if (opts::ProfileWritePseudoProbes) + parsePseudoProbe(true); return Error::success(); } Error PseudoProbeRewriter::postEmitFinalizer() { - if (!opts::ProfileUsePseudoProbes) + if (!opts::ProfileWritePseudoProbes) parsePseudoProbe(); updatePseudoProbes(); return Error::success(); } -void PseudoProbeRewriter::parsePseudoProbe() { +void PseudoProbeRewriter::parsePseudoProbe(bool ProfiledOnly) { MCPseudoProbeDecoder &ProbeDecoder(*ProbeDecoderPtr); PseudoProbeDescSection = BC.getUniqueSectionByName(".pseudo_probe_desc"); PseudoProbeSection = BC.getUniqueSectionByName(".pseudo_probe"); @@ -133,10 +135,22 @@ void PseudoProbeRewriter::parsePseudoProbe() { MCPseudoProbeDecoder::Uint64Set GuidFilter; MCPseudoProbeDecoder::Uint64Map FuncStartAddrs; + SmallVector<StringRef, 0> Suffixes( + {".destroy", ".resume", ".llvm.", ".cold", ".warm"}); for (const BinaryFunction *F : BC.getAllBinaryFunctions()) { + bool HasProfile = F->hasProfileAvailable(); for (const MCSymbol *Sym : F->getSymbols()) { - FuncStartAddrs[Function::getGUID(NameResolver::restore(Sym->getName()))] = - F->getAddress(); + StringRef SymName = Sym->getName(); + for (auto Name : {std::optional(NameResolver::restore(SymName)), + getCommonName(SymName, false, Suffixes)}) { + if (!Name) + continue; + SymName = *Name; + uint64_t GUID = Function::getGUID(SymName); + FuncStartAddrs[GUID] = F->getAddress(); + if (ProfiledOnly && HasProfile) + GuidFilter.insert(GUID); + } } } Contents = PseudoProbeSection->getContents(); @@ -155,13 +169,25 @@ void PseudoProbeRewriter::parsePseudoProbe() { ProbeDecoder.printProbesForAllAddresses(outs()); } - for (const auto &FuncDesc : ProbeDecoder.getGUID2FuncDescMap()) { - uint64_t GUID = FuncDesc.FuncGUID; - if (!FuncStartAddrs.contains(GUID)) - continue; - BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncStartAddrs[GUID]); - assert(BF); - BF->setGUID(GUID); + const GUIDProbeFunctionMap &GUID2Func = ProbeDecoder.getGUID2FuncDescMap(); + // Checks GUID in GUID2Func and returns it if it's present or null otherwise. + auto checkGUID = [&](StringRef SymName) -> uint64_t { + uint64_t GUID = Function::getGUID(SymName); + if (GUID2Func.find(GUID) == GUID2Func.end()) + return 0; + return GUID; + }; + for (BinaryFunction *F : BC.getAllBinaryFunctions()) { + for (const MCSymbol *Sym : F->getSymbols()) { + StringRef SymName = NameResolver::restore(Sym->getName()); + uint64_t GUID = checkGUID(SymName); + std::optional<StringRef> CommonName = + getCommonName(SymName, false, Suffixes); + if (!GUID && CommonName) + GUID = checkGUID(*CommonName); + if (GUID) + F->setGUID(GUID); + } } } diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index adacb50d..32ec7ab 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -526,11 +526,9 @@ Error RewriteInstance::discoverStorage() { NextAvailableOffset = std::max(NextAvailableOffset, Phdr.p_offset + Phdr.p_filesz); - BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{Phdr.p_vaddr, - Phdr.p_memsz, - Phdr.p_offset, - Phdr.p_filesz, - Phdr.p_align}; + BC->SegmentMapInfo[Phdr.p_vaddr] = SegmentInfo{ + Phdr.p_vaddr, Phdr.p_memsz, Phdr.p_offset, + Phdr.p_filesz, Phdr.p_align, ((Phdr.p_flags & ELF::PF_X) != 0)}; if (BC->TheTriple->getArch() == llvm::Triple::x86_64 && Phdr.p_vaddr >= BinaryContext::KernelStartX86_64) BC->IsLinuxKernel = true; @@ -1533,7 +1531,7 @@ void RewriteInstance::createPLTBinaryFunction(uint64_t TargetAddress, MCSymbol *Symbol = Rel->Symbol; if (!Symbol) { - if (!BC->isAArch64() || !Rel->Addend || !Rel->isIRelative()) + if (BC->isRISCV() || !Rel->Addend || !Rel->isIRelative()) return; // IFUNC trampoline without symbol @@ -4247,7 +4245,6 @@ void RewriteInstance::addBoltInfoSection() { << "command line:"; for (int I = 0; I < Argc; ++I) DescOS << " " << Argv[I]; - DescOS.flush(); // Encode as GNU GOLD VERSION so it is easily printable by 'readelf -n' const std::string BoltInfo = @@ -4270,7 +4267,6 @@ void RewriteInstance::encodeBATSection() { raw_string_ostream DescOS(DescStr); BAT->write(*BC, DescOS); - DescOS.flush(); const std::string BoltInfo = BinarySection::encodeELFNote("BOLT", DescStr, BinarySection::NT_BOLT_BAT); diff --git a/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp b/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp index 53a0c81..f3199eb 100644 --- a/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp +++ b/bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp @@ -314,7 +314,6 @@ std::string InstrumentationRuntimeLibrary::buildTables(BinaryContext &BC) { } // Our string table lives immediately after descriptions vector OS << Summary->StringTable; - OS.flush(); return TablesStr; } diff --git a/bolt/lib/Utils/Utils.cpp b/bolt/lib/Utils/Utils.cpp index 718e975..ecc2f10 100644 --- a/bolt/lib/Utils/Utils.cpp +++ b/bolt/lib/Utils/Utils.cpp @@ -66,15 +66,21 @@ std::string getUnescapedName(const StringRef &Name) { return Output; } -std::optional<StringRef> getLTOCommonName(const StringRef Name) { - for (StringRef Suffix : {".__uniq.", ".lto_priv.", ".constprop.", ".llvm."}) { +std::optional<StringRef> getCommonName(const StringRef Name, bool KeepSuffix, + ArrayRef<StringRef> Suffixes) { + for (StringRef Suffix : Suffixes) { size_t LTOSuffixPos = Name.find(Suffix); if (LTOSuffixPos != StringRef::npos) - return Name.substr(0, LTOSuffixPos + Suffix.size()); + return Name.substr(0, LTOSuffixPos + (KeepSuffix ? Suffix.size() : 0)); } return std::nullopt; } +std::optional<StringRef> getLTOCommonName(const StringRef Name) { + return getCommonName(Name, true, + {".__uniq.", ".lto_priv.", ".constprop.", ".llvm."}); +} + std::optional<uint8_t> readDWARFExpressionTargetReg(StringRef ExprBytes) { uint8_t Opcode = ExprBytes[0]; if (Opcode == dwarf::DW_CFA_def_cfa_expression) diff --git a/bolt/test/AArch64/constant_island_pie_update.s b/bolt/test/AArch64/constant_island_pie_update.s index 313e103..889f6b6 100644 --- a/bolt/test/AArch64/constant_island_pie_update.s +++ b/bolt/test/AArch64/constant_island_pie_update.s @@ -8,15 +8,15 @@ # RUN: %clang %cflags -fPIC -pie %t.o -o %t.rela.exe -nostdlib \ # RUN: -Wl,-q -Wl,-z,notext # RUN: llvm-bolt %t.rela.exe -o %t.rela.bolt --use-old-text=0 --lite=0 -# RUN: llvm-objdump -j .text -d --show-all-symbols %t.rela.bolt | FileCheck %s +# RUN: llvm-objdump -j .text -d -z --show-all-symbols %t.rela.bolt | FileCheck %s # RUN: llvm-readelf -rsW %t.rela.bolt | FileCheck --check-prefix=ELFCHECK %s // .relr.dyn # RUN: %clang %cflags -fPIC -pie %t.o -o %t.relr.exe -nostdlib \ # RUN: -Wl,-q -Wl,-z,notext -Wl,--pack-dyn-relocs=relr # RUN: llvm-objcopy --remove-section .rela.mytext %t.relr.exe # RUN: llvm-bolt %t.relr.exe -o %t.relr.bolt --use-old-text=0 --lite=0 -# RUN: llvm-objdump -j .text -d --show-all-symbols %t.relr.bolt | FileCheck %s -# RUN: llvm-objdump -j .text -d %t.relr.bolt | \ +# RUN: llvm-objdump -j .text -d -z --show-all-symbols %t.relr.bolt | FileCheck %s +# RUN: llvm-objdump -j .text -d -z %t.relr.bolt | \ # RUN: FileCheck %s --check-prefix=ADDENDCHECK # RUN: llvm-readelf -rsW %t.relr.bolt | FileCheck --check-prefix=RELRELFCHECK %s # RUN: llvm-readelf -SW %t.relr.bolt | FileCheck --check-prefix=RELRSZCHECK %s diff --git a/bolt/test/AArch64/ifunc.c b/bolt/test/AArch64/ifunc.test index 1744976..3da42c6 100644 --- a/bolt/test/AArch64/ifunc.c +++ b/bolt/test/AArch64/ifunc.test @@ -1,8 +1,6 @@ -// This test checks that IFUNC trampoline is properly recognised by BOLT - // With -O0 indirect call is performed on IPLT trampoline. IPLT trampoline // has IFUNC symbol. -// RUN: %clang %cflags -nostdlib -O0 -no-pie %s -fuse-ld=lld \ +// RUN: %clang %cflags -nostdlib -O0 -no-pie %p/../Inputs/ifunc.c -fuse-ld=lld \ // RUN: -o %t.O0.exe -Wl,-q // RUN: llvm-bolt %t.O0.exe -o %t.O0.bolt.exe \ // RUN: --print-disasm --print-only=_start | \ @@ -12,7 +10,7 @@ // Non-pie static executable doesn't generate PT_DYNAMIC, check relocation // is readed successfully and IPLT trampoline has been identified by bolt. -// RUN: %clang %cflags -nostdlib -O3 %s -fuse-ld=lld -no-pie \ +// RUN: %clang %cflags -nostdlib -O3 %p/../Inputs/ifunc.c -fuse-ld=lld -no-pie \ // RUN: -o %t.O3_nopie.exe -Wl,-q // RUN: llvm-readelf -l %t.O3_nopie.exe | \ // RUN: FileCheck --check-prefix=NON_DYN_CHECK %s @@ -25,7 +23,7 @@ // With -O3 direct call is performed on IPLT trampoline. IPLT trampoline // doesn't have associated symbol. The ifunc symbol has the same address as // IFUNC resolver function. -// RUN: %clang %cflags -nostdlib -O3 %s -fuse-ld=lld -fPIC -pie \ +// RUN: %clang %cflags -nostdlib -O3 %p/../Inputs/ifunc.c -fuse-ld=lld -fPIC -pie \ // RUN: -o %t.O3_pie.exe -Wl,-q // RUN: llvm-bolt %t.O3_pie.exe -o %t.O3_pie.bolt.exe \ // RUN: --print-disasm --print-only=_start | \ @@ -35,8 +33,8 @@ // Check that IPLT trampoline located in .plt section are normally handled by // BOLT. The gnu-ld linker doesn't use separate .iplt section. -// RUN: %clang %cflags -nostdlib -O3 %s -fuse-ld=lld -fPIC -pie \ -// RUN: -T %p/Inputs/iplt.ld -o %t.iplt_O3_pie.exe -Wl,-q +// RUN: %clang %cflags -nostdlib -O3 %p/../Inputs/ifunc.c -fuse-ld=lld -fPIC -pie \ +// RUN: -T %p/../Inputs/iplt.ld -o %t.iplt_O3_pie.exe -Wl,-q // RUN: llvm-bolt %t.iplt_O3_pie.exe -o %t.iplt_O3_pie.bolt.exe \ // RUN: --print-disasm --print-only=_start | \ // RUN: FileCheck --check-prefix=CHECK %s @@ -49,14 +47,3 @@ // REL_CHECK: R_AARCH64_IRELATIVE [[#%x,REL_SYMB_ADDR:]] // REL_CHECK: [[#REL_SYMB_ADDR]] {{.*}} FUNC {{.*}} resolver_foo - -static void foo() {} -static void bar() {} - -extern int use_foo; - -static void *resolver_foo(void) { return use_foo ? foo : bar; } - -__attribute__((ifunc("resolver_foo"))) void ifoo(); - -void _start() { ifoo(); } diff --git a/bolt/test/AArch64/internal-call.s b/bolt/test/AArch64/internal-call.s new file mode 100644 index 0000000..43b3a64 --- /dev/null +++ b/bolt/test/AArch64/internal-call.s @@ -0,0 +1,65 @@ +## Test that llvm-bolt detects internal calls and marks the containing function +## as non-simple. + +# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -static +# RUN: llvm-bolt %t.exe -o %t.null --print-all 2>&1 | FileCheck %s + +# CHECK: Binary Function "_start" after building cfg +# CHECK: internal call detected in function _start +# CHECK-NOT: Binary Function "_start" after validate-internal-calls + + .text + .globl _start + .type _start, %function +_start: + .cfi_startproc +.LBB00: + mov x11, #0x1fff + cmp x1, x11 + b.hi .Ltmp1 + +.entry1: + movi v4.16b, #0x0 + movi v5.16b, #0x0 + subs x1, x1, #0x8 + b.lo .Ltmp2 + +.entry2: + ld1 { v2.2d, v3.2d }, [x0], #32 + ld1 { v0.2d, v1.2d }, [x0], #32 + +.Ltmp2: + uaddlp v4.4s, v4.8h + uaddlp v4.2d, v4.4s + mov x0, v4.d[0] + mov x1, v4.d[1] + add x0, x0, x1 + ret x30 + +.Ltmp1: + mov x8, x30 + +.Lloop: + add x5, x0, x9 + mov x1, #0xface + movi v4.16b, #0x0 + movi v5.16b, #0x0 + bl .entry2 + add x4, x4, x0 + mov x0, x5 + sub x7, x7, x10 + cmp x7, x11 + b.hi .Lloop + + mov x1, x7 + bl .entry1 + add x0, x4, x0 + mov x30, x8 + ret x30 + + .cfi_endproc +.size _start, .-_start + +## Force relocation mode. + .reloc 0, R_AARCH64_NONE diff --git a/bolt/test/AArch64/update-weak-reference-symbol.s b/bolt/test/AArch64/update-weak-reference-symbol.s index 600a06b8..46819e8 100644 --- a/bolt/test/AArch64/update-weak-reference-symbol.s +++ b/bolt/test/AArch64/update-weak-reference-symbol.s @@ -3,7 +3,7 @@ // RUN: %clang %cflags -Wl,-z,notext -shared -Wl,-q %s -o %t.so // RUN: llvm-bolt %t.so -o %t.so.bolt // RUN: llvm-nm -n %t.so.bolt > %t.out.txt -// RUN: llvm-objdump -dj .rodata %t.so.bolt >> %t.out.txt +// RUN: llvm-objdump -z -dj .rodata %t.so.bolt >> %t.out.txt // RUN: FileCheck %s --input-file=%t.out.txt # CHECK: w func_1 diff --git a/bolt/test/Inputs/ifunc.c b/bolt/test/Inputs/ifunc.c new file mode 100644 index 0000000..3fa62be --- /dev/null +++ b/bolt/test/Inputs/ifunc.c @@ -0,0 +1,12 @@ +// This test checks that IFUNC trampoline is properly recognised by BOLT + +static void foo() {} +static void bar() {} + +extern int use_foo; + +static void *resolver_foo(void) { return use_foo ? foo : bar; } + +__attribute__((ifunc("resolver_foo"))) void ifoo(); + +void _start() { ifoo(); } diff --git a/bolt/test/AArch64/Inputs/iplt.ld b/bolt/test/Inputs/iplt.ld index 1e54a24..1e54a24 100644 --- a/bolt/test/AArch64/Inputs/iplt.ld +++ b/bolt/test/Inputs/iplt.ld diff --git a/bolt/test/X86/ifunc.test b/bolt/test/X86/ifunc.test new file mode 100644 index 0000000..befefbe --- /dev/null +++ b/bolt/test/X86/ifunc.test @@ -0,0 +1,47 @@ +// Check if BOLT can process ifunc symbols from .plt section +// RUN: %clang %cflags -nostdlib -no-pie %p/../Inputs/ifunc.c -fuse-ld=lld \ +// RUN: -o %t.exe -Wl,-q +// RUN: llvm-bolt %t.exe -o %t.bolt.exe \ +// RUN: --print-disasm --print-only=_start | \ +// RUN: FileCheck --check-prefix=CHECK %s +// RUN: llvm-readelf -aW %t.bolt.exe | \ +// RUN: FileCheck --check-prefix=REL_CHECK %s + +// Check if BOLT can process ifunc symbols from .plt section in non-pie static +// executable case. +// RUN: %clang %cflags -nostdlib %p/../Inputs/ifunc.c -fuse-ld=lld -no-pie \ +// RUN: -o %t.nopie.exe -Wl,-q +// RUN: llvm-readelf -l %t.nopie.exe | \ +// RUN: FileCheck --check-prefix=NON_DYN_CHECK %s +// RUN: llvm-bolt %t.nopie.exe -o %t.nopie.bolt.exe \ +// RUN: --print-disasm --print-only=_start | \ +// RUN: FileCheck --check-prefix=CHECK %s +// RUN: llvm-readelf -aW %t.nopie.bolt.exe | \ +// RUN: FileCheck --check-prefix=REL_CHECK %s + +// Check if BOLT can process ifunc symbols from .plt section in pie executable +// case. +// RUN: %clang %cflags -nostdlib %p/../Inputs/ifunc.c -fuse-ld=lld -fPIC -pie \ +// RUN: -o %t.pie.exe -Wl,-q +// RUN: llvm-bolt %t.pie.exe -o %t.pie.bolt.exe \ +// RUN: --print-disasm --print-only=_start | \ +// RUN: FileCheck --check-prefix=CHECK %s +// RUN: llvm-readelf -aW %t.pie.bolt.exe | \ +// RUN: FileCheck --check-prefix=REL_CHECK %s + +// Check that IPLT trampoline located in .plt section are normally handled by +// BOLT. The gnu-ld linker doesn't use separate .iplt section. +// RUN: %clang %cflags -nostdlib %p/../Inputs/ifunc.c -fuse-ld=lld -fPIC -pie \ +// RUN: -T %p/../Inputs/iplt.ld -o %t.iplt_pie.exe -Wl,-q +// RUN: llvm-bolt %t.iplt_pie.exe -o %t.iplt_pie.bolt.exe \ +// RUN: --print-disasm --print-only=_start | \ +// RUN: FileCheck --check-prefix=CHECK %s +// RUN: llvm-readelf -aW %t.iplt_pie.bolt.exe | \ +// RUN: FileCheck --check-prefix=REL_CHECK %s + +// NON_DYN_CHECK-NOT: DYNAMIC + +// CHECK: callq "resolver_foo/1@PLT" + +// REL_CHECK: R_X86_64_IRELATIVE [[#%x,REL_SYMB_ADDR:]] +// REL_CHECK: [[#REL_SYMB_ADDR]] {{.*}} FUNC {{.*}} resolver_foo diff --git a/bolt/test/X86/log.test b/bolt/test/X86/log.test index 42109db..2c006e9 100644 --- a/bolt/test/X86/log.test +++ b/bolt/test/X86/log.test @@ -6,7 +6,7 @@ RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe RUN: llvm-bolt %t.exe -o %t.null --data %p/Inputs/blarge.fdata -v=2 \ RUN: --reorder-blocks=normal --print-finalized --log-file=%t.log 2>&1 \ RUN: | FileCheck --check-prefix=CHECK --allow-empty %s -RUN: cat %t.log | FileCheck %s --check-prefix=CHECK-LOG +RUN: FileCheck %s --check-prefix=CHECK-LOG --input-file %t.log CHECK-NOT: BOLT-INFO CHECK-NOT: BOLT-WARNING @@ -16,4 +16,4 @@ CHECK-NOT: BOLT-ERROR CHECK-LOG: BOLT-INFO: Target architecture CHECK-LOG: BOLT-INFO: BOLT version CHECK-LOG: BOLT-INFO: basic block reordering modified layout -CHECK-LOG: Binary Function "usqrt" +CHECK-LOG: Binary Function "main" diff --git a/bolt/test/X86/print-only-section.s b/bolt/test/X86/print-only-section.s new file mode 100644 index 0000000..d580818 --- /dev/null +++ b/bolt/test/X86/print-only-section.s @@ -0,0 +1,29 @@ +## Check that --print-only flag works with sections. + +# REQUIRES: system-linux + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t.exe +# RUN: llvm-bolt %t.exe -o %t.out --print-cfg --print-only=unused_code 2>&1 \ +# RUN: | FileCheck %s + +# CHECK: Binary Function "foo" +# CHECK-NOT: Binary Function "_start" + + .text + .globl _start + .type _start, %function +_start: + .cfi_startproc + ret + .cfi_endproc + .size _start, .-_start + + .section unused_code,"ax",@progbits + .globl foo + .type foo, %function +foo: + .cfi_startproc + ret + .cfi_endproc + .size foo, .-foo diff --git a/bolt/test/X86/pseudoprobe-decoding-inline.test b/bolt/test/X86/pseudoprobe-decoding-inline.test index b361551..e5e8aad 100644 --- a/bolt/test/X86/pseudoprobe-decoding-inline.test +++ b/bolt/test/X86/pseudoprobe-decoding-inline.test @@ -6,37 +6,39 @@ # PREAGG: B X:0 #main# 1 0 ## Check pseudo-probes in regular YAML profile (non-BOLTed binary) # RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin %t.preagg PREAGG -# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-use-pseudo-probes +# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-write-pseudo-probes # RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML ## Check pseudo-probes in BAT YAML profile (BOLTed binary) # RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG -# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-use-pseudo-probes +# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-write-pseudo-probes # RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML # CHECK-YAML: name: bar # CHECK-YAML: - bid: 0 -# CHECK-YAML: pseudo_probes: [ { guid: 0xE413754A191DB537, id: 1, type: 0 }, { guid: 0xE413754A191DB537, id: 4, type: 0 } ] -# CHECK-YAML: guid: 0xE413754A191DB537 -# CHECK-YAML: pseudo_probe_desc_hash: 0x10E852DA94 +# CHECK-YAML: probes: [ { blx: 9 } ] +# CHECK-YAML: inline_tree: [ { } ] # # CHECK-YAML: name: foo # CHECK-YAML: - bid: 0 -# CHECK-YAML: pseudo_probes: [ { guid: 0x5CF8C24CDB18BDAC, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 2, type: 0 } ] -# CHECK-YAML: guid: 0x5CF8C24CDB18BDAC -# CHECK-YAML: pseudo_probe_desc_hash: 0x200205A19C5B4 +# CHECK-YAML: probes: [ { blx: 3 } ] +# CHECK-YAML: inline_tree: [ { g: 1 }, { g: 0, cs: 8 } ] # # CHECK-YAML: name: main # CHECK-YAML: - bid: 0 -# CHECK-YAML: pseudo_probes: [ { guid: 0xDB956436E78DD5FA, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 1, type: 0 }, { guid: 0x5CF8C24CDB18BDAC, id: 2, type: 0 } ] -# CHECK-YAML: guid: 0xDB956436E78DD5FA -# CHECK-YAML: pseudo_probe_desc_hash: 0x10000FFFFFFFF +# CHECK-YAML: probes: [ { blx: 3, id: 1 }, { blx: 1 } ] +# CHECK-YAML: inline_tree: [ { g: 2 }, { g: 1, cs: 2 }, { g: 0, p: 1, cs: 8 } ] # -## Check that without --profile-use-pseudo-probes option, no pseudo probes are +# CHECK-YAML: pseudo_probe_desc: +# CHECK-YAML-NEXT: gs: [ 0xE413754A191DB537, 0x5CF8C24CDB18BDAC, 0xDB956436E78DD5FA ] +# CHECK-YAML-NEXT: gh: [ 2, 0, 1 ] +# CHECK-YAML-NEXT: hs: [ 0x200205A19C5B4, 0x10000FFFFFFFF, 0x10E852DA94 ] +# +## Check that without --profile-write-pseudo-probes option, no pseudo probes are ## generated -# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata -# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-NO-OPT -# CHECK-NO-OPT-NOT: pseudo_probes -# CHECK-NO-OPT-NOT: guid -# CHECK-NO-OPT-NOT: pseudo_probe_desc_hash +# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/inline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml3 -o %t.fdata +# RUN: FileCheck --input-file %t.yaml3 %s --check-prefix CHECK-NO-OPT +# CHECK-NO-OPT-NOT: probes: +# CHECK-NO-OPT-NOT: inline_tree: +# CHECK-NO-OPT-NOT: pseudo_probe_desc: CHECK: Report of decoding input pseudo probe binaries diff --git a/bolt/test/X86/pseudoprobe-decoding-noinline.test b/bolt/test/X86/pseudoprobe-decoding-noinline.test index 5dd6c2e..36a2fab 100644 --- a/bolt/test/X86/pseudoprobe-decoding-noinline.test +++ b/bolt/test/X86/pseudoprobe-decoding-noinline.test @@ -1,6 +1,45 @@ # REQUIRES: system-linux -# RUN: llvm-bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt 2>&1 | FileCheck %s +# RUN: llvm-bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin --print-pseudo-probes=all -o %t.bolt --lite=0 --enable-bat 2>&1 | FileCheck %s +# PREAGG: B X:0 #foo# 1 0 +# PREAGG: B X:0 #bar# 1 0 +# PREAGG: B X:0 #main# 1 0 + +## Check pseudo-probes in regular YAML profile (non-BOLTed binary) +# RUN: link_fdata %s %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin %t.preagg PREAGG +# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml -o %t.fdata --profile-write-pseudo-probes +# RUN: FileCheck --input-file %t.yaml %s --check-prefix CHECK-YAML +## Check pseudo-probes in BAT YAML profile (BOLTed binary) +# RUN: link_fdata %s %t.bolt %t.preagg2 PREAGG +# RUN: perf2bolt %t.bolt -p %t.preagg2 --pa -w %t.yaml2 -o %t.fdata2 --profile-write-pseudo-probes +# RUN: FileCheck --input-file %t.yaml2 %s --check-prefix CHECK-YAML +# CHECK-YAML: name: bar +# CHECK-YAML: - bid: 0 +# CHECK-YAML: probes: [ { blx: 9 } ] +# CHECK-YAML: inline_tree: [ { } ] +# +# CHECK-YAML: name: foo +# CHECK-YAML: - bid: 0 +# CHECK-YAML: probes: [ { blx: 3 } ] +# CHECK-YAML: inline_tree: [ { g: 2 } ] +# +# CHECK-YAML: name: main +# CHECK-YAML: - bid: 0 +# CHECK-YAML: probes: [ { blx: 1, call: [ 2 ] } ] +# CHECK-YAML: inline_tree: [ { g: 1 } ] +# +# CHECK-YAML: pseudo_probe_desc: +# CHECK-YAML-NEXT: gs: [ 0xE413754A191DB537, 0xDB956436E78DD5FA, 0x5CF8C24CDB18BDAC ] +# CHECK-YAML-NEXT: gh: [ 2, 1, 0 ] +# CHECK-YAML-NEXT: hs: [ 0x200205A19C5B4, 0x10000FFFFFFFF, 0x10E852DA94 ] +# +## Check that without --profile-write-pseudo-probes option, no pseudo probes are +## generated +# RUN: perf2bolt %S/../../../llvm/test/tools/llvm-profgen/Inputs/noinline-cs-pseudoprobe.perfbin -p %t.preagg --pa -w %t.yaml3 -o %t.fdata +# RUN: FileCheck --input-file %t.yaml3 %s --check-prefix CHECK-NO-OPT +# CHECK-NO-OPT-NOT: probes: +# CHECK-NO-OPT-NOT: inline_tree: +# CHECK-NO-OPT-NOT: pseudo_probe_desc: ;; Report of decoding input pseudo probe binaries ; CHECK: GUID: 6699318081062747564 Name: foo diff --git a/bolt/test/X86/yaml-unknown-keys.test b/bolt/test/X86/yaml-unknown-keys.test new file mode 100644 index 0000000..cbcf9a4 --- /dev/null +++ b/bolt/test/X86/yaml-unknown-keys.test @@ -0,0 +1,50 @@ +## Test that BOLT gracefully handles a YAML profile with unknown keys. + +# REQUIRES: system-linux +# RUN: split-file %s %t +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o +# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib +# RUN: llvm-bolt %t.exe -o %t.null --data %t/profile.yaml \ +# RUN: --profile-ignore-hash -v=1 2>&1 | FileCheck %s +# CHECK: warning: unknown key 'unknown-header-key' +# CHECK: warning: unknown key 'unknown_succ_key' +# CHECK: warning: unknown key 'unknown_block_key' +# CHECK: warning: unknown key 'unknown_function_key' +# CHECK: warning: unknown key 'unknown_toplev_key' +#--- main.s + .globl main + .type main, %function +main: + .cfi_startproc + cmpl $0x0, %eax + jne .LBB0 +.LBB0: + retq + .cfi_endproc +.size main, .-main +#--- profile.yaml +--- +header: + profile-version: 1 + binary-name: 'yaml-multiple-profiles.test.tmp.exe' + binary-build-id: '<unknown>' + profile-flags: [ lbr ] + profile-origin: branch profile reader + profile-events: '' + dfs-order: false + unknown-header-key: true +functions: + - name: 'main' + fid: 1 + hash: 0x50BBA3441D436491 + exec: 1 + nblocks: 1 + blocks: + - bid: 0 + insns: 2 + hash: 0x4D4D8FAF7D4C0000 + succ: [ { bid: 1, cnt: 0, unknown_succ_key: 0x10 } ] + unknown_block_key: [ ] + unknown_function_key: 1 +unknown_toplev_key: '' +... diff --git a/bolt/test/lit.local.cfg b/bolt/test/lit.local.cfg index 8aa5f15..e2fa0a4 100644 --- a/bolt/test/lit.local.cfg +++ b/bolt/test/lit.local.cfg @@ -1,6 +1,6 @@ host_linux_triple = config.target_triple.split("-")[0] + "-unknown-linux-gnu" -common_linker_flags = "-fuse-ld=lld -Wl,--unresolved-symbols=ignore-all" -flags = f"--target={host_linux_triple} {common_linker_flags}" +common_linker_flags = "-fuse-ld=lld -Wl,--unresolved-symbols=ignore-all -pie" +flags = f"--target={host_linux_triple} -fPIE {common_linker_flags}" config.substitutions.insert(0, ("%cflags", f"%cflags {flags}")) config.substitutions.insert(0, ("%cxxflags", f"%cxxflags {flags}")) diff --git a/bolt/test/merge-fdata-uninitialized-header.test b/bolt/test/merge-fdata-uninitialized-header.test new file mode 100644 index 0000000..5336961 --- /dev/null +++ b/bolt/test/merge-fdata-uninitialized-header.test @@ -0,0 +1,45 @@ +## Test that merge-fdata correctly handles YAML header with an uninitialized +## fields. a.yaml does not have hash-func set and it used to crash merge-fdata. + +# REQUIRES: system-linux + +# RUN: split-file %s %t +# RUN: not merge-fdata %t/a.yaml %t/b.yaml 2>&1 | FileCheck %s + +# CHECK: cannot merge profiles with different hash functions + +#--- a.yaml +--- +header: + profile-version: 1 + binary-name: 'a.out' + binary-build-id: '<unknown>' + profile-flags: [ lbr ] + profile-origin: branch profile reader + profile-events: '' + dfs-order: false +functions: + - name: 'main' + fid: 1 + hash: 0x50BBA3441D436491 + exec: 1 + nblocks: 0 +... +#--- b.yaml +--- +header: + profile-version: 1 + binary-name: 'a.out' + binary-build-id: '<unknown>' + profile-flags: [ lbr ] + profile-origin: branch profile reader + profile-events: '' + dfs-order: false + hash-func: xxh3 +functions: + - name: 'main' + fid: 1 + hash: 0x50BBA3441D436491 + exec: 1 + nblocks: 0 +... diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg index 4ee9ad0..0fecf91 100644 --- a/bolt/test/perf2bolt/lit.local.cfg +++ b/bolt/test/perf2bolt/lit.local.cfg @@ -1,4 +1,5 @@ import shutil +import subprocess -if shutil.which("perf") is not None: - config.available_features.add("perf")
\ No newline at end of file +if shutil.which("perf") is not None and subprocess.run(["perf", "record", "-e", "cycles:u", "-o", "/dev/null", "--", "perf", "--version"], capture_output=True).returncode == 0: + config.available_features.add("perf") diff --git a/bolt/test/perf2bolt/perf_test.test b/bolt/test/perf2bolt/perf_test.test index 44db899..7bec442 100644 --- a/bolt/test/perf2bolt/perf_test.test +++ b/bolt/test/perf2bolt/perf_test.test @@ -3,15 +3,12 @@ REQUIRES: system-linux, perf RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -Wl,--script=%S/Inputs/perf_test.lds -o %t -RUN: perf record -e cycles:u -o %t2 -- %t +RUN: perf record -Fmax -e cycles:u -o %t2 -- %t RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id 2>&1 | FileCheck %s CHECK-NOT: PERF2BOLT-ERROR CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection. RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4 -RUN: perf record -e cycles:u -o %t5 -- %t4 -RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id 2>&1 | FileCheck %s --check-prefix=CHECK-NO-PIE - -CHECK-NO-PIE-NOT: PERF2BOLT-ERROR -CHECK-NO-PIE-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
\ No newline at end of file +RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4 +RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id 2>&1 | FileCheck %s diff --git a/bolt/tools/merge-fdata/merge-fdata.cpp b/bolt/tools/merge-fdata/merge-fdata.cpp index f2ac5ad..89ca46c 100644 --- a/bolt/tools/merge-fdata/merge-fdata.cpp +++ b/bolt/tools/merge-fdata/merge-fdata.cpp @@ -145,6 +145,10 @@ void mergeProfileHeaders(BinaryProfileHeader &MergedHeader, errs() << "WARNING: merging profiles with different sampling events\n"; MergedHeader.EventNames += "," + Header.EventNames; } + + if (MergedHeader.HashFunction != Header.HashFunction) + report_error("merge conflict", + "cannot merge profiles with different hash functions"); } void mergeBasicBlockProfile(BinaryBasicBlockProfile &MergedBB, @@ -386,12 +390,14 @@ int main(int argc, char **argv) { // Merged information for all functions. StringMap<BinaryFunctionProfile> MergedBFs; + bool FirstHeader = true; for (std::string &InputDataFilename : Inputs) { ErrorOr<std::unique_ptr<MemoryBuffer>> MB = MemoryBuffer::getFileOrSTDIN(InputDataFilename); if (std::error_code EC = MB.getError()) report_error(InputDataFilename, EC); yaml::Input YamlInput(MB.get()->getBuffer()); + YamlInput.setAllowUnknownKeys(true); errs() << "Merging data from " << InputDataFilename << "...\n"; @@ -408,7 +414,12 @@ int main(int argc, char **argv) { } // Merge the header. - mergeProfileHeaders(MergedHeader, BP.Header); + if (FirstHeader) { + MergedHeader = BP.Header; + FirstHeader = false; + } else { + mergeProfileHeaders(MergedHeader, BP.Header); + } // Do the function merge. for (BinaryFunctionProfile &BF : BP.Functions) { diff --git a/bolt/unittests/Core/BinaryContext.cpp b/bolt/unittests/Core/BinaryContext.cpp index 6c32881..05b898d 100644 --- a/bolt/unittests/Core/BinaryContext.cpp +++ b/bolt/unittests/Core/BinaryContext.cpp @@ -160,13 +160,14 @@ TEST_P(BinaryContextTester, FlushPendingRelocJUMP26) { TEST_P(BinaryContextTester, BaseAddress) { // Check that base address calculation is correct for a binary with the // following segment layout: - BC->SegmentMapInfo[0] = SegmentInfo{0, 0x10e8c2b4, 0, 0x10e8c2b4, 0x1000}; + BC->SegmentMapInfo[0] = + SegmentInfo{0, 0x10e8c2b4, 0, 0x10e8c2b4, 0x1000, true}; BC->SegmentMapInfo[0x10e8d2b4] = - SegmentInfo{0x10e8d2b4, 0x3952faec, 0x10e8c2b4, 0x3952faec, 0x1000}; + SegmentInfo{0x10e8d2b4, 0x3952faec, 0x10e8c2b4, 0x3952faec, 0x1000, true}; BC->SegmentMapInfo[0x4a3bddc0] = - SegmentInfo{0x4a3bddc0, 0x148e828, 0x4a3bbdc0, 0x148e828, 0x1000}; + SegmentInfo{0x4a3bddc0, 0x148e828, 0x4a3bbdc0, 0x148e828, 0x1000, true}; BC->SegmentMapInfo[0x4b84d5e8] = - SegmentInfo{0x4b84d5e8, 0x294f830, 0x4b84a5e8, 0x3d3820, 0x1000}; + SegmentInfo{0x4b84d5e8, 0x294f830, 0x4b84a5e8, 0x3d3820, 0x1000, true}; std::optional<uint64_t> BaseAddress = BC->getBaseAddressForMapping(0x7f13f5556000, 0x10e8c000); @@ -181,13 +182,13 @@ TEST_P(BinaryContextTester, BaseAddress2) { // Check that base address calculation is correct for a binary if the // alignment in ELF file are different from pagesize. // The segment layout is as follows: - BC->SegmentMapInfo[0] = SegmentInfo{0, 0x2177c, 0, 0x2177c, 0x10000}; + BC->SegmentMapInfo[0] = SegmentInfo{0, 0x2177c, 0, 0x2177c, 0x10000, true}; BC->SegmentMapInfo[0x31860] = - SegmentInfo{0x31860, 0x370, 0x21860, 0x370, 0x10000}; + SegmentInfo{0x31860, 0x370, 0x21860, 0x370, 0x10000, true}; BC->SegmentMapInfo[0x41c20] = - SegmentInfo{0x41c20, 0x1f8, 0x21c20, 0x1f8, 0x10000}; + SegmentInfo{0x41c20, 0x1f8, 0x21c20, 0x1f8, 0x10000, true}; BC->SegmentMapInfo[0x54e18] = - SegmentInfo{0x54e18, 0x51, 0x24e18, 0x51, 0x10000}; + SegmentInfo{0x54e18, 0x51, 0x24e18, 0x51, 0x10000, true}; std::optional<uint64_t> BaseAddress = BC->getBaseAddressForMapping(0xaaaaea444000, 0x21000); @@ -197,3 +198,22 @@ TEST_P(BinaryContextTester, BaseAddress2) { BaseAddress = BC->getBaseAddressForMapping(0xaaaaea444000, 0x11000); ASSERT_FALSE(BaseAddress.has_value()); } + +TEST_P(BinaryContextTester, BaseAddressSegmentsSmallerThanAlignment) { + // Check that the correct segment is used to compute the base address + // when multiple segments are close together in the ELF file (closer + // than the required alignment in the process space). + // See https://github.com/llvm/llvm-project/issues/109384 + BC->SegmentMapInfo[0] = SegmentInfo{0, 0x1d1c, 0, 0x1d1c, 0x10000, false}; + BC->SegmentMapInfo[0x11d40] = + SegmentInfo{0x11d40, 0x11e0, 0x1d40, 0x11e0, 0x10000, true}; + BC->SegmentMapInfo[0x22f20] = + SegmentInfo{0x22f20, 0x10e0, 0x2f20, 0x1f0, 0x10000, false}; + BC->SegmentMapInfo[0x33110] = + SegmentInfo{0x33110, 0x89, 0x3110, 0x88, 0x10000, false}; + + std::optional<uint64_t> BaseAddress = + BC->getBaseAddressForMapping(0xaaaaaaab1000, 0x1000); + ASSERT_TRUE(BaseAddress.has_value()); + ASSERT_EQ(*BaseAddress, 0xaaaaaaaa0000ULL); +}
\ No newline at end of file |