diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/include/llvm/ProfileData/InstrProf.h | 5 | ||||
-rw-r--r-- | llvm/lib/ProfileData/InstrProf.cpp | 76 | ||||
-rw-r--r-- | llvm/test/tools/llvm-profdata/show-order-error.proftext | 27 | ||||
-rw-r--r-- | llvm/test/tools/llvm-profdata/show-order.proftext | 11 | ||||
-rw-r--r-- | llvm/tools/llvm-profdata/llvm-profdata.cpp | 43 | ||||
-rw-r--r-- | llvm/unittests/ProfileData/BPFunctionNodeTest.cpp | 33 |
6 files changed, 144 insertions, 51 deletions
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 2263376..2cee928 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -385,8 +385,9 @@ struct TemporalProfTraceTy { /// Use a set of temporal profile traces to create a list of balanced /// partitioning function nodes used by BalancedPartitioning to generate a /// function order that reduces page faults during startup - static std::vector<BPFunctionNode> - createBPFunctionNodes(ArrayRef<TemporalProfTraceTy> Traces); + static void createBPFunctionNodes(ArrayRef<TemporalProfTraceTy> Traces, + std::vector<BPFunctionNode> &Nodes, + bool RemoveOutlierUNs = true); }; inline std::error_code make_error_code(instrprof_error E) { diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 04dd7ba..f9cd71b 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -1002,46 +1002,60 @@ void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site, ValueSites.emplace_back(VData, VData + N); } -std::vector<BPFunctionNode> TemporalProfTraceTy::createBPFunctionNodes( - ArrayRef<TemporalProfTraceTy> Traces) { +void TemporalProfTraceTy::createBPFunctionNodes( + ArrayRef<TemporalProfTraceTy> Traces, std::vector<BPFunctionNode> &Nodes, + bool RemoveOutlierUNs) { using IDT = BPFunctionNode::IDT; using UtilityNodeT = BPFunctionNode::UtilityNodeT; - // Collect all function IDs ordered by their smallest timestamp. This will be - // used as the initial FunctionNode order. - SetVector<IDT> FunctionIds; - size_t LargestTraceSize = 0; - for (auto &Trace : Traces) - LargestTraceSize = - std::max(LargestTraceSize, Trace.FunctionNameRefs.size()); - for (size_t Timestamp = 0; Timestamp < LargestTraceSize; Timestamp++) - for (auto &Trace : Traces) - if (Timestamp < Trace.FunctionNameRefs.size()) - FunctionIds.insert(Trace.FunctionNameRefs[Timestamp]); - - const int N = Log2_64(LargestTraceSize) + 1; - + UtilityNodeT MaxUN = 0; + DenseMap<IDT, size_t> IdToFirstTimestamp; + DenseMap<IDT, UtilityNodeT> IdToFirstUN; + DenseMap<IDT, SmallVector<UtilityNodeT>> IdToUNs; // TODO: We need to use the Trace.Weight field to give more weight to more // important utilities - DenseMap<IDT, SmallVector<UtilityNodeT, 4>> FuncGroups; - for (size_t TraceIdx = 0; TraceIdx < Traces.size(); TraceIdx++) { - auto &Trace = Traces[TraceIdx].FunctionNameRefs; - for (size_t Timestamp = 0; Timestamp < Trace.size(); Timestamp++) { - for (int I = Log2_64(Timestamp + 1); I < N; I++) { - auto FunctionId = Trace[Timestamp]; - UtilityNodeT GroupId = TraceIdx * N + I; - FuncGroups[FunctionId].push_back(GroupId); + for (auto &Trace : Traces) { + size_t CutoffTimestamp = 1; + for (size_t Timestamp = 0; Timestamp < Trace.FunctionNameRefs.size(); + Timestamp++) { + IDT Id = Trace.FunctionNameRefs[Timestamp]; + auto [It, WasInserted] = IdToFirstTimestamp.try_emplace(Id, Timestamp); + if (!WasInserted) + It->getSecond() = std::min<size_t>(It->getSecond(), Timestamp); + if (Timestamp >= CutoffTimestamp) { + ++MaxUN; + CutoffTimestamp = 2 * Timestamp; } + IdToFirstUN.try_emplace(Id, MaxUN); } + for (auto &[Id, FirstUN] : IdToFirstUN) + for (auto UN = FirstUN; UN <= MaxUN; ++UN) + IdToUNs[Id].push_back(UN); + ++MaxUN; + IdToFirstUN.clear(); } - std::vector<BPFunctionNode> Nodes; - for (auto Id : FunctionIds) { - auto &UNs = FuncGroups[Id]; - llvm::sort(UNs); - UNs.erase(std::unique(UNs.begin(), UNs.end()), UNs.end()); - Nodes.emplace_back(Id, UNs); + if (RemoveOutlierUNs) { + DenseMap<UtilityNodeT, unsigned> UNFrequency; + for (auto &[Id, UNs] : IdToUNs) + for (auto &UN : UNs) + ++UNFrequency[UN]; + // Filter out utility nodes that are too infrequent or too prevalent to make + // BalancedPartitioning more effective. + for (auto &[Id, UNs] : IdToUNs) + llvm::erase_if(UNs, [&](auto &UN) { + return UNFrequency[UN] <= 1 || 2 * UNFrequency[UN] > IdToUNs.size(); + }); } - return Nodes; + + for (auto &[Id, UNs] : IdToUNs) + Nodes.emplace_back(Id, UNs); + + // Since BalancedPartitioning is sensitive to the initial order, we explicitly + // order nodes by their earliest timestamp. + llvm::sort(Nodes, [&](auto &L, auto &R) { + return std::make_pair(IdToFirstTimestamp[L.Id], L.Id) < + std::make_pair(IdToFirstTimestamp[R.Id], R.Id); + }); } #define INSTR_PROF_COMMON_API_IMPL diff --git a/llvm/test/tools/llvm-profdata/show-order-error.proftext b/llvm/test/tools/llvm-profdata/show-order-error.proftext new file mode 100644 index 0000000..633f1a9 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/show-order-error.proftext @@ -0,0 +1,27 @@ +# RUN: not llvm-profdata order %s --num-test-traces=10 2>&1 | FileCheck %s + +# CHECK: --num-test-traces must be smaller than the total number of traces + +# Header +:ir +:temporal_prof_traces +# Num Traces +1 +# Trace Stream Size: +1 +# Weight +1 +a, b + +a +# Func Hash: +0x1234 +# Num Counters: +1 +# Counter Values: +101 + +b +0x5678 +1 +202 diff --git a/llvm/test/tools/llvm-profdata/show-order.proftext b/llvm/test/tools/llvm-profdata/show-order.proftext index 8ef2684..28eb1b9 100644 --- a/llvm/test/tools/llvm-profdata/show-order.proftext +++ b/llvm/test/tools/llvm-profdata/show-order.proftext @@ -1,4 +1,6 @@ -# RUN: llvm-profdata order %s | FileCheck %s +# RUN: llvm-profdata order %s --num-test-traces=1 | FileCheck %s + +# CHECK: # Total area under the page fault curve: 4.000000e+00 # CHECK: a # CHECK: b @@ -9,9 +11,9 @@ :ir :temporal_prof_traces # Num Traces -3 +4 # Trace Stream Size: -3 +4 # Weight 1 a, main.c:b, c @@ -21,6 +23,9 @@ a, x, main.c:b, c # Weight 1 a, main.c:b, c +# Weight +1 +a, main.c:b, c, x a # Func Hash: diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index 693af06..28c3afa 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -340,7 +340,7 @@ cl::opt<unsigned long long> OverlapValueCutoff( "profile with max count value greater then the parameter value"), cl::sub(OverlapSubcommand)); -// Options unique to show subcommand. +// Options specific to show subcommand. cl::opt<bool> ShowCounts("counts", cl::init(false), cl::desc("Show counter values for shown functions"), cl::sub(ShowSubcommand)); @@ -439,6 +439,14 @@ cl::opt<bool> ShowProfileVersion("profile-version", cl::init(false), cl::desc("Show profile version. "), cl::sub(ShowSubcommand)); +// Options specific to order subcommand. +cl::opt<unsigned> + NumTestTraces("num-test-traces", cl::init(0), + cl::desc("Keep aside the last <num-test-traces> traces in " + "the profile when computing the function order and " + "instead use them to evaluate that order"), + cl::sub(OrderSubcommand)); + // We use this string to indicate that there are // multiple static functions map to the same name. const std::string DuplicateNameStr = "----"; @@ -3277,13 +3285,42 @@ static int order_main() { // Read all entries (void)I; } - auto &Traces = Reader->getTemporalProfTraces(); - auto Nodes = TemporalProfTraceTy::createBPFunctionNodes(Traces); + ArrayRef Traces = Reader->getTemporalProfTraces(); + if (NumTestTraces && NumTestTraces >= Traces.size()) + exitWithError( + "--" + NumTestTraces.ArgStr + + " must be smaller than the total number of traces: expected: < " + + Twine(Traces.size()) + ", actual: " + Twine(NumTestTraces)); + ArrayRef TestTraces = Traces.take_back(NumTestTraces); + Traces = Traces.drop_back(NumTestTraces); + + std::vector<BPFunctionNode> Nodes; + TemporalProfTraceTy::createBPFunctionNodes(Traces, Nodes); BalancedPartitioningConfig Config; BalancedPartitioning BP(Config); BP.run(Nodes); OS << "# Ordered " << Nodes.size() << " functions\n"; + if (!TestTraces.empty()) { + // Since we don't know the symbol sizes, we assume 32 functions per page. + DenseMap<BPFunctionNode::IDT, unsigned> IdToPageNumber; + for (auto &Node : Nodes) + IdToPageNumber[Node.Id] = IdToPageNumber.size() / 32; + + SmallSet<unsigned, 0> TouchedPages; + unsigned Area = 0; + for (auto &Trace : TestTraces) { + for (auto Id : Trace.FunctionNameRefs) { + auto It = IdToPageNumber.find(Id); + if (It == IdToPageNumber.end()) + continue; + TouchedPages.insert(It->getSecond()); + Area += TouchedPages.size(); + } + TouchedPages.clear(); + } + OS << "# Total area under the page fault curve: " << (float)Area << "\n"; + } OS << "# Warning: Mach-O may prefix symbols with \"_\" depending on the " "linkage and this output does not take that into account. Some " "post-processing may be required before passing to the linker via " diff --git a/llvm/unittests/ProfileData/BPFunctionNodeTest.cpp b/llvm/unittests/ProfileData/BPFunctionNodeTest.cpp index 6af6f1b..24586b5 100644 --- a/llvm/unittests/ProfileData/BPFunctionNodeTest.cpp +++ b/llvm/unittests/ProfileData/BPFunctionNodeTest.cpp @@ -8,7 +8,6 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/BalancedPartitioning.h" -#include "llvm/Testing/Support/SupportHelpers.h" #include "gmock/gmock.h" #include "gtest/gtest.h" @@ -31,22 +30,32 @@ TEST(BPFunctionNodeTest, Basic) { UnorderedElementsAreArray(UNs))); }; - auto Nodes = TemporalProfTraceTy::createBPFunctionNodes({ - TemporalProfTraceTy({0, 1, 2, 3}), - }); + std::vector<BPFunctionNode> Nodes; + TemporalProfTraceTy::createBPFunctionNodes( + {TemporalProfTraceTy({0, 1, 2, 3})}, Nodes, /*RemoveOutlierUNs=*/false); + // Utility nodes that are too infrequent or too prevalent are filtered out. EXPECT_THAT(Nodes, UnorderedElementsAre(NodeIs(0, {0, 1, 2}), NodeIs(1, {1, 2}), - NodeIs(2, {1, 2}), NodeIs(3, {2}))); + NodeIs(2, {2}), NodeIs(3, {2}))); - Nodes = TemporalProfTraceTy::createBPFunctionNodes({ - TemporalProfTraceTy({0, 1, 2, 3, 4}), - TemporalProfTraceTy({4, 2}), - }); + Nodes.clear(); + TemporalProfTraceTy::createBPFunctionNodes( + {TemporalProfTraceTy({0, 1, 2, 3, 4}), TemporalProfTraceTy({4, 2})}, + Nodes, /*RemoveOutlierUNs=*/false); EXPECT_THAT(Nodes, - UnorderedElementsAre(NodeIs(0, {0, 1, 2}), NodeIs(1, {1, 2}), - NodeIs(2, {1, 2, 4, 5}), NodeIs(3, {2}), - NodeIs(4, {2, 3, 4, 5}))); + UnorderedElementsAre(NodeIs(0, {0, 1, 2, 3}), + NodeIs(1, {1, 2, 3}), NodeIs(2, {2, 3, 5}), + NodeIs(3, {2, 3}), NodeIs(4, {3, 4, 5}))); + + Nodes.clear(); + TemporalProfTraceTy::createBPFunctionNodes( + {TemporalProfTraceTy({0, 1, 2, 3, 4}), TemporalProfTraceTy({4, 2})}, + Nodes, /*RemoveOutlierUNs=*/true); + + EXPECT_THAT(Nodes, UnorderedElementsAre(NodeIs(0, {1}), NodeIs(1, {1}), + NodeIs(2, {5}), NodeIs(3, {}), + NodeIs(4, {5}))); } } // end namespace llvm |