[InstrProf] Evaluate function order using test traces (#92451)

The `llvm-profdata order` command is used to compute a function order using traces from the input profile. Add the `--num-test-traces` flag to keep aside N traces to evalute this order. These test traces are assumed to be the actual function execution order in some experiment. The output is a number that represents how many page faults we got. Lower is better. I tested on a large profile I already had. ``` llvm-profdata order default.profdata --num-test-traces=30 # Ordered 149103 functions # Total area under the page fault curve: 2.271827e+09 ... ``` I also improved `TemporalProfTraceTy::createBPFunctionNodes()` in a few ways: * Simplified how `UN`s are computed * Change how the initial `Node` order is computed * Filter out rare and common `UN`s * Output vector is an aliased argument instead of a return These changes slightly improved the evaluation in my test. ``` llvm-profdata order default.profdata --num-test-traces=30 # Ordered 149103 functions # Total area under the page fault curve: 2.268586e+09 ... ```
author: Ellis Hoag <ellis.sparky.hoag@gmail.com> 2024-05-23 11:19:29 -0700
committer: GitHub <noreply@github.com> 2024-05-23 11:19:29 -0700
commit: 73eb9b33147ba5157cbf5d8276ee718629dfbbda (patch)
tree: df6b29afdcf4317955790da4a4b83ea0be9acff8 /llvm/tools
parent: dd2d132fb3521f37f44656edd65cca75430c251e (diff)
download: llvm-73eb9b33147ba5157cbf5d8276ee718629dfbbda.zip
llvm-73eb9b33147ba5157cbf5d8276ee718629dfbbda.tar.gz
llvm-73eb9b33147ba5157cbf5d8276ee718629dfbbda.tar.bz2
1 files changed, 40 insertions, 3 deletions
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 693af06..28c3afa 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -340,7 +340,7 @@ cl::opt<unsigned long long> OverlapValueCutoff(
         "profile with max count value greater then the parameter value"),
     cl::sub(OverlapSubcommand));
 
-// Options unique to show subcommand.
+// Options specific to show subcommand.
 cl::opt<bool> ShowCounts("counts", cl::init(false),
                          cl::desc("Show counter values for shown functions"),
                          cl::sub(ShowSubcommand));
@@ -439,6 +439,14 @@ cl::opt<bool> ShowProfileVersion("profile-version", cl::init(false),
                                  cl::desc("Show profile version. "),
                                  cl::sub(ShowSubcommand));
 
+// Options specific to order subcommand.
+cl::opt<unsigned>
+    NumTestTraces("num-test-traces", cl::init(0),
+                  cl::desc("Keep aside the last <num-test-traces> traces in "
+                           "the profile when computing the function order and "
+                           "instead use them to evaluate that order"),
+                  cl::sub(OrderSubcommand));
+
 // We use this string to indicate that there are
 // multiple static functions map to the same name.
 const std::string DuplicateNameStr = "----";
@@ -3277,13 +3285,42 @@ static int order_main() {
     // Read all entries
     (void)I;
   }
-  auto &Traces = Reader->getTemporalProfTraces();
-  auto Nodes = TemporalProfTraceTy::createBPFunctionNodes(Traces);
+  ArrayRef Traces = Reader->getTemporalProfTraces();
+  if (NumTestTraces && NumTestTraces >= Traces.size())
+    exitWithError(
+        "--" + NumTestTraces.ArgStr +
+        " must be smaller than the total number of traces: expected: < " +
+        Twine(Traces.size()) + ", actual: " + Twine(NumTestTraces));
+  ArrayRef TestTraces = Traces.take_back(NumTestTraces);
+  Traces = Traces.drop_back(NumTestTraces);
+
+  std::vector<BPFunctionNode> Nodes;
+  TemporalProfTraceTy::createBPFunctionNodes(Traces, Nodes);
   BalancedPartitioningConfig Config;
   BalancedPartitioning BP(Config);
   BP.run(Nodes);
 
   OS << "# Ordered " << Nodes.size() << " functions\n";
+  if (!TestTraces.empty()) {
+    // Since we don't know the symbol sizes, we assume 32 functions per page.
+    DenseMap<BPFunctionNode::IDT, unsigned> IdToPageNumber;
+    for (auto &Node : Nodes)
+      IdToPageNumber[Node.Id] = IdToPageNumber.size() / 32;
+
+    SmallSet<unsigned, 0> TouchedPages;
+    unsigned Area = 0;
+    for (auto &Trace : TestTraces) {
+      for (auto Id : Trace.FunctionNameRefs) {
+        auto It = IdToPageNumber.find(Id);
+        if (It == IdToPageNumber.end())
+          continue;
+        TouchedPages.insert(It->getSecond());
+        Area += TouchedPages.size();
+      }
+      TouchedPages.clear();
+    }
+    OS << "# Total area under the page fault curve: " << (float)Area << "\n";
+  }
   OS << "# Warning: Mach-O may prefix symbols with \"_\" depending on the "
         "linkage and this output does not take that into account. Some "
         "post-processing may be required before passing to the linker via "
author	Ellis Hoag <ellis.sparky.hoag@gmail.com>	2024-05-23 11:19:29 -0700
committer	GitHub <noreply@github.com>	2024-05-23 11:19:29 -0700
commit	73eb9b33147ba5157cbf5d8276ee718629dfbbda (patch)
tree	df6b29afdcf4317955790da4a4b83ea0be9acff8 /llvm/tools
parent	dd2d132fb3521f37f44656edd65cca75430c251e (diff)
download	llvm-73eb9b33147ba5157cbf5d8276ee718629dfbbda.zip llvm-73eb9b33147ba5157cbf5d8276ee718629dfbbda.tar.gz llvm-73eb9b33147ba5157cbf5d8276ee718629dfbbda.tar.bz2