[llvm-profgen] Filter out oversized LBR ranges.

As a follow up to {D123271}, LBR ranges that are too big should also be considered as invalid. For example, the last two pairs in the following trace form a range [0x0d7b02b0, 0x368ba706] that covers a ton of functions in the binary. Such oversized range should also be ignored. 0x0c74505f/0x368b99a0 **0x368ba706**/0x0c745040 0x0d7b1c3f/**0x0d7b02b0** Add a defensive check to filter out those ranges based that the valid range should not cross the unconditional branch(Call, return, unconditional jmp). Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D125448
author: Hongtao Yu <hoy@fb.com> 2022-05-11 21:33:09 -0700
committer: wlei <wlei@fb.com> 2022-05-12 10:58:50 -0700
commit: 9f732af583c0dba58847b753a87cc5432ec33f09 (patch)
tree: 8004d995f4391a99bec4731b4cbfab93ee897f6e
parent: 9145cb8b7c9acd6e7980047e048007bc58b7fab9 (diff)
download: llvm-9f732af583c0dba58847b753a87cc5432ec33f09.zip
llvm-9f732af583c0dba58847b753a87cc5432ec33f09.tar.gz
llvm-9f732af583c0dba58847b753a87cc5432ec33f09.tar.bz2
6 files changed, 70 insertions, 34 deletions
diff --git a/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript b/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript
index d6f8553..4982130 100644
--- a/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript
+++ b/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript
@@ -8,3 +8,12 @@
 
 
 // The consecutive pairs 0x2017bf/0x201760 and 0x2017d8/0x2017e3 form an invalid execution range [0x2017e3, 0x2017bf], should be ignored to avoid bogus instruction ranges.
+
+
+	          20179e
+	          2017f9
+	    7f83e84e7793
+	5541f689495641d7
+ 0x2017cf/0x20179e/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0  0x2017cf/0x20179e/P/-/-/0  0x20177f/0x2017c4/P/-/-/0  0x2017bf/0x201760/P/-/-/0
+
+// Duplicated LBR entries "0x2017cf/0x20179e/P/-/-/0  0x2017cf/0x20179e/P/-/-/0", the range [0x20179e, 0x2017cf] is cross unconditional jmp, should be ignored to avoid bogus instruction ranges.
diff --git a/llvm/test/tools/llvm-profgen/invalid-range.test b/llvm/test/tools/llvm-profgen/invalid-range.test
index 980fd94..6d6f2cb 100644
--- a/llvm/test/tools/llvm-profgen/invalid-range.test
+++ b/llvm/test/tools/llvm-profgen/invalid-range.test
@@ -8,39 +8,40 @@
 ; RUN: FileCheck %s --input-file %t2 --check-prefix=CS
 
 
-; NOCS:      4
-; NOCS-NEXT: 201760-20177f:2
-; NOCS-NEXT: 20179e-2017bf:1
-; NOCS-NEXT: 2017c4-2017cf:1
+; NOCS: 4
+; NOCS-NEXT: 201760-20177f:7
+; NOCS-NEXT: 20179e-2017bf:5
+; NOCS-NEXT: 2017c4-2017cf:6
 ; NOCS-NEXT: 2017c4-2017d8:1
 ; NOCS-NEXT: 4
-; NOCS-NEXT: 20177f->2017c4:2
-; NOCS-NEXT: 2017bf->201760:2
-; NOCS-NEXT: 2017cf->20179e:2
+; NOCS-NEXT: 20177f->2017c4:7
+; NOCS-NEXT: 2017bf->201760:7
+; NOCS-NEXT: 2017cf->20179e:8
 ; NOCS-NEXT: 2017d8->2017e3:1
 
 
 ; CS:      []
-; CS-NEXT:  3
-; CS-NEXT:  201760-20177f:1
-; CS-NEXT:  20179e-2017bf:1
-; CS-NEXT:  2017c4-2017d8:1
-; CS-NEXT:  4
-; CS-NEXT:  20177f->2017c4:1
-; CS-NEXT:  2017bf->201760:1
-; CS-NEXT:  2017cf->20179e:1
-; CS-NEXT:  2017d8->2017e3:1
+; CS-NEXT:   4
+; CS-NEXT:   201760-20177f:6
+; CS-NEXT:   20179e-2017bf:5
+; CS-NEXT:   2017c4-2017cf:5
+; CS-NEXT:   2017c4-2017d8:1
+; CS-NEXT:   4
+; CS-NEXT:   20177f->2017c4:6
+; CS-NEXT:   2017bf->201760:6
+; CS-NEXT:   2017cf->20179e:6
+; CS-NEXT:   2017d8->2017e3:1
 ; CS-NEXT: [0x7f4]
-; CS-NEXT:  1
-; CS-NEXT:  2017c4-2017cf:1
-; CS-NEXT:  2
-; CS-NEXT:  2017bf->201760:1
-; CS-NEXT:  2017cf->20179e:1
+; CS-NEXT:   1
+; CS-NEXT:   2017c4-2017cf:1
+; CS-NEXT:   2
+; CS-NEXT:   2017bf->201760:1
+; CS-NEXT:   2017cf->20179e:2
 ; CS-NEXT: [0x7f4 @ 0x7bf]
-; CS-NEXT:  1
-; CS-NEXT:  201760-20177f:1
-; CS-NEXT:  1
-; CS-NEXT:  20177f->2017c4:1
+; CS-NEXT:   1
+; CS-NEXT:   201760-20177f:1
+; CS-NEXT:   1
+; CS-NEXT:   20177f->2017c4:1
 
 ; clang -O3 -fuse-ld=lld -fpseudo-probe-for-profiling
 ; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 6888e93..2c4912c 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -99,7 +99,8 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
     return;
   }
 
-  if (Target > End) {
+  if (!isValidFallThroughRange(Binary->virtualAddrToOffset(Target),
+                               Binary->virtualAddrToOffset(End), Binary)) {
     // Skip unwinding the rest of LBR trace when a bogus range is seen.
     State.setInvalid();
     return;
@@ -581,7 +582,6 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt,
     if (!SrcIsInternal && !DstIsInternal)
       continue;
 
-    // TODO: filter out buggy duplicate branches on Skylake
     LBRStack.emplace_back(LBREntry(Src, Dst));
   }
   TraceIt.advance();
@@ -878,7 +878,7 @@ void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample,
     // LBR and FROM of next LBR.
     uint64_t StartOffset = TargetOffset;
     if (Binary->offsetIsCode(StartOffset) && Binary->offsetIsCode(EndOffeset) &&
-        StartOffset <= EndOffeset)
+        isValidFallThroughRange(StartOffset, EndOffeset, Binary))
       Counter.recordRangeCount(StartOffset, EndOffeset, Repeat);
     EndOffeset = SourceOffset;
   }
@@ -1124,7 +1124,7 @@ void PerfScriptReader::warnInvalidRange() {
   const char *RangeCrossFuncMsg =
       "Fall through range should not cross function boundaries, likely due to "
       "profile and binary mismatch.";
-  const char *BogusRangeMsg = "Range start is after range end.";
+  const char *BogusRangeMsg = "Range start is after or too far from range end.";
 
   uint64_t TotalRangeNum = 0;
   uint64_t InstNotBoundary = 0;
@@ -1155,7 +1155,7 @@ void PerfScriptReader::warnInvalidRange() {
       WarnInvalidRange(StartOffset, EndOffset, RangeCrossFuncMsg);
     }
 
-    if (StartOffset > EndOffset) {
+    if (!isValidFallThroughRange(StartOffset, EndOffset, Binary)) {
       BogusRange += I.second;
       WarnInvalidRange(StartOffset, EndOffset, BogusRangeMsg);
     }
@@ -1172,7 +1172,8 @@ void PerfScriptReader::warnInvalidRange() {
       "of samples are from ranges that do cross function boundaries.");
   emitWarningSummary(
       BogusRange, TotalRangeNum,
-      "of samples are from ranges that have range start after range end.");
+      "of samples are from ranges that have range start after or too far from "
+      "range end acrossing the unconditinal jmp.");
 }
 
 void PerfScriptReader::parsePerfTraces() {
diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h
index 56ac03f..6b5f390 100644
--- a/llvm/tools/llvm-profgen/PerfReader.h
+++ b/llvm/tools/llvm-profgen/PerfReader.h
@@ -210,6 +210,17 @@ using AggregatedCounter =
 
 using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
 
+inline bool isValidFallThroughRange(uint64_t Start, uint64_t End,
+                                    ProfiledBinary *Binary) {
+  // Start bigger than End is considered invalid.
+  // LBR ranges cross the unconditional jmp are also assumed invalid.
+  // It's found that perf data may contain duplicate LBR entries that could form
+  // a range that does not reflect real execution flow on some Intel targets,
+  // e.g. Skylake. Such ranges are ususally very long. Exclude them since there
+  // cannot be a linear execution range that spans over unconditional jmp.
+  return Start <= End && !Binary->rangeCrossUncondBranch(Start, End);
+}
+
 // The state for the unwinder, it doesn't hold the data but only keep the
 // pointer/index of the data, While unwinding, the CallStack is changed
 // dynamicially and will be recorded as the context of the sample
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 2b742f8..8f9c63f 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -495,12 +495,17 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
 
       // Populate address maps.
       CodeAddrOffsets.push_back(Offset);
-      if (MCDesc.isCall())
+      if (MCDesc.isCall()) {
         CallOffsets.insert(Offset);
-      else if (MCDesc.isReturn())
+        UncondBranchOffsets.insert(Offset);
+      } else if (MCDesc.isReturn()) {
         RetOffsets.insert(Offset);
-      else if (MCDesc.isBranch())
+        UncondBranchOffsets.insert(Offset);
+      } else if (MCDesc.isBranch()) {
+        if (MCDesc.isUnconditionalBranch())
+          UncondBranchOffsets.insert(Offset);
         BranchOffsets.insert(Offset);
+      }
 
       if (InvalidInstLength) {
         WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 5512a22..ee585fc 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -239,6 +239,8 @@ class ProfiledBinary {
   std::unordered_set<uint64_t> CallOffsets;
   // A set of return instruction offsets. Used by virtual unwinding.
   std::unordered_set<uint64_t> RetOffsets;
+  // An ordered set of unconditional branch instruction offsets.
+  std::set<uint64_t> UncondBranchOffsets;
   // A set of branch instruction offsets.
   std::unordered_set<uint64_t> BranchOffsets;
 
@@ -395,6 +397,13 @@ public:
            CallOffsets.count(Offset);
   }
 
+  bool rangeCrossUncondBranch(uint64_t Start, uint64_t End) {
+    if (Start >= End)
+      return false;
+    auto R = UncondBranchOffsets.lower_bound(Start);
+    return R != UncondBranchOffsets.end() && *R < End;
+  }
+
   uint64_t getAddressforIndex(uint64_t Index) const {
     return offsetToVirtualAddr(CodeAddrOffsets[Index]);
   }
author	Hongtao Yu <hoy@fb.com>	2022-05-11 21:33:09 -0700
committer	wlei <wlei@fb.com>	2022-05-12 10:58:50 -0700
commit	9f732af583c0dba58847b753a87cc5432ec33f09 (patch)
tree	8004d995f4391a99bec4731b4cbfab93ee897f6e
parent	9145cb8b7c9acd6e7980047e048007bc58b7fab9 (diff)
download	llvm-9f732af583c0dba58847b753a87cc5432ec33f09.zip llvm-9f732af583c0dba58847b753a87cc5432ec33f09.tar.gz llvm-9f732af583c0dba58847b753a87cc5432ec33f09.tar.bz2