aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHongtao Yu <hoy@fb.com>2022-05-11 21:33:09 -0700
committerwlei <wlei@fb.com>2022-05-12 10:58:50 -0700
commit9f732af583c0dba58847b753a87cc5432ec33f09 (patch)
tree8004d995f4391a99bec4731b4cbfab93ee897f6e
parent9145cb8b7c9acd6e7980047e048007bc58b7fab9 (diff)
downloadllvm-9f732af583c0dba58847b753a87cc5432ec33f09.zip
llvm-9f732af583c0dba58847b753a87cc5432ec33f09.tar.gz
llvm-9f732af583c0dba58847b753a87cc5432ec33f09.tar.bz2
[llvm-profgen] Filter out oversized LBR ranges.
As a follow up to {D123271}, LBR ranges that are too big should also be considered as invalid. For example, the last two pairs in the following trace form a range [0x0d7b02b0, 0x368ba706] that covers a ton of functions in the binary. Such oversized range should also be ignored. 0x0c74505f/0x368b99a0 **0x368ba706**/0x0c745040 0x0d7b1c3f/**0x0d7b02b0** Add a defensive check to filter out those ranges based that the valid range should not cross the unconditional branch(Call, return, unconditional jmp). Reviewed By: hoy, wenlei Differential Revision: https://reviews.llvm.org/D125448
-rw-r--r--llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript9
-rw-r--r--llvm/test/tools/llvm-profgen/invalid-range.test51
-rw-r--r--llvm/tools/llvm-profgen/PerfReader.cpp13
-rw-r--r--llvm/tools/llvm-profgen/PerfReader.h11
-rw-r--r--llvm/tools/llvm-profgen/ProfiledBinary.cpp11
-rw-r--r--llvm/tools/llvm-profgen/ProfiledBinary.h9
6 files changed, 70 insertions, 34 deletions
diff --git a/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript b/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript
index d6f8553..4982130 100644
--- a/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript
+++ b/llvm/test/tools/llvm-profgen/Inputs/invalid-range.perfscript
@@ -8,3 +8,12 @@
// The consecutive pairs 0x2017bf/0x201760 and 0x2017d8/0x2017e3 form an invalid execution range [0x2017e3, 0x2017bf], should be ignored to avoid bogus instruction ranges.
+
+
+ 20179e
+ 2017f9
+ 7f83e84e7793
+ 5541f689495641d7
+ 0x2017cf/0x20179e/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0 0x2017cf/0x20179e/P/-/-/0 0x20177f/0x2017c4/P/-/-/0 0x2017bf/0x201760/P/-/-/0
+
+// Duplicated LBR entries "0x2017cf/0x20179e/P/-/-/0 0x2017cf/0x20179e/P/-/-/0", the range [0x20179e, 0x2017cf] is cross unconditional jmp, should be ignored to avoid bogus instruction ranges.
diff --git a/llvm/test/tools/llvm-profgen/invalid-range.test b/llvm/test/tools/llvm-profgen/invalid-range.test
index 980fd94..6d6f2cb 100644
--- a/llvm/test/tools/llvm-profgen/invalid-range.test
+++ b/llvm/test/tools/llvm-profgen/invalid-range.test
@@ -8,39 +8,40 @@
; RUN: FileCheck %s --input-file %t2 --check-prefix=CS
-; NOCS: 4
-; NOCS-NEXT: 201760-20177f:2
-; NOCS-NEXT: 20179e-2017bf:1
-; NOCS-NEXT: 2017c4-2017cf:1
+; NOCS: 4
+; NOCS-NEXT: 201760-20177f:7
+; NOCS-NEXT: 20179e-2017bf:5
+; NOCS-NEXT: 2017c4-2017cf:6
; NOCS-NEXT: 2017c4-2017d8:1
; NOCS-NEXT: 4
-; NOCS-NEXT: 20177f->2017c4:2
-; NOCS-NEXT: 2017bf->201760:2
-; NOCS-NEXT: 2017cf->20179e:2
+; NOCS-NEXT: 20177f->2017c4:7
+; NOCS-NEXT: 2017bf->201760:7
+; NOCS-NEXT: 2017cf->20179e:8
; NOCS-NEXT: 2017d8->2017e3:1
; CS: []
-; CS-NEXT: 3
-; CS-NEXT: 201760-20177f:1
-; CS-NEXT: 20179e-2017bf:1
-; CS-NEXT: 2017c4-2017d8:1
-; CS-NEXT: 4
-; CS-NEXT: 20177f->2017c4:1
-; CS-NEXT: 2017bf->201760:1
-; CS-NEXT: 2017cf->20179e:1
-; CS-NEXT: 2017d8->2017e3:1
+; CS-NEXT: 4
+; CS-NEXT: 201760-20177f:6
+; CS-NEXT: 20179e-2017bf:5
+; CS-NEXT: 2017c4-2017cf:5
+; CS-NEXT: 2017c4-2017d8:1
+; CS-NEXT: 4
+; CS-NEXT: 20177f->2017c4:6
+; CS-NEXT: 2017bf->201760:6
+; CS-NEXT: 2017cf->20179e:6
+; CS-NEXT: 2017d8->2017e3:1
; CS-NEXT: [0x7f4]
-; CS-NEXT: 1
-; CS-NEXT: 2017c4-2017cf:1
-; CS-NEXT: 2
-; CS-NEXT: 2017bf->201760:1
-; CS-NEXT: 2017cf->20179e:1
+; CS-NEXT: 1
+; CS-NEXT: 2017c4-2017cf:1
+; CS-NEXT: 2
+; CS-NEXT: 2017bf->201760:1
+; CS-NEXT: 2017cf->20179e:2
; CS-NEXT: [0x7f4 @ 0x7bf]
-; CS-NEXT: 1
-; CS-NEXT: 201760-20177f:1
-; CS-NEXT: 1
-; CS-NEXT: 20177f->2017c4:1
+; CS-NEXT: 1
+; CS-NEXT: 201760-20177f:1
+; CS-NEXT: 1
+; CS-NEXT: 20177f->2017c4:1
; clang -O3 -fuse-ld=lld -fpseudo-probe-for-profiling
; -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Xclang -mdisable-tail-calls
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 6888e93..2c4912c 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -99,7 +99,8 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
return;
}
- if (Target > End) {
+ if (!isValidFallThroughRange(Binary->virtualAddrToOffset(Target),
+ Binary->virtualAddrToOffset(End), Binary)) {
// Skip unwinding the rest of LBR trace when a bogus range is seen.
State.setInvalid();
return;
@@ -581,7 +582,6 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt,
if (!SrcIsInternal && !DstIsInternal)
continue;
- // TODO: filter out buggy duplicate branches on Skylake
LBRStack.emplace_back(LBREntry(Src, Dst));
}
TraceIt.advance();
@@ -878,7 +878,7 @@ void PerfScriptReader::computeCounterFromLBR(const PerfSample *Sample,
// LBR and FROM of next LBR.
uint64_t StartOffset = TargetOffset;
if (Binary->offsetIsCode(StartOffset) && Binary->offsetIsCode(EndOffeset) &&
- StartOffset <= EndOffeset)
+ isValidFallThroughRange(StartOffset, EndOffeset, Binary))
Counter.recordRangeCount(StartOffset, EndOffeset, Repeat);
EndOffeset = SourceOffset;
}
@@ -1124,7 +1124,7 @@ void PerfScriptReader::warnInvalidRange() {
const char *RangeCrossFuncMsg =
"Fall through range should not cross function boundaries, likely due to "
"profile and binary mismatch.";
- const char *BogusRangeMsg = "Range start is after range end.";
+ const char *BogusRangeMsg = "Range start is after or too far from range end.";
uint64_t TotalRangeNum = 0;
uint64_t InstNotBoundary = 0;
@@ -1155,7 +1155,7 @@ void PerfScriptReader::warnInvalidRange() {
WarnInvalidRange(StartOffset, EndOffset, RangeCrossFuncMsg);
}
- if (StartOffset > EndOffset) {
+ if (!isValidFallThroughRange(StartOffset, EndOffset, Binary)) {
BogusRange += I.second;
WarnInvalidRange(StartOffset, EndOffset, BogusRangeMsg);
}
@@ -1172,7 +1172,8 @@ void PerfScriptReader::warnInvalidRange() {
"of samples are from ranges that do cross function boundaries.");
emitWarningSummary(
BogusRange, TotalRangeNum,
- "of samples are from ranges that have range start after range end.");
+ "of samples are from ranges that have range start after or too far from "
+ "range end acrossing the unconditinal jmp.");
}
void PerfScriptReader::parsePerfTraces() {
diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h
index 56ac03f..6b5f390 100644
--- a/llvm/tools/llvm-profgen/PerfReader.h
+++ b/llvm/tools/llvm-profgen/PerfReader.h
@@ -210,6 +210,17 @@ using AggregatedCounter =
using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
+inline bool isValidFallThroughRange(uint64_t Start, uint64_t End,
+ ProfiledBinary *Binary) {
+ // Start bigger than End is considered invalid.
+ // LBR ranges cross the unconditional jmp are also assumed invalid.
+ // It's found that perf data may contain duplicate LBR entries that could form
+ // a range that does not reflect real execution flow on some Intel targets,
+ // e.g. Skylake. Such ranges are ususally very long. Exclude them since there
+ // cannot be a linear execution range that spans over unconditional jmp.
+ return Start <= End && !Binary->rangeCrossUncondBranch(Start, End);
+}
+
// The state for the unwinder, it doesn't hold the data but only keep the
// pointer/index of the data, While unwinding, the CallStack is changed
// dynamicially and will be recorded as the context of the sample
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 2b742f8..8f9c63f 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -495,12 +495,17 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
// Populate address maps.
CodeAddrOffsets.push_back(Offset);
- if (MCDesc.isCall())
+ if (MCDesc.isCall()) {
CallOffsets.insert(Offset);
- else if (MCDesc.isReturn())
+ UncondBranchOffsets.insert(Offset);
+ } else if (MCDesc.isReturn()) {
RetOffsets.insert(Offset);
- else if (MCDesc.isBranch())
+ UncondBranchOffsets.insert(Offset);
+ } else if (MCDesc.isBranch()) {
+ if (MCDesc.isUnconditionalBranch())
+ UncondBranchOffsets.insert(Offset);
BranchOffsets.insert(Offset);
+ }
if (InvalidInstLength) {
WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index 5512a22..ee585fc 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -239,6 +239,8 @@ class ProfiledBinary {
std::unordered_set<uint64_t> CallOffsets;
// A set of return instruction offsets. Used by virtual unwinding.
std::unordered_set<uint64_t> RetOffsets;
+ // An ordered set of unconditional branch instruction offsets.
+ std::set<uint64_t> UncondBranchOffsets;
// A set of branch instruction offsets.
std::unordered_set<uint64_t> BranchOffsets;
@@ -395,6 +397,13 @@ public:
CallOffsets.count(Offset);
}
+ bool rangeCrossUncondBranch(uint64_t Start, uint64_t End) {
+ if (Start >= End)
+ return false;
+ auto R = UncondBranchOffsets.lower_bound(Start);
+ return R != UncondBranchOffsets.end() && *R < End;
+ }
+
uint64_t getAddressforIndex(uint64_t Index) const {
return offsetToVirtualAddr(CodeAddrOffsets[Index]);
}