aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/ProfileData/SampleProfReader.cpp
diff options
context:
space:
mode:
authorHongtao Yu <hoy@fb.com>2020-12-16 12:54:50 -0800
committerHongtao Yu <hoy@fb.com>2020-12-16 15:57:18 -0800
commitac068e014b22548ae1c0dc5ff3dd8821d41f45c0 (patch)
tree22eca06cfe4ca403f7dce19a9c11614bc8696fb1 /llvm/lib/ProfileData/SampleProfReader.cpp
parent687e80be7fcf75900c354152d8b21500c3b08850 (diff)
downloadllvm-ac068e014b22548ae1c0dc5ff3dd8821d41f45c0.zip
llvm-ac068e014b22548ae1c0dc5ff3dd8821d41f45c0.tar.gz
llvm-ac068e014b22548ae1c0dc5ff3dd8821d41f45c0.tar.bz2
[CSSPGO] Consume pseudo-probe-based AutoFDO profile
This change enables pseudo-probe-based sample counts to be consumed by the sample profile loader under the regular `-fprofile-sample-use` switch with minimal adjustments to the existing sample file formats. After the counts are imported, a probe helper, a.k.a. a `PseudoProbeManager` object, is automatically launched to verify the CFG checksum of every function in the current compilation against the corresponding checksum from the profile. Mismatched checksums will cause a function profile to be skipped. A `SampleProfileProber` pass is scheduled before any of the `SampleProfileLoader` instances so that the CFG checksums as well as probe mappings are available during the profile loading time. The `PseudoProbeManager` object is set up right after the profile reading is done. In the future a CFG-based fuzzy matching could be done in `PseudoProbeManager`. Samples will be applied only to pseudo probe instructions as well as probed callsites once the checksum verification goes through. Those instructions are processed in the same way that regular instructions would be processed in the line-number-based scenario. In other words, a function is processed in a regular way as if it was reduced to just containing pseudo probes (block probes and callsites). **Adjustment to profile format** A CFG checksum field is being added to the existing AutoFDO profile formats. So far only the text format and the extended binary format are supported. For the text format, a new line like ``` !CFGChecksum: 12345 ``` is added to the end of the body sample lines. For the extended binary profile format, we introduce a metadata section to store the checksum map from function names to their CFG checksums. Differential Revision: https://reviews.llvm.org/D92347
Diffstat (limited to 'llvm/lib/ProfileData/SampleProfReader.cpp')
-rw-r--r--llvm/lib/ProfileData/SampleProfReader.cpp105
1 files changed, 92 insertions, 13 deletions
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 6a574ff..18f9ec7 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -83,26 +83,52 @@ static bool ParseHead(const StringRef &Input, StringRef &FName,
/// Returns true if line offset \p L is legal (only has 16 bits).
static bool isOffsetLegal(unsigned L) { return (L & 0xffff) == L; }
+/// Parse \p Input as a metadata line of a text-format function profile.
+/// Possible metadata (only this one form is recognized by the code below):
+/// - CFG Checksum information, given as a base-10 integer:
+/// !CFGChecksum: 12345
+/// Stores the FunctionHash (a.k.a. CFG Checksum) into \p FunctionHash.
+static bool parseMetadata(const StringRef &Input, uint64_t &FunctionHash) { // Returns true iff a checksum was parsed.
+ if (!Input.startswith("!CFGChecksum:"))
+ return false; // Not a CFG checksum line; no other metadata kind is handled here.
+
+ StringRef CFGInfo = Input.substr(strlen("!CFGChecksum:")).trim(); // Text after the tag, surrounding whitespace removed.
+ return !CFGInfo.getAsInteger(10, FunctionHash); // getAsInteger reports failure as true, hence the negation.
+}
+
+enum class LineType { // How ParseLine classified one line of a function's profile body.
+ CallSiteProfile, // Inlined callsite line; the callee name and its sample total are parsed.
+ BodyProfile, // Line sample: the text after the location starts with a digit.
+ Metadata, // Line at depth 1 starting with '!'; forwarded to parseMetadata.
+};
+
/// Parse \p Input as line sample.
///
/// \param Input input line.
-/// \param IsCallsite true if the line represents an inlined callsite.
+/// \param LineTy Type of this line.
/// \param Depth the depth of the inline stack.
/// \param NumSamples total samples of the line/inlined callsite.
/// \param LineOffset line offset to the start of the function.
/// \param Discriminator discriminator of the line.
/// \param TargetCountMap map from indirect call target to count.
+/// \param FunctionHash the function's CFG hash, used by pseudo probe.
///
/// returns true if parsing is successful.
-static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth,
+static bool ParseLine(const StringRef &Input, LineType &LineTy, uint32_t &Depth,
uint64_t &NumSamples, uint32_t &LineOffset,
uint32_t &Discriminator, StringRef &CalleeName,
- DenseMap<StringRef, uint64_t> &TargetCountMap) {
+ DenseMap<StringRef, uint64_t> &TargetCountMap,
+ uint64_t &FunctionHash) {
for (Depth = 0; Input[Depth] == ' '; Depth++)
;
if (Depth == 0)
return false;
+ if (Depth == 1 && Input[Depth] == '!') {
+ LineTy = LineType::Metadata;
+ return parseMetadata(Input.substr(Depth), FunctionHash);
+ }
+
size_t n1 = Input.find(':');
StringRef Loc = Input.substr(Depth, n1 - Depth);
size_t n2 = Loc.find('.');
@@ -119,7 +145,7 @@ static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth,
StringRef Rest = Input.substr(n1 + 2);
if (Rest[0] >= '0' && Rest[0] <= '9') {
- IsCallsite = false;
+ LineTy = LineType::BodyProfile;
size_t n3 = Rest.find(' ');
if (n3 == StringRef::npos) {
if (Rest.getAsInteger(10, NumSamples))
@@ -176,7 +202,7 @@ static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth,
n3 = n4;
}
} else {
- IsCallsite = true;
+ LineTy = LineType::CallSiteProfile;
size_t n3 = Rest.find_last_of(':');
CalleeName = Rest.substr(0, n3);
if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples))
@@ -198,6 +224,11 @@ std::error_code SampleProfileReaderText::readImpl() {
InlineCallStack InlineStack;
int CSProfileCount = 0;
int RegularProfileCount = 0;
+ uint32_t ProbeProfileCount = 0;
+
+ // SeenMetadata tracks whether we have processed metadata for the current
+ // top-level function profile.
+ bool SeenMetadata = false;
for (; !LineIt.is_at_eof(); ++LineIt) {
if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
@@ -222,6 +253,7 @@ std::error_code SampleProfileReaderText::readImpl() {
"Expected 'mangled_name:NUM:NUM', found " + *LineIt);
return sampleprof_error::malformed;
}
+ SeenMetadata = false;
SampleContext FContext(FName);
if (FContext.hasContext())
++CSProfileCount;
@@ -239,25 +271,35 @@ std::error_code SampleProfileReaderText::readImpl() {
uint64_t NumSamples;
StringRef FName;
DenseMap<StringRef, uint64_t> TargetCountMap;
- bool IsCallsite;
uint32_t Depth, LineOffset, Discriminator;
- if (!ParseLine(*LineIt, IsCallsite, Depth, NumSamples, LineOffset,
- Discriminator, FName, TargetCountMap)) {
+ LineType LineTy;
+ uint64_t FunctionHash;
+ if (!ParseLine(*LineIt, LineTy, Depth, NumSamples, LineOffset,
+ Discriminator, FName, TargetCountMap, FunctionHash)) {
reportError(LineIt.line_number(),
"Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " +
*LineIt);
return sampleprof_error::malformed;
}
- if (IsCallsite) {
- while (InlineStack.size() > Depth) {
- InlineStack.pop_back();
- }
+ if (SeenMetadata && LineTy != LineType::Metadata) {
+ // Metadata must be put at the end of a function profile.
+ reportError(LineIt.line_number(),
+ "Found non-metadata after metadata: " + *LineIt);
+ return sampleprof_error::malformed;
+ }
+ while (InlineStack.size() > Depth) {
+ InlineStack.pop_back();
+ }
+ switch (LineTy) {
+ case LineType::CallSiteProfile: {
FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt(
LineLocation(LineOffset, Discriminator))[std::string(FName)];
FSamples.setName(FName);
MergeResult(Result, FSamples.addTotalSamples(NumSamples));
InlineStack.push_back(&FSamples);
- } else {
+ break;
+ }
+ case LineType::BodyProfile: {
while (InlineStack.size() > Depth) {
InlineStack.pop_back();
}
@@ -269,6 +311,15 @@ std::error_code SampleProfileReaderText::readImpl() {
}
MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator,
NumSamples));
+ break;
+ }
+ case LineType::Metadata: {
+ FunctionSamples &FProfile = *InlineStack.back();
+ FProfile.setFunctionHash(FunctionHash);
+ ++ProbeProfileCount;
+ SeenMetadata = true;
+ break;
+ }
}
}
}
@@ -276,6 +327,10 @@ std::error_code SampleProfileReaderText::readImpl() {
assert((RegularProfileCount == 0 || CSProfileCount == 0) &&
"Cannot have both context-sensitive and regular profile");
ProfileIsCS = (CSProfileCount > 0);
+ assert((ProbeProfileCount == 0 || ProbeProfileCount == Profiles.size()) &&
+ "Cannot have both probe-based profiles and regular profiles");
+ ProfileIsProbeBased = (ProbeProfileCount > 0);
+ FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
if (Result == sampleprof_error::success)
computeSummary();
@@ -540,6 +595,13 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
if (std::error_code EC = readFuncOffsetTable())
return EC;
break;
+ case SecFuncMetadata:
+ ProfileIsProbeBased =
+ hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
+ FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
+ if (std::error_code EC = readFuncMetadata())
+ return EC;
+ break;
case SecProfileSymbolList:
if (std::error_code EC = readProfileSymbolList())
return EC;
@@ -804,6 +866,23 @@ std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
return SampleProfileReaderBinary::readNameTable();
}
+std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadata() { // Reads per-function CFG checksums into Profiles.
+ if (!ProfileIsProbeBased) // Nothing is read unless the profile is probe-based.
+ return sampleprof_error::success;
+ for (unsigned I = 0; I < Profiles.size(); ++I) { // One (name, checksum) record is read per iteration.
+ auto FName(readStringFromTable()); // Function name of this record.
+ if (std::error_code EC = FName.getError())
+ return EC; // Propagate the read failure to the caller.
+
+ auto Checksum = readNumber<uint64_t>(); // Checksum value that follows the name.
+ if (std::error_code EC = Checksum.getError())
+ return EC; // Propagate the read failure to the caller.
+
+ Profiles[*FName].setFunctionHash(*Checksum); // NOTE(review): loop bound is Profiles.size(); if operator[] inserts for an unknown name the bound grows -- confirm the section holds exactly one record per loaded profile.
+ }
+ return sampleprof_error::success;
+}
+
std::error_code SampleProfileReaderCompactBinary::readNameTable() {
auto Size = readNumber<uint64_t>();
if (std::error_code EC = Size.getError())