diff options
author | Ellis Hoag <ellis.sparky.hoag@gmail.com> | 2025-08-08 10:21:29 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-08-08 10:21:29 -0700 |
commit | 3b884db6ef458148ddc71e8133b334f8e1794ff9 (patch) | |
tree | 42d521b0a074dd659aba670dee445868ffd31026 /llvm/lib/ProfileData/InstrProfWriter.cpp | |
parent | 688551f05cf5f6c90e0f5edc890ee13bb563fa95 (diff) | |
download | llvm-3b884db6ef458148ddc71e8133b334f8e1794ff9.zip llvm-3b884db6ef458148ddc71e8133b334f8e1794ff9.tar.gz llvm-3b884db6ef458148ddc71e8133b334f8e1794ff9.tar.bz2 |
[InstrProf] Fix trace reservoir sampling (#152563)
`InstrProfWriter::addTemporalProfileTraces()` did not correctly account
for the case where the source traces are sampled but the reservoir size is
larger than it was before, meaning there is room for more traces.
It also did not handle the case where the reservoir size has decreased,
meaning the existing traces should be truncated.
Depends on https://github.com/llvm/llvm-project/pull/152550 for the test
refactor
Diffstat (limited to 'llvm/lib/ProfileData/InstrProfWriter.cpp')
-rw-r--r-- | llvm/lib/ProfileData/InstrProfWriter.cpp | 63 |
1 files changed, 18 insertions, 45 deletions
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 7ca26aa..df807fc 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -331,61 +331,34 @@ void InstrProfWriter::addDataAccessProfData( DataAccessProfileData = std::move(DataAccessProfDataIn); } -void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) { - assert(Trace.FunctionNameRefs.size() <= MaxTemporalProfTraceLength); - assert(!Trace.FunctionNameRefs.empty()); - if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) { - // Simply append the trace if we have not yet hit our reservoir size limit. - TemporalProfTraces.push_back(std::move(Trace)); - } else { - // Otherwise, replace a random trace in the stream. - std::uniform_int_distribution<uint64_t> Distribution( - 0, TemporalProfTraceStreamSize); - uint64_t RandomIndex = Distribution(RNG); - if (RandomIndex < TemporalProfTraces.size()) - TemporalProfTraces[RandomIndex] = std::move(Trace); - } - ++TemporalProfTraceStreamSize; -} - void InstrProfWriter::addTemporalProfileTraces( SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) { + if (TemporalProfTraces.size() > TemporalProfTraceReservoirSize) + TemporalProfTraces.truncate(TemporalProfTraceReservoirSize); for (auto &Trace : SrcTraces) if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength) Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength); llvm::erase_if(SrcTraces, [](auto &T) { return T.FunctionNameRefs.empty(); }); - // Assume that the source has the same reservoir size as the destination to - // avoid needing to record it in the indexed profile format. - bool IsDestSampled = - (TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize); - bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize); - if (!IsDestSampled && IsSrcSampled) { - // If one of the traces are sampled, ensure that it belongs to Dest. 
- std::swap(TemporalProfTraces, SrcTraces); - std::swap(TemporalProfTraceStreamSize, SrcStreamSize); - std::swap(IsDestSampled, IsSrcSampled); - } - if (!IsSrcSampled) { - // If the source stream is not sampled, we add each source trace normally. - for (auto &Trace : SrcTraces) - addTemporalProfileTrace(std::move(Trace)); + // If there are no source traces, it is probably because + // --temporal-profile-max-trace-length=0 was set to deliberately remove all + // traces. In that case, we do not want to increase the stream size + if (SrcTraces.empty()) return; - } - // Otherwise, we find the traces that would have been removed if we added - // the whole source stream. - SmallSetVector<uint64_t, 8> IndicesToReplace; - for (uint64_t I = 0; I < SrcStreamSize; I++) { - std::uniform_int_distribution<uint64_t> Distribution( - 0, TemporalProfTraceStreamSize); + // Add traces until our reservoir is full or we run out of source traces + auto SrcTraceIt = SrcTraces.begin(); + while (TemporalProfTraces.size() < TemporalProfTraceReservoirSize && + SrcTraceIt < SrcTraces.end()) + TemporalProfTraces.push_back(*SrcTraceIt++); + // Our reservoir is full, we need to sample the source stream + llvm::shuffle(SrcTraceIt, SrcTraces.end(), RNG); + for (uint64_t I = TemporalProfTraces.size(); + I < SrcStreamSize && SrcTraceIt < SrcTraces.end(); I++) { + std::uniform_int_distribution<uint64_t> Distribution(0, I); uint64_t RandomIndex = Distribution(RNG); if (RandomIndex < TemporalProfTraces.size()) - IndicesToReplace.insert(RandomIndex); - ++TemporalProfTraceStreamSize; + TemporalProfTraces[RandomIndex] = *SrcTraceIt++; } - // Then we insert a random sample of the source traces. - llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG); - for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces)) - TemporalProfTraces[Index] = std::move(Trace); + TemporalProfTraceStreamSize += SrcStreamSize; } void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, |