#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"

namespace llvm {
namespace memprof {

// Returns a copy of the (string, index) entry of `Map` keyed by `Str`. If no
// such entry exists yet, an owned copy of `Str` is obtained from `Saver` and
// inserted with the next free index (the map size before insertion).
static std::pair<StringRef, uint64_t>
saveStringToMap(DataAccessProfData::StringToIndexMap &Map,
                llvm::UniqueStringSaver &Saver, StringRef Str) {
  return *Map.try_emplace(Saver.save(Str), Map.size()).first;
}

// Returns the canonical form of `Name`, or an `invalid_argument` error when
// `Name` is empty.
static Expected<StringRef> getCanonicalName(StringRef Name) {
  if (Name.empty())
    return make_error<StringError>("Empty symbol name",
                                   llvm::errc::invalid_argument);
  return InstrProfSymtab::getCanonicalName(Name);
}

// Reads one fixed-width integer of type `T` from `Ptr` and advances `Ptr`
// past it. The widths used by the callers mirror the `write`, `writeByte` and
// `write32` calls in `DataAccessProfData::serialize`.
// NOTE(review): assumes ProfOStream emits little-endian fields -- confirm.
template <typename T> static T readLE(const unsigned char *&Ptr) {
  return support::endian::readNext<T, llvm::endianness::little>(Ptr);
}

// Looks up the profile record for `SymbolID`. Symbol names are canonicalized
// before the lookup; hashes are used as-is. Returns std::nullopt when no
// record exists or when the name cannot be canonicalized.
std::optional<DataAccessProfRecord>
DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const {
  auto Key = SymbolID;
  if (std::holds_alternative<StringRef>(SymbolID)) {
    auto NameOrErr = getCanonicalName(std::get<StringRef>(SymbolID));
    if (!NameOrErr) {
      assert(
          std::get<StringRef>(SymbolID).empty() &&
          "Name canonicalization only fails when stringified string is empty.");
      // Suppress the error. It has to be consumed explicitly: an Expected
      // that still owns an unhandled Error aborts on destruction in builds
      // with ABI-breaking checks enabled.
      consumeError(NameOrErr.takeError());
      return std::nullopt;
    }
    Key = *NameOrErr;
  }

  auto It = Records.find(Key);
  if (It == Records.end())
    return std::nullopt;

  return DataAccessProfRecord(Key, It->second.AccessCount,
                              It->second.Locations);
}

// Returns true if `SymID` was registered as a known symbol without samples.
// Hashes are looked up in `KnownColdHashes`, names in `KnownColdSymbols`.
bool DataAccessProfData::isKnownColdSymbol(const SymbolHandleRef SymID) const {
  if (std::holds_alternative<uint64_t>(SymID))
    return KnownColdHashes.contains(std::get<uint64_t>(SymID));
  return KnownColdSymbols.contains(std::get<StringRef>(SymID));
}

// Adds a record with `AccessCount` for `Symbol`. A hash is stored as-is and
// marks the record as a string literal; a name is canonicalized and interned
// in `StrToIndexMap`. Returns an error if the name is empty or if a record
// for the same key already exists.
Error DataAccessProfData::setDataAccessProfile(SymbolHandleRef Symbol,
                                               uint64_t AccessCount) {
  uint64_t RecordID = -1;
  const bool IsStringLiteral = std::holds_alternative<uint64_t>(Symbol);
  SymbolHandleRef Key;
  if (IsStringLiteral) {
    RecordID = std::get<uint64_t>(Symbol);
    Key = RecordID;
  } else {
    auto CanonicalName = getCanonicalName(std::get<StringRef>(Symbol));
    if (!CanonicalName)
      return CanonicalName.takeError();
    std::tie(Key, RecordID) =
        saveStringToMap(StrToIndexMap, Saver, *CanonicalName);
  }

  const bool Inserted =
      Records.try_emplace(Key, RecordID, AccessCount, IsStringLiteral).second;
  if (!Inserted)
    return make_error<StringError>("Duplicate symbol or string literal added. "
                                   "User of DataAccessProfData should "
                                   "aggregate count for the same symbol. ",
                                   llvm::errc::invalid_argument);

  return Error::success();
}

// Same as the two-argument overload, and additionally attaches `Locations`
// to the new record. File names are interned in `StrToIndexMap`.
Error DataAccessProfData::setDataAccessProfile(
    SymbolHandleRef SymbolID, uint64_t AccessCount,
    ArrayRef<SourceLocation> Locations) {
  if (Error E = setDataAccessProfile(SymbolID, AccessCount))
    return E;

  // The call above succeeded, so the record it added is the last one.
  auto &Record = Records.back().second;
  for (const auto &Location : Locations)
    Record.Locations.push_back(
        {saveStringToMap(StrToIndexMap, Saver, Location.FileName).first,
         Location.Line});

  return Error::success();
}

// Registers `SymbolID` as a known symbol that has no samples. Hashes go to
// `KnownColdHashes`; names are canonicalized, interned and added to
// `KnownColdSymbols`. Returns an error if the name is empty.
Error DataAccessProfData::addKnownSymbolWithoutSamples(
    SymbolHandleRef SymbolID) {
  if (std::holds_alternative<uint64_t>(SymbolID)) {
    KnownColdHashes.insert(std::get<uint64_t>(SymbolID));
    return Error::success();
  }

  auto CanonicalName = getCanonicalName(std::get<StringRef>(SymbolID));
  if (!CanonicalName)
    return CanonicalName.takeError();

  KnownColdSymbols.insert(
      saveStringToMap(StrToIndexMap, Saver, *CanonicalName).first);
  return Error::success();
}

// Reads a payload in the layout produced by `serialize`, advancing `Ptr`:
// the two string counts, the string table, the known cold hashes, and then
// the records.
Error DataAccessProfData::deserialize(const unsigned char *&Ptr) {
  const uint64_t NumSampledSymbols = readLE<uint64_t>(Ptr);
  const uint64_t NumColdKnownSymbols = readLE<uint64_t>(Ptr);
  if (Error E = deserializeSymbolsAndFilenames(Ptr, NumSampledSymbols,
                                               NumColdKnownSymbols))
    return E;

  const uint64_t NumColdHashes = readLE<uint64_t>(Ptr);
  for (uint64_t I = 0; I < NumColdHashes; ++I)
    KnownColdHashes.insert(readLE<uint64_t>(Ptr));

  return deserializeRecords(Ptr);
}

// Writes the sizes of `StrToIndexMap` and `KnownColdSymbols`, followed by the
// length-prefixed encoded string table holding the strings of both (in that
// order), zero-padded to a multiple of 8 bytes.
Error DataAccessProfData::serializeSymbolsAndFilenames(ProfOStream &OS) const {
  OS.write(StrToIndexMap.size());
  OS.write(KnownColdSymbols.size());

  std::vector<std::string> Strs;
  Strs.reserve(StrToIndexMap.size() + KnownColdSymbols.size());
  for (const auto &Str : StrToIndexMap)
    Strs.push_back(Str.first.str());
  for (const auto &Str : KnownColdSymbols)
    Strs.push_back(Str.str());

  std::string CompressedStrings;
  if (!Strs.empty())
    if (Error E = collectGlobalObjectNameStrings(
            Strs, compression::zlib::isAvailable(), CompressedStrings))
      return E;

  const uint64_t CompressedStringLen = CompressedStrings.length();
  // Record the length of compressed string.
  OS.write(CompressedStringLen);
  // Write the chars in compressed strings.
  for (char C : CompressedStrings)
    OS.writeByte(static_cast<uint8_t>(C));
  // Pad up to a multiple of 8.
  // InstrProfReader could read bytes according to 'CompressedStringLen'.
  const uint64_t PaddedLength = alignTo(CompressedStringLen, 8);
  for (uint64_t K = CompressedStringLen; K < PaddedLength; K++)
    OS.writeByte(0);

  return Error::success();
}

// Returns the integer written to the profile for `SymbolID`: the value
// itself for a hash, or the `StrToIndexMap` index for a string. The string
// must already be present in `StrToIndexMap`.
uint64_t
DataAccessProfData::getEncodedIndex(const SymbolHandleRef SymbolID) const {
  if (std::holds_alternative<uint64_t>(SymbolID))
    return std::get<uint64_t>(SymbolID);

  auto Iter = StrToIndexMap.find(std::get<StringRef>(SymbolID));
  assert(Iter != StrToIndexMap.end() &&
         "String literals not found in StrToIndexMap");
  return Iter->second;
}

// Writes the string table, the known cold hashes and all records to `OS`.
// Each record is: encoded symbol index, string-literal flag (one byte),
// access count, location count, then (file name index, 32-bit line) pairs.
Error DataAccessProfData::serialize(ProfOStream &OS) const {
  if (Error E = serializeSymbolsAndFilenames(OS))
    return E;

  OS.write(KnownColdHashes.size());
  for (const auto &Hash : KnownColdHashes)
    OS.write(Hash);

  OS.write(static_cast<uint64_t>(Records.size()));
  for (const auto &[Key, Rec] : Records) {
    OS.write(getEncodedIndex(Rec.SymbolID));
    OS.writeByte(Rec.IsStringLiteral);
    OS.write(Rec.AccessCount);
    OS.write(Rec.Locations.size());
    for (const auto &Loc : Rec.Locations) {
      OS.write(getEncodedIndex(Loc.FileName));
      OS.write32(Loc.Line);
    }
  }
  return Error::success();
}

// Decodes the length-prefixed string table at `Ptr` and advances `Ptr` past
// its 8-byte-aligned end.
Error DataAccessProfData::deserializeSymbolsAndFilenames(
    const unsigned char *&Ptr, const uint64_t NumSampledSymbols,
    const uint64_t NumColdKnownSymbols) {
  const uint64_t Len = readLE<uint64_t>(Ptr);

  // The first NumSampledSymbols strings are symbols with samples, and next
  // NumColdKnownSymbols strings are known cold symbols.
  uint64_t StringCnt = 0;
  std::function<Error(StringRef)> addName = [&](StringRef Name) {
    if (StringCnt < NumSampledSymbols)
      saveStringToMap(StrToIndexMap, Saver, Name);
    else
      KnownColdSymbols.insert(Saver.save(Name));
    ++StringCnt;
    return Error::success();
  };
  if (Error E = readAndDecodeStrings(
          StringRef(reinterpret_cast<const char *>(Ptr), Len), addName))
    return E;

  Ptr += alignTo(Len, 8);
  return Error::success();
}

// Reads the record section written by `serialize` and adds every record via
// `setDataAccessProfile`. String indices refer to the order of
// `StrToIndexMap` at the time this function is entered. Returns an error for
// an out-of-range string index or a duplicate record.
Error DataAccessProfData::deserializeRecords(const unsigned char *&Ptr) {
  SmallVector<StringRef> Strings =
      llvm::to_vector(llvm::make_first_range(getStrToIndexMapRef()));

  const uint64_t NumRecords = readLE<uint64_t>(Ptr);
  for (uint64_t I = 0; I < NumRecords; ++I) {
    const uint64_t ID = readLE<uint64_t>(Ptr);
    const bool IsStringLiteral = readLE<uint8_t>(Ptr);
    const uint64_t AccessCount = readLE<uint64_t>(Ptr);

    SymbolHandleRef SymbolID;
    if (IsStringLiteral) {
      SymbolID = ID;
    } else {
      // The index comes straight from the profile; never trust it blindly.
      if (ID >= Strings.size())
        return make_error<StringError>(
            "Data access profile record has an out-of-range symbol index",
            llvm::errc::invalid_argument);
      SymbolID = Strings[ID];
    }

    if (Error E = setDataAccessProfile(SymbolID, AccessCount))
      return E;

    // The call above succeeded, so the record it added is the last one.
    auto &Record = Records.back().second;

    const uint64_t NumLocations = readLE<uint64_t>(Ptr);
    Record.Locations.reserve(NumLocations);
    for (uint64_t J = 0; J < NumLocations; ++J) {
      const uint64_t FileNameIndex = readLE<uint64_t>(Ptr);
      const uint32_t Line = readLE<uint32_t>(Ptr);
      if (FileNameIndex >= Strings.size())
        return make_error<StringError>(
            "Data access profile location has an out-of-range file name index",
            llvm::errc::invalid_argument);
      Record.Locations.push_back({Strings[FileNameIndex], Line});
    }
  }
  return Error::success();
}

} // namespace memprof
} // namespace llvm