1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
|
#include "llvm/ProfileData/DataAccessProf.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
namespace memprof {
// Looks up `Str` in `Map`, adding it when it is not present yet, and returns a
// copy of the resulting (string, index) entry.
//
// `Str` is always passed through `Saver.save` first, so the key stored in the
// map is the StringRef handed back by the saver rather than the caller's
// buffer. A newly added entry is given the map's size before the insertion as
// its index; an existing entry keeps the index it already has.
static std::pair<StringRef, uint64_t>
saveStringToMap(DataAccessProfData::StringToIndexMap &Map,
                llvm::UniqueStringSaver &Saver, StringRef Str) {
  const StringRef SavedStr = Saver.save(Str);
  const uint64_t NextIndex = Map.size();
  return *Map.try_emplace(SavedStr, NextIndex).first;
}
// Forwards a non-empty `Name` to InstrProfSymtab::getCanonicalName and returns
// its result. An empty `Name` is rejected with an invalid_argument StringError.
static Expected<StringRef> getCanonicalName(StringRef Name) {
  if (!Name.empty())
    return InstrProfSymtab::getCanonicalName(Name);
  return make_error<StringError>("Empty symbol name",
                                 llvm::errc::invalid_argument);
}
// Returns the profile record stored for `SymbolID`, or std::nullopt when there
// is none.
//
// A StringRef handle is canonicalized with getCanonicalName before the lookup;
// a uint64_t handle is used as the lookup key unchanged. If canonicalization
// fails the error is swallowed and std::nullopt is returned.
std::optional<DataAccessProfRecord>
DataAccessProfData::getProfileRecord(const SymbolHandleRef SymbolID) const {
  auto Key = SymbolID;
  if (std::holds_alternative<StringRef>(SymbolID)) {
    auto NameOrErr = getCanonicalName(std::get<StringRef>(SymbolID));
    if (!NameOrErr) {
      assert(
          std::get<StringRef>(SymbolID).empty() &&
          "Name canonicalization only fails when stringified string is empty.");
      // Testing the Expected does not discharge a contained error. Consume it
      // explicitly so it is really suppressed; otherwise destroying NameOrErr
      // with an unhandled error aborts in builds that enable
      // LLVM_ENABLE_ABI_BREAKING_CHECKS.
      consumeError(NameOrErr.takeError());
      return std::nullopt;
    }
    Key = *NameOrErr;
  }

  auto It = Records.find(Key);
  if (It == Records.end())
    return std::nullopt;

  return DataAccessProfRecord(Key, It->second.AccessCount,
                              It->second.Locations);
}
bool DataAccessProfData::isKnownColdSymbol(const SymbolHandleRef SymID) const {
if (std::holds_alternative<uint64_t>(SymID))
return KnownColdHashes.contains(std::get<uint64_t>(SymID));
return KnownColdSymbols.contains(std::get<StringRef>(SymID));
}
// Adds a new record for `Symbol` with the given `AccessCount`.
//
// A uint64_t handle is treated as a string literal and serves as both the
// record key and the record ID. A StringRef handle is canonicalized and saved
// in StrToIndexMap; the saved name becomes the key and its map index the
// record ID.
//
// Returns the canonicalization error if the name cannot be canonicalized, and
// an invalid_argument StringError if a record with the same key already
// exists (the existing record is left as it was).
Error DataAccessProfData::setDataAccessProfile(SymbolHandleRef Symbol,
                                               uint64_t AccessCount) {
  const bool IsStringLiteral = std::holds_alternative<uint64_t>(Symbol);
  SymbolHandleRef Key;
  uint64_t RecordID = -1;

  if (!IsStringLiteral) {
    auto CanonicalName = getCanonicalName(std::get<StringRef>(Symbol));
    if (!CanonicalName)
      return CanonicalName.takeError();
    const auto [SavedName, NameIndex] =
        saveStringToMap(StrToIndexMap, Saver, *CanonicalName);
    Key = SavedName;
    RecordID = NameIndex;
  } else {
    RecordID = std::get<uint64_t>(Symbol);
    Key = RecordID;
  }

  const bool Inserted =
      Records.try_emplace(Key, RecordID, AccessCount, IsStringLiteral).second;
  if (Inserted)
    return Error::success();

  return make_error<StringError>("Duplicate symbol or string literal added. "
                                 "User of DataAccessProfData should "
                                 "aggregate count for the same symbol. ",
                                 llvm::errc::invalid_argument);
}
// Adds a new record for `SymbolID` with `AccessCount`, then attaches
// `Locations` to it. Each location's file name is saved in StrToIndexMap and
// the record stores the saved name together with the line number.
//
// Any error from creating the record is returned unchanged and no locations
// are attached in that case.
Error DataAccessProfData::setDataAccessProfile(
    SymbolHandleRef SymbolID, uint64_t AccessCount,
    ArrayRef<SourceLocation> Locations) {
  if (Error Err = setDataAccessProfile(SymbolID, AccessCount))
    return Err;

  // The call above succeeded, so the last element of Records is the record
  // that was just created.
  auto &RecordLocations = Records.back().second.Locations;
  for (const auto &Loc : Locations) {
    const StringRef SavedFileName =
        saveStringToMap(StrToIndexMap, Saver, Loc.FileName).first;
    RecordLocations.push_back({SavedFileName, Loc.Line});
  }
  return Error::success();
}
// Records `SymbolID` as a known symbol that has no samples.
//
// A uint64_t handle is inserted into KnownColdHashes. A StringRef handle is
// canonicalized, saved in StrToIndexMap, and the saved name is inserted into
// KnownColdSymbols. Returns the canonicalization error if the name cannot be
// canonicalized.
Error DataAccessProfData::addKnownSymbolWithoutSamples(
    SymbolHandleRef SymbolID) {
  if (const auto *Hash = std::get_if<uint64_t>(&SymbolID)) {
    KnownColdHashes.insert(*Hash);
    return Error::success();
  }

  auto NameOrErr = getCanonicalName(std::get<StringRef>(SymbolID));
  if (!NameOrErr)
    return NameOrErr.takeError();

  const StringRef SavedName =
      saveStringToMap(StrToIndexMap, Saver, *NameOrErr).first;
  KnownColdSymbols.insert(SavedName);
  return Error::success();
}
// Reads a serialized data access profile starting at `Ptr`, advancing `Ptr` as
// data is consumed. The reads happen in this order:
//   1. number of sampled symbols and number of known cold symbols,
//   2. the symbol / file name strings (deserializeSymbolsAndFilenames),
//   3. a count followed by that many known cold hashes,
//   4. the records (deserializeRecords).
// All integers are read as little-endian. Returns the first error reported by
// a helper.
Error DataAccessProfData::deserialize(const unsigned char *&Ptr) {
  const auto ReadU64 = [&Ptr]() -> uint64_t {
    return support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
  };

  const uint64_t NumSampledSymbols = ReadU64();
  const uint64_t NumColdKnownSymbols = ReadU64();
  if (Error Err = deserializeSymbolsAndFilenames(Ptr, NumSampledSymbols,
                                                 NumColdKnownSymbols))
    return Err;

  for (uint64_t Remaining = ReadU64(); Remaining != 0; --Remaining)
    KnownColdHashes.insert(ReadU64());

  return deserializeRecords(Ptr);
}
// Writes the string section of the profile to `OS`:
//   - the number of entries in StrToIndexMap,
//   - the number of entries in KnownColdSymbols,
//   - the byte length of the encoded string blob,
//   - the blob itself, followed by zero bytes up to a multiple of 8.
// The blob is produced by collectGlobalObjectNameStrings from the
// StrToIndexMap keys followed by the KnownColdSymbols entries; when there are
// no strings the blob is empty. Returns the error from
// collectGlobalObjectNameStrings, if any.
Error DataAccessProfData::serializeSymbolsAndFilenames(ProfOStream &OS) const {
  OS.write(StrToIndexMap.size());
  OS.write(KnownColdSymbols.size());

  // Sampled strings go first, known cold symbols after them.
  std::vector<std::string> AllStrings;
  AllStrings.reserve(StrToIndexMap.size() + KnownColdSymbols.size());
  for (const auto &Entry : StrToIndexMap)
    AllStrings.push_back(Entry.first.str());
  for (const auto &ColdSymbol : KnownColdSymbols)
    AllStrings.push_back(ColdSymbol.str());

  std::string Encoded;
  if (!AllStrings.empty()) {
    // The second argument is whether zlib is available; presumably it selects
    // whether the strings get compressed.
    if (Error Err = collectGlobalObjectNameStrings(
            AllStrings, compression::zlib::isAvailable(), Encoded))
      return Err;
  }

  // The unpadded length is what gets recorded, so a reader can tell how many
  // of the following bytes are payload.
  const uint64_t EncodedLen = Encoded.length();
  OS.write(EncodedLen);
  for (const char Byte : Encoded)
    OS.writeByte(static_cast<uint8_t>(Byte));

  // Zero-fill so the section ends on an 8-byte boundary.
  for (uint64_t PadBytes = alignTo(EncodedLen, 8) - EncodedLen; PadBytes != 0;
       --PadBytes)
    OS.writeByte(0);

  return Error::success();
}
uint64_t
DataAccessProfData::getEncodedIndex(const SymbolHandleRef SymbolID) const {
if (std::holds_alternative<uint64_t>(SymbolID))
return std::get<uint64_t>(SymbolID);
auto Iter = StrToIndexMap.find(std::get<StringRef>(SymbolID));
assert(Iter != StrToIndexMap.end() &&
"String literals not found in StrToIndexMap");
return Iter->second;
}
// Writes the whole profile to `OS`, in this order:
//   1. the string section (serializeSymbolsAndFilenames),
//   2. the number of known cold hashes followed by each hash,
//   3. the number of records, then for each record: its encoded symbol index,
//      a byte holding IsStringLiteral, the access count, the number of
//      locations, and for each location the encoded file name index and the
//      line as a 32-bit value.
// Returns the error from serializeSymbolsAndFilenames, if any.
Error DataAccessProfData::serialize(ProfOStream &OS) const {
  if (Error Err = serializeSymbolsAndFilenames(OS))
    return Err;

  OS.write(KnownColdHashes.size());
  for (const auto &ColdHash : KnownColdHashes)
    OS.write(ColdHash);

  OS.write(static_cast<uint64_t>(Records.size()));
  for (const auto &Entry : Records) {
    const auto &Record = Entry.second;
    OS.write(getEncodedIndex(Record.SymbolID));
    OS.writeByte(Record.IsStringLiteral);
    OS.write(Record.AccessCount);

    OS.write(Record.Locations.size());
    for (const auto &Location : Record.Locations) {
      OS.write(getEncodedIndex(Location.FileName));
      OS.write32(Location.Line);
    }
  }
  return Error::success();
}
// Reads the string section at `Ptr`: a little-endian 64-bit byte length
// followed by the encoded strings, which are decoded with
// readAndDecodeStrings.
//
// The first `NumSampledSymbols` decoded strings are saved into StrToIndexMap;
// every string after that is saved and inserted into KnownColdSymbols.
// `NumColdKnownSymbols` is not consulted here.
//
// On success `Ptr` is advanced past the strings, rounded up to a multiple of
// 8 bytes. If decoding fails the error is returned and `Ptr` is left just
// after the length field.
Error DataAccessProfData::deserializeSymbolsAndFilenames(
    const unsigned char *&Ptr, const uint64_t NumSampledSymbols,
    const uint64_t NumColdKnownSymbols) {
  const uint64_t EncodedLen =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

  // Number of strings handed to the callback so far; decides which container
  // the next string belongs to.
  uint64_t NumSeen = 0;
  std::function<Error(StringRef)> OnString = [&](StringRef Name) {
    const bool IsSampledSymbol = NumSeen < NumSampledSymbols;
    ++NumSeen;
    if (IsSampledSymbol)
      saveStringToMap(StrToIndexMap, Saver, Name);
    else
      KnownColdSymbols.insert(Saver.save(Name));
    return Error::success();
  };

  const StringRef Encoded((const char *)Ptr, EncodedLen);
  if (Error Err = readAndDecodeStrings(Encoded, OnString))
    return Err;

  Ptr += alignTo(EncodedLen, 8);
  return Error::success();
}
// Reads the record section at `Ptr`, advancing `Ptr` as data is consumed, and
// adds each record via setDataAccessProfile.
//
// Layout (all integers little-endian): a 64-bit record count, then for each
// record a 64-bit ID, a one-byte IsStringLiteral flag, a 64-bit access count,
// a 64-bit location count, and for each location a 64-bit file name index and
// a 32-bit line.
//
// When the flag is set the ID is used directly as the symbol handle;
// otherwise it is an index into the strings currently in the string-to-index
// map, as are the file name indices. An index outside that table yields an
// invalid_argument StringError instead of an out-of-bounds read. Errors from
// setDataAccessProfile are returned unchanged.
//
// NOTE: the record and location counts themselves cannot be validated here
// because this function is not told where the input buffer ends.
Error DataAccessProfData::deserializeRecords(const unsigned char *&Ptr) {
  // Snapshot of the string table; serialized indices refer to positions in it.
  SmallVector<StringRef> Strings =
      llvm::to_vector(llvm::make_first_range(getStrToIndexMapRef()));

  const auto MakeIndexError = []() -> Error {
    return make_error<StringError>(
        "Data access profile references a string index that is out of range",
        llvm::errc::invalid_argument);
  };

  uint64_t NumRecords =
      support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

  for (uint64_t I = 0; I < NumRecords; ++I) {
    uint64_t ID =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

    bool IsStringLiteral =
        support::endian::readNext<uint8_t, llvm::endianness::little>(Ptr);

    uint64_t AccessCount =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

    SymbolHandleRef SymbolID;
    if (IsStringLiteral) {
      SymbolID = ID;
    } else {
      // ID comes straight from the input; never index with it unchecked.
      if (ID >= Strings.size())
        return MakeIndexError();
      SymbolID = Strings[ID];
    }

    if (Error E = setDataAccessProfile(SymbolID, AccessCount))
      return E;

    // The call above succeeded, so the last element of Records is the record
    // that was just created.
    auto &Record = Records.back().second;

    uint64_t NumLocations =
        support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);

    Record.Locations.reserve(NumLocations);
    for (uint64_t J = 0; J < NumLocations; ++J) {
      uint64_t FileNameIndex =
          support::endian::readNext<uint64_t, llvm::endianness::little>(Ptr);
      uint32_t Line =
          support::endian::readNext<uint32_t, llvm::endianness::little>(Ptr);
      if (FileNameIndex >= Strings.size())
        return MakeIndexError();
      Record.Locations.push_back({Strings[FileNameIndex], Line});
    }
  }
  return Error::success();
}
} // namespace memprof
} // namespace llvm
|