1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
|
//===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/IR/StructuralHash.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
using namespace llvm;
namespace {
// Basic hashing mechanism to detect structural change to the IR, used to verify
// pass return status consistency with actual change. In addition to being used
// by the MergeFunctions pass.
class StructuralHashImpl {
stable_hash Hash = 4;
bool DetailedHash;
// This random value acts as a block header, as otherwise the partition of
// opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
static constexpr stable_hash BlockHeaderHash = 45798;
static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
static constexpr stable_hash GlobalHeaderHash = 23456;
/// IgnoreOp is a function that returns true if the operand should be ignored.
IgnoreOperandFunc IgnoreOp = nullptr;
/// A mapping from instruction indices to instruction pointers.
/// The index represents the position of an instruction based on the order in
/// which it is first encountered.
std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr;
/// A mapping from pairs of instruction indices and operand indices
/// to the hashes of the operands.
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr;
/// Assign a unique ID to each Value in the order they are first seen.
DenseMap<const Value *, int> ValueToId;
static stable_hash hashType(Type *ValueType) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(ValueType->getTypeID());
if (ValueType->isIntegerTy())
Hashes.emplace_back(ValueType->getIntegerBitWidth());
return stable_hash_combine(Hashes);
}
public:
StructuralHashImpl() = delete;
explicit StructuralHashImpl(bool DetailedHash,
IgnoreOperandFunc IgnoreOp = nullptr)
: DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) {
if (IgnoreOp) {
IndexInstruction = std::make_unique<IndexInstrMap>();
IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
}
}
static stable_hash hashAPInt(const APInt &I) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(I.getBitWidth());
auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords());
Hashes.append(RawVals.begin(), RawVals.end());
return stable_hash_combine(Hashes);
}
static stable_hash hashAPFloat(const APFloat &F) {
return hashAPInt(F.bitcastToAPInt());
}
static stable_hash hashGlobalVariable(const GlobalVariable &GVar) {
if (!GVar.hasInitializer())
return hashGlobalValue(&GVar);
// Hash the contents of a string.
if (GVar.getName().starts_with(".str")) {
auto *C = GVar.getInitializer();
if (const auto *Seq = dyn_cast<ConstantDataSequential>(C))
if (Seq->isString())
return stable_hash_name(Seq->getAsString());
}
// Hash structural contents of Objective-C metadata in specific sections.
// This can be extended to other metadata if needed.
static constexpr const char *SectionNames[] = {
"__cfstring", "__cstring", "__objc_classrefs",
"__objc_methname", "__objc_selrefs",
};
if (GVar.hasSection()) {
StringRef SectionName = GVar.getSection();
for (const char *Name : SectionNames)
if (SectionName.contains(Name))
return hashConstant(GVar.getInitializer());
}
return hashGlobalValue(&GVar);
}
static stable_hash hashGlobalValue(const GlobalValue *GV) {
if (!GV->hasName())
return 0;
return stable_hash_name(GV->getName());
}
// Compute a hash for a Constant. This function is logically similar to
// FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
// we're interested in computing a hash rather than comparing two Constants.
// Some of the logic is simplified, e.g, we don't expand GEPOperator.
static stable_hash hashConstant(const Constant *C) {
SmallVector<stable_hash> Hashes;
Type *Ty = C->getType();
Hashes.emplace_back(hashType(Ty));
if (C->isNullValue()) {
Hashes.emplace_back(static_cast<stable_hash>('N'));
return stable_hash_combine(Hashes);
}
if (auto *GVar = dyn_cast<GlobalVariable>(C)) {
Hashes.emplace_back(hashGlobalVariable(*GVar));
return stable_hash_combine(Hashes);
}
if (auto *G = dyn_cast<GlobalValue>(C)) {
Hashes.emplace_back(hashGlobalValue(G));
return stable_hash_combine(Hashes);
}
if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
if (Seq->isString()) {
Hashes.emplace_back(stable_hash_name(Seq->getAsString()));
return stable_hash_combine(Hashes);
}
}
switch (C->getValueID()) {
case Value::ConstantIntVal: {
const APInt &Int = cast<ConstantInt>(C)->getValue();
Hashes.emplace_back(hashAPInt(Int));
return stable_hash_combine(Hashes);
}
case Value::ConstantFPVal: {
const APFloat &APF = cast<ConstantFP>(C)->getValueAPF();
Hashes.emplace_back(hashAPFloat(APF));
return stable_hash_combine(Hashes);
}
case Value::ConstantArrayVal:
case Value::ConstantStructVal:
case Value::ConstantVectorVal:
case Value::ConstantExprVal: {
for (const auto &Op : C->operands()) {
auto H = hashConstant(cast<Constant>(Op));
Hashes.emplace_back(H);
}
return stable_hash_combine(Hashes);
}
case Value::BlockAddressVal: {
const BlockAddress *BA = cast<BlockAddress>(C);
auto H = hashGlobalValue(BA->getFunction());
Hashes.emplace_back(H);
return stable_hash_combine(Hashes);
}
case Value::DSOLocalEquivalentVal: {
const auto *Equiv = cast<DSOLocalEquivalent>(C);
auto H = hashGlobalValue(Equiv->getGlobalValue());
Hashes.emplace_back(H);
return stable_hash_combine(Hashes);
}
default:
// Skip other types of constants for simplicity.
return stable_hash_combine(Hashes);
}
}
stable_hash hashValue(Value *V) {
// Check constant and return its hash.
Constant *C = dyn_cast<Constant>(V);
if (C)
return hashConstant(C);
// Hash argument number.
SmallVector<stable_hash> Hashes;
if (Argument *Arg = dyn_cast<Argument>(V))
Hashes.emplace_back(Arg->getArgNo());
// Get an index (an insertion order) for the non-constant value.
auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size());
Hashes.emplace_back(It->second);
return stable_hash_combine(Hashes);
}
stable_hash hashOperand(Value *Operand) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(hashType(Operand->getType()));
Hashes.emplace_back(hashValue(Operand));
return stable_hash_combine(Hashes);
}
stable_hash hashInstruction(const Instruction &Inst) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Inst.getOpcode());
if (!DetailedHash)
return stable_hash_combine(Hashes);
Hashes.emplace_back(hashType(Inst.getType()));
// Handle additional properties of specific instructions that cause
// semantic differences in the IR.
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
Hashes.emplace_back(ComparisonInstruction->getPredicate());
unsigned InstIdx = 0;
if (IndexInstruction) {
InstIdx = IndexInstruction->size();
IndexInstruction->try_emplace(InstIdx, const_cast<Instruction *>(&Inst));
}
for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) {
auto OpndHash = hashOperand(Op);
if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
assert(IndexOperandHashMap);
IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash);
} else
Hashes.emplace_back(OpndHash);
}
return stable_hash_combine(Hashes);
}
// A function hash is calculated by considering only the number of arguments
// and whether a function is varargs, the order of basic blocks (given by the
// successors of each basic block in depth first order), and the order of
// opcodes of each instruction within each of these basic blocks. This mirrors
// the strategy FunctionComparator::compare() uses to compare functions by
// walking the BBs in depth first order and comparing each instruction in
// sequence. Because this hash currently does not look at the operands, it is
// insensitive to things such as the target of calls and the constants used in
// the function, which makes it useful when possibly merging functions which
// are the same modulo constants and call targets.
//
// Note that different users of StructuralHash will want different behavior
// out of it (i.e., MergeFunctions will want something different from PM
// expensive checks for pass modification status). When modifying this
// function, most changes should be gated behind an option and enabled
// selectively.
void update(const Function &F) {
// Declarations don't affect analyses.
if (F.isDeclaration())
return;
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Hash);
Hashes.emplace_back(FunctionHeaderHash);
Hashes.emplace_back(F.isVarArg());
Hashes.emplace_back(F.arg_size());
SmallVector<const BasicBlock *, 8> BBs;
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
// Walk the blocks in the same order as
// FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
// function "structure." (BB and opcode sequence)
BBs.push_back(&F.getEntryBlock());
VisitedBBs.insert(BBs[0]);
while (!BBs.empty()) {
const BasicBlock *BB = BBs.pop_back_val();
Hashes.emplace_back(BlockHeaderHash);
for (auto &Inst : *BB)
Hashes.emplace_back(hashInstruction(Inst));
for (const BasicBlock *Succ : successors(BB))
if (VisitedBBs.insert(Succ).second)
BBs.push_back(Succ);
}
// Update the combined hash in place.
Hash = stable_hash_combine(Hashes);
}
void update(const GlobalVariable &GV) {
// Declarations and used/compiler.used don't affect analyses.
// Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
// we ignore anything with the `.llvm` prefix
if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
return;
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Hash);
Hashes.emplace_back(GlobalHeaderHash);
Hashes.emplace_back(GV.getValueType()->getTypeID());
// Update the combined hash in place.
Hash = stable_hash_combine(Hashes);
}
void update(const Module &M) {
for (const GlobalVariable &GV : M.globals())
update(GV);
for (const Function &F : M)
update(F);
}
uint64_t getHash() const { return Hash; }
std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
return std::move(IndexInstruction);
}
std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
return std::move(IndexOperandHashMap);
}
};
} // namespace
stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
StructuralHashImpl H(DetailedHash);
H.update(F);
return H.getHash();
}
stable_hash llvm::StructuralHash(const GlobalVariable &GVar) {
return StructuralHashImpl::hashGlobalVariable(GVar);
}
stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
StructuralHashImpl H(DetailedHash);
H.update(M);
return H.getHash();
}
FunctionHashInfo
llvm::StructuralHashWithDifferences(const Function &F,
IgnoreOperandFunc IgnoreOp) {
StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp);
H.update(F);
return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(),
H.getIndexPairOpndHashMap());
}
|