diff options
author | Kyungwoo Lee <kyulee@meta.com> | 2024-10-16 14:36:38 -0700 |
---|---|---|
committer | Kyungwoo Lee <kyulee@meta.com> | 2024-10-16 14:36:50 -0700 |
commit | e715fc6b8fb299d7973ec2a63509277918b88131 (patch) | |
tree | 8d9bae778a38bbfd3ac1a5e4b91e415429f0a1cf | |
parent | 173c68239d1d11f4e36c8af07a28310da67568a7 (diff) | |
download | llvm-users/kyulee-com/strhash-nfc.zip llvm-users/kyulee-com/strhash-nfc.tar.gz llvm-users/kyulee-com/strhash-nfc.tar.bz2 |
[StructuralHash] Refactorusers/kyulee-com/strhash-nfc
- Use stable_hash instead of uint64_t
- Rename update* to hash* functions. They compute stable_hash locally
and return it.
-rw-r--r-- | llvm/include/llvm/IR/StructuralHash.h | 3 | ||||
-rw-r--r-- | llvm/lib/IR/StructuralHash.cpp | 121 | ||||
-rw-r--r-- | llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll | 17 |
3 files changed, 88 insertions, 53 deletions
diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index 57fb45d..aa292bc 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -14,6 +14,7 @@ #ifndef LLVM_IR_STRUCTURALHASH_H #define LLVM_IR_STRUCTURALHASH_H +#include "llvm/ADT/StableHashing.h" #include <cstdint> namespace llvm { @@ -21,7 +22,7 @@ namespace llvm { class Function; class Module; -using IRHash = uint64_t; +using IRHash = stable_hash; /// Returns a hash of the function \p F. /// \param F The function to hash. diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index fb4f33a..a1fabab 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -24,61 +24,86 @@ namespace { // by the MergeFunctions pass. class StructuralHashImpl { - uint64_t Hash = 4; + stable_hash Hash = 4; - void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); } + bool DetailedHash; // This will produce different values on 32-bit and 64-bit systens as // hash_combine returns a size_t. However, this is only used for // detailed hashing which, in-tree, only needs to distinguish between // differences in functions. - template <typename T> void hashArbitaryType(const T &V) { - hash(hash_combine(V)); + // TODO: This is not stable. + template <typename T> stable_hash hashArbitaryType(const T &V) { + return hash_combine(V); } - void hashType(Type *ValueType) { - hash(ValueType->getTypeID()); + stable_hash hashType(Type *ValueType) { + SmallVector<stable_hash> Hashes; + Hashes.emplace_back(ValueType->getTypeID()); if (ValueType->isIntegerTy()) - hash(ValueType->getIntegerBitWidth()); + Hashes.emplace_back(ValueType->getIntegerBitWidth()); + return stable_hash_combine(Hashes); } public: - StructuralHashImpl() = default; - - void updateOperand(Value *Operand) { - hashType(Operand->getType()); - - // The cases enumerated below are not exhaustive and are only aimed to - // get decent coverage over the function. - if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) { - hashArbitaryType(ConstInt->getValue()); - } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) { - hashArbitaryType(ConstFP->getValue()); - } else if (Argument *Arg = dyn_cast<Argument>(Operand)) { - hash(Arg->getArgNo()); - } else if (Function *Func = dyn_cast<Function>(Operand)) { + StructuralHashImpl() = delete; + explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {} + + stable_hash hashConstant(Constant *C) { + SmallVector<stable_hash> Hashes; + // TODO: hashArbitaryType() is not stable. + if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) { + Hashes.emplace_back(hashArbitaryType(ConstInt->getValue())); + } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) { + Hashes.emplace_back(hashArbitaryType(ConstFP->getValue())); + } else if (Function *Func = dyn_cast<Function>(C)) // Hashing the name will be deterministic as LLVM's hashing infrastructure // has explicit support for hashing strings and will not simply hash // the pointer. - hashArbitaryType(Func->getName()); - } + Hashes.emplace_back(hashArbitaryType(Func->getName())); + + return stable_hash_combine(Hashes); + } + + stable_hash hashValue(Value *V) { + // Check constant and return its hash. + Constant *C = dyn_cast<Constant>(V); + if (C) + return hashConstant(C); + + // Hash argument number. + SmallVector<stable_hash> Hashes; + if (Argument *Arg = dyn_cast<Argument>(V)) + Hashes.emplace_back(Arg->getArgNo()); + + return stable_hash_combine(Hashes); } - void updateInstruction(const Instruction &Inst, bool DetailedHash) { - hash(Inst.getOpcode()); + stable_hash hashOperand(Value *Operand) { + SmallVector<stable_hash> Hashes; + Hashes.emplace_back(hashType(Operand->getType())); + Hashes.emplace_back(hashValue(Operand)); + return stable_hash_combine(Hashes); + } + + stable_hash hashInstruction(const Instruction &Inst) { + SmallVector<stable_hash> Hashes; + Hashes.emplace_back(Inst.getOpcode()); if (!DetailedHash) - return; + return stable_hash_combine(Hashes); - hashType(Inst.getType()); + Hashes.emplace_back(hashType(Inst.getType())); // Handle additional properties of specific instructions that cause // semantic differences in the IR. if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst)) - hash(ComparisonInstruction->getPredicate()); + Hashes.emplace_back(ComparisonInstruction->getPredicate()); for (const auto &Op : Inst.operands()) - updateOperand(Op); + Hashes.emplace_back(hashOperand(Op)); + + return stable_hash_combine(Hashes); } // A function hash is calculated by considering only the number of arguments @@ -97,15 +122,17 @@ public: // expensive checks for pass modification status). When modifying this // function, most changes should be gated behind an option and enabled // selectively. - void update(const Function &F, bool DetailedHash) { + void update(const Function &F) { // Declarations don't affect analyses. if (F.isDeclaration()) return; - hash(0x62642d6b6b2d6b72); // Function header + SmallVector<stable_hash> Hashes; + Hashes.emplace_back(Hash); + Hashes.emplace_back(0x62642d6b6b2d6b72); // Function header - hash(F.isVarArg()); - hash(F.arg_size()); + Hashes.emplace_back(F.isVarArg()); + Hashes.emplace_back(F.arg_size()); SmallVector<const BasicBlock *, 8> BBs; SmallPtrSet<const BasicBlock *, 16> VisitedBBs; @@ -121,14 +148,17 @@ public: // This random value acts as a block header, as otherwise the partition of // opcodes into BBs wouldn't affect the hash, only the order of the // opcodes - hash(45798); + Hashes.emplace_back(45798); for (auto &Inst : *BB) - updateInstruction(Inst, DetailedHash); + Hashes.emplace_back(hashInstruction(Inst)); for (const BasicBlock *Succ : successors(BB)) if (VisitedBBs.insert(Succ).second) BBs.push_back(Succ); } + + // Update the combined hash in place. + Hash = stable_hash_combine(Hashes); } void update(const GlobalVariable &GV) { @@ -137,15 +167,20 @@ public: // we ignore anything with the `.llvm` prefix if (GV.isDeclaration() || GV.getName().starts_with("llvm.")) return; - hash(23456); // Global header - hash(GV.getValueType()->getTypeID()); + SmallVector<stable_hash> Hashes; + Hashes.emplace_back(Hash); + Hashes.emplace_back(23456); // Global header + Hashes.emplace_back(GV.getValueType()->getTypeID()); + + // Update the combined hash in place. + Hash = stable_hash_combine(Hashes); } - void update(const Module &M, bool DetailedHash) { + void update(const Module &M) { for (const GlobalVariable &GV : M.globals()) update(GV); for (const Function &F : M) - update(F, DetailedHash); + update(F); } uint64_t getHash() const { return Hash; } @@ -154,13 +189,13 @@ public: } // namespace IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) { - StructuralHashImpl H; - H.update(F, DetailedHash); + StructuralHashImpl H(DetailedHash); + H.update(F); return H.getHash(); } IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) { - StructuralHashImpl H; - H.update(M, DetailedHash); + StructuralHashImpl H(DetailedHash); + H.update(M); return H.getHash(); } diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll index e7718ca..0ceb363 100644 --- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll +++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll @@ -63,6 +63,14 @@ lpad: resume { ptr, i32 } zeroinitializer } +define i8 @call_with_same_range() { +; CHECK-LABEL: @call_with_same_range +; CHECK: tail call i8 @call_with_range + bitcast i8 0 to i8 + %out = call i8 @dummy(), !range !0 + ret i8 %out +} + define i8 @invoke_with_same_range() personality ptr undef { ; CHECK-LABEL: @invoke_with_same_range() ; CHECK: tail call i8 @invoke_with_range() @@ -76,15 +84,6 @@ lpad: resume { ptr, i32 } zeroinitializer } -define i8 @call_with_same_range() { -; CHECK-LABEL: @call_with_same_range -; CHECK: tail call i8 @call_with_range - bitcast i8 0 to i8 - %out = call i8 @dummy(), !range !0 - ret i8 %out -} - - declare i8 @dummy(); declare i32 @__gxx_personality_v0(...) |