diff options
Diffstat (limited to 'llvm/lib/Analysis')
-rwxr-xr-x | llvm/lib/Analysis/ConstantFolding.cpp | 25 | ||||
-rw-r--r-- | llvm/lib/Analysis/IR2Vec.cpp | 180 | ||||
-rw-r--r-- | llvm/lib/Analysis/Loads.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Analysis/ScalarEvolution.cpp | 46 |
4 files changed, 91 insertions, 164 deletions
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index b744537..45c889c 100755 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -329,6 +329,7 @@ bool llvm::IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, // Look through ptr->int and ptr->ptr casts. if (CE->getOpcode() == Instruction::PtrToInt || + CE->getOpcode() == Instruction::PtrToAddr || CE->getOpcode() == Instruction::BitCast) return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL, DSOEquiv); @@ -1495,22 +1496,22 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C, default: llvm_unreachable("Missing case"); case Instruction::PtrToAddr: - // TODO: Add some of the ptrtoint folds here as well. - break; case Instruction::PtrToInt: if (auto *CE = dyn_cast<ConstantExpr>(C)) { Constant *FoldedValue = nullptr; - // If the input is a inttoptr, eliminate the pair. This requires knowing + // If the input is an inttoptr, eliminate the pair. This requires knowing // the width of a pointer, so it can't be done in ConstantExpr::getCast. if (CE->getOpcode() == Instruction::IntToPtr) { - // zext/trunc the inttoptr to pointer size. - FoldedValue = ConstantFoldIntegerCast(CE->getOperand(0), - DL.getIntPtrType(CE->getType()), + // zext/trunc the inttoptr to pointer/address size. + Type *MidTy = Opcode == Instruction::PtrToInt + ? DL.getAddressType(CE->getType()) + : DL.getIntPtrType(CE->getType()); + FoldedValue = ConstantFoldIntegerCast(CE->getOperand(0), MidTy, /*IsSigned=*/false, DL); } else if (auto *GEP = dyn_cast<GEPOperator>(CE)) { // If we have GEP, we can perform the following folds: - // (ptrtoint (gep null, x)) -> x - // (ptrtoint (gep (gep null, x), y) -> x + y, etc. + // (ptrtoint/ptrtoaddr (gep null, x)) -> x + // (ptrtoint/ptrtoaddr (gep (gep null, x), y) -> x + y, etc. unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType()); APInt BaseOffset(BitWidth, 0); auto *Base = cast<Constant>(GEP->stripAndAccumulateConstantOffsets( @@ -1518,7 +1519,8 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C, if (Base->isNullValue()) { FoldedValue = ConstantInt::get(CE->getContext(), BaseOffset); } else { - // ptrtoint (gep i8, Ptr, (sub 0, V)) -> sub (ptrtoint Ptr), V + // ptrtoint/ptrtoaddr (gep i8, Ptr, (sub 0, V)) + // -> sub (ptrtoint/ptrtoaddr Ptr), V if (GEP->getNumIndices() == 1 && GEP->getSourceElementType()->isIntegerTy(8)) { auto *Ptr = cast<Constant>(GEP->getPointerOperand()); @@ -1528,12 +1530,13 @@ Constant *llvm::ConstantFoldCastOperand(unsigned Opcode, Constant *C, Sub->getOpcode() == Instruction::Sub && Sub->getOperand(0)->isNullValue()) FoldedValue = ConstantExpr::getSub( - ConstantExpr::getPtrToInt(Ptr, IntIdxTy), Sub->getOperand(1)); + ConstantExpr::getCast(Opcode, Ptr, IntIdxTy), + Sub->getOperand(1)); } } } if (FoldedValue) { - // Do a zext or trunc to get to the ptrtoint dest size. + // Do a zext or trunc to get to the ptrtoint/ptrtoaddr dest size. return ConstantFoldIntegerCast(FoldedValue, DestTy, /*IsSigned=*/false, DL); } diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp index 1794a60..85b5372 100644 --- a/llvm/lib/Analysis/IR2Vec.cpp +++ b/llvm/lib/Analysis/IR2Vec.cpp @@ -153,11 +153,6 @@ void Embedding::print(raw_ostream &OS) const { // Embedder and its subclasses //===----------------------------------------------------------------------===// -Embedder::Embedder(const Function &F, const Vocabulary &Vocab) - : F(F), Vocab(Vocab), Dimension(Vocab.getDimension()), - OpcWeight(::OpcWeight), TypeWeight(::TypeWeight), ArgWeight(::ArgWeight), - FuncVector(Embedding(Dimension)) {} - std::unique_ptr<Embedder> Embedder::create(IR2VecKind Mode, const Function &F, const Vocabulary &Vocab) { switch (Mode) { @@ -169,110 +164,85 @@ std::unique_ptr<Embedder> Embedder::create(IR2VecKind Mode, const Function &F, return nullptr; } -const InstEmbeddingsMap &Embedder::getInstVecMap() const { - if (InstVecMap.empty()) - computeEmbeddings(); - return InstVecMap; -} - -const BBEmbeddingsMap &Embedder::getBBVecMap() const { - if (BBVecMap.empty()) - computeEmbeddings(); - return BBVecMap; -} - -const Embedding &Embedder::getBBVector(const BasicBlock &BB) const { - auto It = BBVecMap.find(&BB); - if (It != BBVecMap.end()) - return It->second; - computeEmbeddings(BB); - return BBVecMap[&BB]; -} +Embedding Embedder::computeEmbeddings() const { + Embedding FuncVector(Dimension, 0.0); -const Embedding &Embedder::getFunctionVector() const { - // Currently, we always (re)compute the embeddings for the function. - // This is cheaper than caching the vector. - computeEmbeddings(); - return FuncVector; -} - -void Embedder::computeEmbeddings() const { if (F.isDeclaration()) - return; - - FuncVector = Embedding(Dimension, 0.0); + return FuncVector; // Consider only the basic blocks that are reachable from entry - for (const BasicBlock *BB : depth_first(&F)) { - computeEmbeddings(*BB); - FuncVector += BBVecMap[BB]; - } + for (const BasicBlock *BB : depth_first(&F)) + FuncVector += computeEmbeddings(*BB); + return FuncVector; } -void SymbolicEmbedder::computeEmbeddings(const BasicBlock &BB) const { +Embedding Embedder::computeEmbeddings(const BasicBlock &BB) const { Embedding BBVector(Dimension, 0); // We consider only the non-debug and non-pseudo instructions - for (const auto &I : BB.instructionsWithoutDebug()) { - Embedding ArgEmb(Dimension, 0); - for (const auto &Op : I.operands()) - ArgEmb += Vocab[*Op]; - auto InstVector = - Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb; - if (const auto *IC = dyn_cast<CmpInst>(&I)) - InstVector += Vocab[IC->getPredicate()]; - InstVecMap[&I] = InstVector; - BBVector += InstVector; - } - BBVecMap[&BB] = BBVector; -} - -void FlowAwareEmbedder::computeEmbeddings(const BasicBlock &BB) const { - Embedding BBVector(Dimension, 0); + for (const auto &I : BB.instructionsWithoutDebug()) + BBVector += computeEmbeddings(I); + return BBVector; +} + +Embedding SymbolicEmbedder::computeEmbeddings(const Instruction &I) const { + // Currently, we always (re)compute the embeddings for symbolic embedder. + // This is cheaper than caching the vectors. + Embedding ArgEmb(Dimension, 0); + for (const auto &Op : I.operands()) + ArgEmb += Vocab[*Op]; + auto InstVector = + Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb; + if (const auto *IC = dyn_cast<CmpInst>(&I)) + InstVector += Vocab[IC->getPredicate()]; + return InstVector; +} + +Embedding FlowAwareEmbedder::computeEmbeddings(const Instruction &I) const { + // If we have already computed the embedding for this instruction, return it + auto It = InstVecMap.find(&I); + if (It != InstVecMap.end()) + return It->second; - // We consider only the non-debug and non-pseudo instructions - for (const auto &I : BB.instructionsWithoutDebug()) { - // TODO: Handle call instructions differently. - // For now, we treat them like other instructions - Embedding ArgEmb(Dimension, 0); - for (const auto &Op : I.operands()) { - // If the operand is defined elsewhere, we use its embedding - if (const auto *DefInst = dyn_cast<Instruction>(Op)) { - auto DefIt = InstVecMap.find(DefInst); - // Fixme (#159171): Ideally we should never miss an instruction - // embedding here. - // But when we have cyclic dependencies (e.g., phi - // nodes), we might miss the embedding. In such cases, we fall back to - // using the vocabulary embedding. This can be fixed by iterating to a - // fixed-point, or by using a simple solver for the set of simultaneous - // equations. - // Another case when we might miss an instruction embedding is when - // the operand instruction is in a different basic block that has not - // been processed yet. This can be fixed by processing the basic blocks - // in a topological order. - if (DefIt != InstVecMap.end()) - ArgEmb += DefIt->second; - else - ArgEmb += Vocab[*Op]; - } - // If the operand is not defined by an instruction, we use the vocabulary - else { - LLVM_DEBUG(errs() << "Using embedding from vocabulary for operand: " - << *Op << "=" << Vocab[*Op][0] << "\n"); + // TODO: Handle call instructions differently. + // For now, we treat them like other instructions + Embedding ArgEmb(Dimension, 0); + for (const auto &Op : I.operands()) { + // If the operand is defined elsewhere, we use its embedding + if (const auto *DefInst = dyn_cast<Instruction>(Op)) { + auto DefIt = InstVecMap.find(DefInst); + // Fixme (#159171): Ideally we should never miss an instruction + // embedding here. + // But when we have cyclic dependencies (e.g., phi + // nodes), we might miss the embedding. In such cases, we fall back to + // using the vocabulary embedding. This can be fixed by iterating to a + // fixed-point, or by using a simple solver for the set of simultaneous + // equations. + // Another case when we might miss an instruction embedding is when + // the operand instruction is in a different basic block that has not + // been processed yet. This can be fixed by processing the basic blocks + // in a topological order. + if (DefIt != InstVecMap.end()) + ArgEmb += DefIt->second; + else ArgEmb += Vocab[*Op]; - } } - // Create the instruction vector by combining opcode, type, and arguments - // embeddings - auto InstVector = - Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb; - // Add compare predicate embedding as an additional operand if applicable - if (const auto *IC = dyn_cast<CmpInst>(&I)) - InstVector += Vocab[IC->getPredicate()]; - InstVecMap[&I] = InstVector; - BBVector += InstVector; + // If the operand is not defined by an instruction, we use the + // vocabulary + else { + LLVM_DEBUG(errs() << "Using embedding from vocabulary for operand: " + << *Op << "=" << Vocab[*Op][0] << "\n"); + ArgEmb += Vocab[*Op]; + } } - BBVecMap[&BB] = BBVector; + // Create the instruction vector by combining opcode, type, and arguments + // embeddings + auto InstVector = + Vocab[I.getOpcode()] + Vocab[I.getType()->getTypeID()] + ArgEmb; + if (const auto *IC = dyn_cast<CmpInst>(&I)) + InstVector += Vocab[IC->getPredicate()]; + InstVecMap[&I] = InstVector; + return InstVector; } // ==----------------------------------------------------------------------===// @@ -695,25 +665,17 @@ PreservedAnalyses IR2VecPrinterPass::run(Module &M, Emb->getFunctionVector().print(OS); OS << "Basic block vectors:\n"; - const auto &BBMap = Emb->getBBVecMap(); for (const BasicBlock &BB : F) { - auto It = BBMap.find(&BB); - if (It != BBMap.end()) { - OS << "Basic block: " << BB.getName() << ":\n"; - It->second.print(OS); - } + OS << "Basic block: " << BB.getName() << ":\n"; + Emb->getBBVector(BB).print(OS); } OS << "Instruction vectors:\n"; - const auto &InstMap = Emb->getInstVecMap(); for (const BasicBlock &BB : F) { for (const Instruction &I : BB) { - auto It = InstMap.find(&I); - if (It != InstMap.end()) { - OS << "Instruction: "; - I.print(OS); - It->second.print(OS); - } + OS << "Instruction: "; + I.print(OS); + Emb->getInstVector(I).print(OS); } } } diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 4c2e1fe..54f55b2 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -812,7 +812,9 @@ static bool isPointerUseReplacable(const Use &U) { auto *User = Worklist.pop_back_val(); if (!Visited.insert(User).second) continue; - if (isa<ICmpInst, PtrToIntInst>(User)) + // FIXME: The PtrToIntInst case here is not strictly correct, as it + // changes which provenance is exposed. + if (isa<ICmpInst, PtrToIntInst, PtrToAddrInst>(User)) continue; if (isa<PHINode, SelectInst>(User)) Worklist.append(User->user_begin(), User->user_end()); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 6f6776c..30bcff7 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -15749,51 +15749,11 @@ void ScalarEvolution::LoopGuards::collectFromBlock( return RewriteMap.lookup_or(S, S); }; - // Check for the SCEV expression (A /u B) * B while B is a constant, inside - // \p Expr. The check is done recuresively on \p Expr, which is assumed to - // be a composition of Min/Max SCEVs. Return whether the SCEV expression (A - // /u B) * B was found, and return the divisor B in \p DividesBy. For - // example, if Expr = umin (umax ((A /u 8) * 8, 16), 64), return true since - // (A /u 8) * 8 matched the pattern, and return the constant SCEV 8 in \p - // DividesBy. - std::function<bool(const SCEV *, const SCEV *&)> HasDivisibiltyInfo = - [&](const SCEV *Expr, const SCEV *&DividesBy) { - if (auto *Mul = dyn_cast<SCEVMulExpr>(Expr)) { - if (Mul->getNumOperands() != 2) - return false; - auto *MulLHS = Mul->getOperand(0); - auto *MulRHS = Mul->getOperand(1); - if (isa<SCEVConstant>(MulLHS)) - std::swap(MulLHS, MulRHS); - if (auto *Div = dyn_cast<SCEVUDivExpr>(MulLHS)) - if (Div->getOperand(1) == MulRHS) { - DividesBy = MulRHS; - return true; - } - } - if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr)) - return HasDivisibiltyInfo(MinMax->getOperand(0), DividesBy) || - HasDivisibiltyInfo(MinMax->getOperand(1), DividesBy); - return false; - }; - - // Return true if Expr known to divide by \p DividesBy. - std::function<bool(const SCEV *, const SCEV *&)> IsKnownToDivideBy = - [&](const SCEV *Expr, const SCEV *DividesBy) { - if (SE.getURemExpr(Expr, DividesBy)->isZero()) - return true; - if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr)) - return IsKnownToDivideBy(MinMax->getOperand(0), DividesBy) && - IsKnownToDivideBy(MinMax->getOperand(1), DividesBy); - return false; - }; - const SCEV *RewrittenLHS = GetMaybeRewritten(LHS); const SCEV *DividesBy = nullptr; - if (HasDivisibiltyInfo(RewrittenLHS, DividesBy)) - // Check that the whole expression is divided by DividesBy - DividesBy = - IsKnownToDivideBy(RewrittenLHS, DividesBy) ? DividesBy : nullptr; + const APInt &Multiple = SE.getConstantMultiple(RewrittenLHS); + if (!Multiple.isOne()) + DividesBy = SE.getConstant(Multiple); // Collect rewrites for LHS and its transitive operands based on the // condition. |