diff options
-rw-r--r-- | llvm/include/llvm/IR/Value.h | 2 | ||||
-rw-r--r-- | llvm/lib/Analysis/ValueTracking.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/IR/Value.cpp | 31 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 14 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/LICM.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/SROA.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp | 6 | ||||
-rw-r--r-- | llvm/test/Transforms/LICM/hoist-bitcast-load.ll | 160 |
8 files changed, 202 insertions, 21 deletions
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h index aae39ccb..b5bbc96 100644 --- a/llvm/include/llvm/IR/Value.h +++ b/llvm/include/llvm/IR/Value.h @@ -430,7 +430,7 @@ public: /// isDereferenceablePointer - Test if this value is always a pointer to /// allocated and suitably aligned memory for a simple load or store. - bool isDereferenceablePointer() const; + bool isDereferenceablePointer(const DataLayout *DL = nullptr) const; /// DoPHITranslation - If this value is a PHI node with CurBB as its parent, /// return the value in the PHI node corresponding to PredBB. If not, return diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index e664454..5264745 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -2007,7 +2007,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, // Speculative load may create a race that did not exist in the source. LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) return false; - return LI->getPointerOperand()->isDereferenceablePointer(); + return LI->getPointerOperand()->isDereferenceablePointer(TD); } case Instruction::Call: { if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp index 463024a..d61b8e5 100644 --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InstrTypes.h" @@ -472,18 +473,32 @@ Value *Value::stripInBoundsOffsets() { /// isDereferenceablePointer - Test if this value is always a pointer to /// allocated and suitably aligned memory for a simple load or store. -static bool isDereferenceablePointer(const Value *V, +static bool isDereferenceablePointer(const Value *V, const DataLayout *DL, SmallPtrSet<const Value *, 32> &Visited) { // Note that it is not safe to speculate into a malloc'd region because // malloc may return null. - // It's also not always safe to follow a bitcast, for example: - // bitcast i8* (alloca i8) to i32* - // would result in a 4-byte load from a 1-byte alloca. Some cases could - // be handled using DataLayout to check sizes and alignments though. // These are obviously ok. if (isa<AllocaInst>(V)) return true; + // It's not always safe to follow a bitcast, for example: + // bitcast i8* (alloca i8) to i32* + // would result in a 4-byte load from a 1-byte alloca. However, + // if we're casting from a pointer from a type of larger size + // to a type of smaller size (or the same size), and the alignment + // is at least as large as for the resulting pointer type, then + // we can look through the bitcast. + if (DL) + if (const BitCastInst* BC = dyn_cast<BitCastInst>(V)) { + Type *STy = BC->getSrcTy()->getPointerElementType(), + *DTy = BC->getDestTy()->getPointerElementType(); + if ((DL->getTypeStoreSize(STy) >= + DL->getTypeStoreSize(DTy)) && + (DL->getABITypeAlignment(STy) >= + DL->getABITypeAlignment(DTy))) + return isDereferenceablePointer(BC->getOperand(0), DL, Visited); + } + // Global variables which can't collapse to null are ok. if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) return !GV->hasExternalWeakLinkage(); @@ -497,7 +512,7 @@ static bool isDereferenceablePointer(const Value *V, // Conservatively require that the base pointer be fully dereferenceable. if (!Visited.insert(GEP->getOperand(0))) return false; - if (!isDereferenceablePointer(GEP->getOperand(0), Visited)) + if (!isDereferenceablePointer(GEP->getOperand(0), DL, Visited)) return false; // Check the indices. gep_type_iterator GTI = gep_type_begin(GEP); @@ -533,9 +548,9 @@ static bool isDereferenceablePointer(const Value *V, /// isDereferenceablePointer - Test if this value is always a pointer to /// allocated and suitably aligned memory for a simple load or store. -bool Value::isDereferenceablePointer() const { +bool Value::isDereferenceablePointer(const DataLayout *DL) const { SmallPtrSet<const Value *, 32> Visited; - return ::isDereferenceablePointer(this, Visited); + return ::isDereferenceablePointer(this, DL, Visited); } /// DoPHITranslation - If this value is a PHI node with CurBB as its parent, diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index 97a119b..f9de54a 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -39,6 +39,7 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" @@ -68,13 +69,14 @@ namespace { bool runOnSCC(CallGraphSCC &SCC) override; static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) - : CallGraphSCCPass(ID), maxElements(maxElements) { + : CallGraphSCCPass(ID), DL(nullptr), maxElements(maxElements) { initializeArgPromotionPass(*PassRegistry::getPassRegistry()); } /// A vector used to hold the indices of a single GEP instruction typedef std::vector<uint64_t> IndicesVector; + const DataLayout *DL; private: CallGraphNode *PromoteArguments(CallGraphNode *CGN); bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; @@ -103,6 +105,9 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { bool Changed = false, LocalChange; + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; + do { // Iterate until we stop promoting from this SCC. LocalChange = false; // Attempt to promote arguments from all functions in this SCC. @@ -218,7 +223,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { /// AllCallersPassInValidPointerForArgument - Return true if we can prove that /// all callees pass in a valid pointer for the specified function argument. -static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { +static bool AllCallersPassInValidPointerForArgument(Argument *Arg, + const DataLayout *DL) { Function *Callee = Arg->getParent(); unsigned ArgNo = Arg->getArgNo(); @@ -229,7 +235,7 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { CallSite CS(U); assert(CS && "Should only have direct calls!"); - if (!CS.getArgument(ArgNo)->isDereferenceablePointer()) + if (!CS.getArgument(ArgNo)->isDereferenceablePointer(DL)) return false; } return true; @@ -337,7 +343,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. - if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg)) + if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg, DL)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index bc1db37..abcceb2 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -639,7 +639,7 @@ void LICM::hoist(Instruction &I) { /// bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { // If it is not a trapping instruction, it is always safe to hoist. - if (isSafeToSpeculativelyExecute(&Inst)) + if (isSafeToSpeculativelyExecute(&Inst, DL)) return true; return isGuaranteedToExecute(Inst); diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 6532b7a..8c7f253 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -1130,7 +1130,7 @@ static bool isSafePHIToSpeculate(PHINode &PN, // If this pointer is always safe to load, or if we can prove that there // is already a load in the block, then we can move the load to the pred // block. - if (InVal->isDereferenceablePointer() || + if (InVal->isDereferenceablePointer(DL) || isSafeToLoadUnconditionally(InVal, TI, MaxAlign, DL)) continue; @@ -1198,8 +1198,8 @@ static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = nullptr) { Value *TValue = SI.getTrueValue(); Value *FValue = SI.getFalseValue(); - bool TDerefable = TValue->isDereferenceablePointer(); - bool FDerefable = FValue->isDereferenceablePointer(); + bool TDerefable = TValue->isDereferenceablePointer(DL); + bool FDerefable = FValue->isDereferenceablePointer(DL); for (User *U : SI.users()) { LoadInst *LI = dyn_cast<LoadInst>(U); diff --git a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 58192fc..e2a24a7 100644 --- a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1142,8 +1142,8 @@ public: /// We can do this to a select if its only uses are loads and if the operand to /// the select can be loaded unconditionally. static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) { - bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(); - bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(); + bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(DL); + bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(DL); for (User *U : SI->users()) { LoadInst *LI = dyn_cast<LoadInst>(U); @@ -1226,7 +1226,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) { // If this pointer is always safe to load, or if we can prove that there is // already a load in the block, then we can move the load to the pred block. - if (InVal->isDereferenceablePointer() || + if (InVal->isDereferenceablePointer(DL) || isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, DL)) continue; diff --git a/llvm/test/Transforms/LICM/hoist-bitcast-load.ll b/llvm/test/Transforms/LICM/hoist-bitcast-load.ll new file mode 100644 index 0000000..bb105e5 --- /dev/null +++ b/llvm/test/Transforms/LICM/hoist-bitcast-load.ll @@ -0,0 +1,160 @@ +; RUN: opt -S -basicaa -licm < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Make sure the basic alloca pointer hoisting works: +; CHECK-LABEL: @test1 +; CHECK: load i32* %c, align 4 +; CHECK: for.body: + +; Function Attrs: nounwind uwtable +define void @test1(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 { +entry: + %cmp6 = icmp sgt i32 %n, 0 + %c = alloca i32 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv + %2 = load i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +; Make sure the basic alloca pointer hoisting works through a bitcast to a +; pointer to a smaller type: +; CHECK-LABEL: @test2 +; CHECK: load i32* %c, align 4 +; CHECK: for.body: + +; Function Attrs: nounwind uwtable +define void @test2(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 { +entry: + %cmp6 = icmp sgt i32 %n, 0 + %ca = alloca i64 + %c = bitcast i64* %ca to i32* + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv + %2 = load i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +; Make sure the basic alloca pointer hoisting works through a bitcast to a +; pointer to a smaller type (where the bitcast also needs to be hoisted): +; CHECK-LABEL: @test3 +; CHECK: load i32* %c, align 4 +; CHECK: for.body: + +; Function Attrs: nounwind uwtable +define void @test3(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 { +entry: + %cmp6 = icmp sgt i32 %n, 0 + %ca = alloca i64 + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %c = bitcast i64* %ca to i32* + %1 = load i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv + %2 = load i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +; Make sure the basic alloca pointer hoisting does not happen through a bitcast +; to a pointer to a larger type: +; CHECK-LABEL: @test4 +; CHECK: for.body: +; CHECK: load i32* %c, align 4 + +; Function Attrs: nounwind uwtable +define void @test4(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 { +entry: + %cmp6 = icmp sgt i32 %n, 0 + %ca = alloca i16 + %c = bitcast i16* %ca to i32* + br i1 %cmp6, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.inc + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv + %0 = load i32* %arrayidx, align 4 + %cmp1 = icmp sgt i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: ; preds = %for.body + %1 = load i32* %c, align 4 + %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv + %2 = load i32* %arrayidx3, align 4 + %mul = mul nsw i32 %2, %1 + store i32 %mul, i32* %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body, %if.then + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.inc, %entry + ret void +} + +attributes #0 = { nounwind uwtable } + |