aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/IR/Value.h2
-rw-r--r--llvm/lib/Analysis/ValueTracking.cpp2
-rw-r--r--llvm/lib/IR/Value.cpp31
-rw-r--r--llvm/lib/Transforms/IPO/ArgumentPromotion.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/SROA.cpp6
-rw-r--r--llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp6
-rw-r--r--llvm/test/Transforms/LICM/hoist-bitcast-load.ll160
8 files changed, 202 insertions, 21 deletions
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index aae39ccb..b5bbc96 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -430,7 +430,7 @@ public:
/// isDereferenceablePointer - Test if this value is always a pointer to
/// allocated and suitably aligned memory for a simple load or store.
- bool isDereferenceablePointer() const;
+ bool isDereferenceablePointer(const DataLayout *DL = nullptr) const;
/// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
/// return the value in the PHI node corresponding to PredBB. If not, return
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index e664454..5264745 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -2007,7 +2007,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
// Speculative load may create a race that did not exist in the source.
LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
return false;
- return LI->getPointerOperand()->isDereferenceablePointer();
+ return LI->getPointerOperand()->isDereferenceablePointer(TD);
}
case Instruction::Call: {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index 463024a..d61b8e5 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InstrTypes.h"
@@ -472,18 +473,32 @@ Value *Value::stripInBoundsOffsets() {
/// isDereferenceablePointer - Test if this value is always a pointer to
/// allocated and suitably aligned memory for a simple load or store.
-static bool isDereferenceablePointer(const Value *V,
+static bool isDereferenceablePointer(const Value *V, const DataLayout *DL,
SmallPtrSet<const Value *, 32> &Visited) {
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
- // It's also not always safe to follow a bitcast, for example:
- // bitcast i8* (alloca i8) to i32*
- // would result in a 4-byte load from a 1-byte alloca. Some cases could
- // be handled using DataLayout to check sizes and alignments though.
// These are obviously ok.
if (isa<AllocaInst>(V)) return true;
+ // It's not always safe to follow a bitcast, for example:
+ // bitcast i8* (alloca i8) to i32*
+ // would result in a 4-byte load from a 1-byte alloca. However,
+ // if we're casting from a pointer from a type of larger size
+ // to a type of smaller size (or the same size), and the alignment
+ // is at least as large as for the resulting pointer type, then
+ // we can look through the bitcast.
+ if (DL)
+ if (const BitCastInst* BC = dyn_cast<BitCastInst>(V)) {
+ Type *STy = BC->getSrcTy()->getPointerElementType(),
+ *DTy = BC->getDestTy()->getPointerElementType();
+ if ((DL->getTypeStoreSize(STy) >=
+ DL->getTypeStoreSize(DTy)) &&
+ (DL->getABITypeAlignment(STy) >=
+ DL->getABITypeAlignment(DTy)))
+ return isDereferenceablePointer(BC->getOperand(0), DL, Visited);
+ }
+
// Global variables which can't collapse to null are ok.
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return !GV->hasExternalWeakLinkage();
@@ -497,7 +512,7 @@ static bool isDereferenceablePointer(const Value *V,
// Conservatively require that the base pointer be fully dereferenceable.
if (!Visited.insert(GEP->getOperand(0)))
return false;
- if (!isDereferenceablePointer(GEP->getOperand(0), Visited))
+ if (!isDereferenceablePointer(GEP->getOperand(0), DL, Visited))
return false;
// Check the indices.
gep_type_iterator GTI = gep_type_begin(GEP);
@@ -533,9 +548,9 @@ static bool isDereferenceablePointer(const Value *V,
/// isDereferenceablePointer - Test if this value is always a pointer to
/// allocated and suitably aligned memory for a simple load or store.
-bool Value::isDereferenceablePointer() const {
+bool Value::isDereferenceablePointer(const DataLayout *DL) const {
SmallPtrSet<const Value *, 32> Visited;
- return ::isDereferenceablePointer(this, Visited);
+ return ::isDereferenceablePointer(this, DL, Visited);
}
/// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 97a119b..f9de54a 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -39,6 +39,7 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
@@ -68,13 +69,14 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override;
static char ID; // Pass identification, replacement for typeid
explicit ArgPromotion(unsigned maxElements = 3)
- : CallGraphSCCPass(ID), maxElements(maxElements) {
+ : CallGraphSCCPass(ID), DL(nullptr), maxElements(maxElements) {
initializeArgPromotionPass(*PassRegistry::getPassRegistry());
}
/// A vector used to hold the indices of a single GEP instruction
typedef std::vector<uint64_t> IndicesVector;
+ const DataLayout *DL;
private:
CallGraphNode *PromoteArguments(CallGraphNode *CGN);
bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const;
@@ -103,6 +105,9 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) {
bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) {
bool Changed = false, LocalChange;
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
+
do { // Iterate until we stop promoting from this SCC.
LocalChange = false;
// Attempt to promote arguments from all functions in this SCC.
@@ -218,7 +223,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
/// AllCallersPassInValidPointerForArgument - Return true if we can prove that
/// all callees pass in a valid pointer for the specified function argument.
-static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
+static bool AllCallersPassInValidPointerForArgument(Argument *Arg,
+ const DataLayout *DL) {
Function *Callee = Arg->getParent();
unsigned ArgNo = Arg->getArgNo();
@@ -229,7 +235,7 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
CallSite CS(U);
assert(CS && "Should only have direct calls!");
- if (!CS.getArgument(ArgNo)->isDereferenceablePointer())
+ if (!CS.getArgument(ArgNo)->isDereferenceablePointer(DL))
return false;
}
return true;
@@ -337,7 +343,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg,
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg))
+ if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg, DL))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index bc1db37..abcceb2 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -639,7 +639,7 @@ void LICM::hoist(Instruction &I) {
///
bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) {
// If it is not a trapping instruction, it is always safe to hoist.
- if (isSafeToSpeculativelyExecute(&Inst))
+ if (isSafeToSpeculativelyExecute(&Inst, DL))
return true;
return isGuaranteedToExecute(Inst);
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 6532b7a..8c7f253 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1130,7 +1130,7 @@ static bool isSafePHIToSpeculate(PHINode &PN,
// If this pointer is always safe to load, or if we can prove that there
// is already a load in the block, then we can move the load to the pred
// block.
- if (InVal->isDereferenceablePointer() ||
+ if (InVal->isDereferenceablePointer(DL) ||
isSafeToLoadUnconditionally(InVal, TI, MaxAlign, DL))
continue;
@@ -1198,8 +1198,8 @@ static bool isSafeSelectToSpeculate(SelectInst &SI,
const DataLayout *DL = nullptr) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
- bool TDerefable = TValue->isDereferenceablePointer();
- bool FDerefable = FValue->isDereferenceablePointer();
+ bool TDerefable = TValue->isDereferenceablePointer(DL);
+ bool FDerefable = FValue->isDereferenceablePointer(DL);
for (User *U : SI.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
diff --git a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 58192fc..e2a24a7 100644
--- a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -1142,8 +1142,8 @@ public:
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
- bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
- bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
+ bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(DL);
+ bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(DL);
for (User *U : SI->users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
@@ -1226,7 +1226,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
// If this pointer is always safe to load, or if we can prove that there is
// already a load in the block, then we can move the load to the pred block.
- if (InVal->isDereferenceablePointer() ||
+ if (InVal->isDereferenceablePointer(DL) ||
isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, DL))
continue;
diff --git a/llvm/test/Transforms/LICM/hoist-bitcast-load.ll b/llvm/test/Transforms/LICM/hoist-bitcast-load.ll
new file mode 100644
index 0000000..bb105e5
--- /dev/null
+++ b/llvm/test/Transforms/LICM/hoist-bitcast-load.ll
@@ -0,0 +1,160 @@
+; RUN: opt -S -basicaa -licm < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure the basic alloca pointer hoisting works:
+; CHECK-LABEL: @test1
+; CHECK: load i32* %c, align 4
+; CHECK: for.body:
+
+; Function Attrs: nounwind uwtable
+define void @test1(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ %c = alloca i32
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %1 = load i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; Make sure the basic alloca pointer hoisting works through a bitcast to a
+; pointer to a smaller type:
+; CHECK-LABEL: @test2
+; CHECK: load i32* %c, align 4
+; CHECK: for.body:
+
+; Function Attrs: nounwind uwtable
+define void @test2(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ %ca = alloca i64
+ %c = bitcast i64* %ca to i32*
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %1 = load i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; Make sure the basic alloca pointer hoisting works through a bitcast to a
+; pointer to a smaller type (where the bitcast also needs to be hoisted):
+; CHECK-LABEL: @test3
+; CHECK: load i32* %c, align 4
+; CHECK: for.body:
+
+; Function Attrs: nounwind uwtable
+define void @test3(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ %ca = alloca i64
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %c = bitcast i64* %ca to i32*
+ %1 = load i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+; Make sure the basic alloca pointer hoisting does not happen through a bitcast
+; to a pointer to a larger type:
+; CHECK-LABEL: @test4
+; CHECK: for.body:
+; CHECK: load i32* %c, align 4
+
+; Function Attrs: nounwind uwtable
+define void @test4(i32* nocapture %a, i32* nocapture readonly %b, i32 %n) #0 {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ %ca = alloca i16
+ %c = bitcast i16* %ca to i32*
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %1 = load i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32* %b, i64 %indvars.iv
+ %2 = load i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+attributes #0 = { nounwind uwtable }
+