Diffstat (limited to 'llvm/lib/Transforms/Scalar')
-rw-r--r-- | llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp | 2
-rw-r--r-- | llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 6
-rw-r--r-- | llvm/lib/Transforms/Scalar/GVN.cpp | 8
-rw-r--r-- | llvm/lib/Transforms/Scalar/GVNSink.cpp | 1
-rw-r--r-- | llvm/lib/Transforms/Scalar/InferAlignment.cpp | 17
-rw-r--r-- | llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 49
-rw-r--r-- | llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp | 43
-rw-r--r-- | llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp | 1
8 files changed, 81 insertions, 46 deletions
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 6141b6d..4ac1321 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -272,7 +272,7 @@ static OverwriteResult isMaskedStoreOverwrite(const Instruction *KillingI,
   if (KillingII->getIntrinsicID() == Intrinsic::masked_store) {
     // Masks.
     // TODO: check that KillingII's mask is a superset of the DeadII's mask.
-    if (KillingII->getArgOperand(3) != DeadII->getArgOperand(3))
+    if (KillingII->getArgOperand(2) != DeadII->getArgOperand(2))
       return OW_Unknown;
   } else if (KillingII->getIntrinsicID() == Intrinsic::vp_store) {
     // Masks.
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 2afa7b7..e30f306 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -1017,14 +1017,14 @@ private:
     };
     auto MaskOp = [](const IntrinsicInst *II) {
       if (II->getIntrinsicID() == Intrinsic::masked_load)
-        return II->getOperand(2);
+        return II->getOperand(1);
       if (II->getIntrinsicID() == Intrinsic::masked_store)
-        return II->getOperand(3);
+        return II->getOperand(2);
       llvm_unreachable("Unexpected IntrinsicInst");
     };
     auto ThruOp = [](const IntrinsicInst *II) {
       if (II->getIntrinsicID() == Intrinsic::masked_load)
-        return II->getOperand(3);
+        return II->getOperand(2);
       llvm_unreachable("Unexpected IntrinsicInst");
     };
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 42db424..72e1131 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -2212,11 +2212,11 @@ bool GVNPass::processMaskedLoad(IntrinsicInst *I) {
   if (!DepInst || !Dep.isLocal() || !Dep.isDef())
     return false;
 
-  Value *Mask = I->getOperand(2);
-  Value *Passthrough = I->getOperand(3);
+  Value *Mask = I->getOperand(1);
+  Value *Passthrough = I->getOperand(2);
   Value *StoreVal;
-  if (!match(DepInst, m_MaskedStore(m_Value(StoreVal), m_Value(), m_Value(),
-                                    m_Specific(Mask))) ||
+  if (!match(DepInst,
+             m_MaskedStore(m_Value(StoreVal), m_Value(), m_Specific(Mask))) ||
       StoreVal->getType() != I->getType())
     return false;
 
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index b9534def..a06f832 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -430,6 +430,7 @@ public:
     case Instruction::FPTrunc:
     case Instruction::FPExt:
     case Instruction::PtrToInt:
+    case Instruction::PtrToAddr:
     case Instruction::IntToPtr:
     case Instruction::BitCast:
     case Instruction::AddrSpaceCast:
diff --git a/llvm/lib/Transforms/Scalar/InferAlignment.cpp b/llvm/lib/Transforms/Scalar/InferAlignment.cpp
index 995b803..39751c0 100644
--- a/llvm/lib/Transforms/Scalar/InferAlignment.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAlignment.cpp
@@ -45,25 +45,20 @@ static bool tryToImproveAlign(
   switch (II->getIntrinsicID()) {
   case Intrinsic::masked_load:
   case Intrinsic::masked_store: {
-    int AlignOpIdx = II->getIntrinsicID() == Intrinsic::masked_load ? 1 : 2;
-    Value *PtrOp = II->getIntrinsicID() == Intrinsic::masked_load
-                       ? II->getArgOperand(0)
-                       : II->getArgOperand(1);
+    unsigned PtrOpIdx = II->getIntrinsicID() == Intrinsic::masked_load ? 0 : 1;
+    Value *PtrOp = II->getArgOperand(PtrOpIdx);
     Type *Type = II->getIntrinsicID() == Intrinsic::masked_load
                      ? II->getType()
                      : II->getArgOperand(0)->getType();
-    Align OldAlign =
-        cast<ConstantInt>(II->getArgOperand(AlignOpIdx))->getAlignValue();
+    Align OldAlign = II->getParamAlign(PtrOpIdx).valueOrOne();
     Align PrefAlign = DL.getPrefTypeAlign(Type);
     Align NewAlign = Fn(PtrOp, OldAlign, PrefAlign);
-    if (NewAlign <= OldAlign ||
-        NewAlign.value() > std::numeric_limits<uint32_t>().max())
+    if (NewAlign <= OldAlign)
       return false;
 
-    Value *V =
-        ConstantInt::get(Type::getInt32Ty(II->getContext()), NewAlign.value());
-    II->setOperand(AlignOpIdx, V);
+    II->addParamAttr(PtrOpIdx,
+                     Attribute::getWithAlignment(II->getContext(), NewAlign));
     return true;
   }
   default:
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 28ae4f0..9aaf6a5 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -43,6 +43,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include <cassert>
 #include <utility>
@@ -1872,6 +1873,51 @@ static void moveLCSSAPhis(BasicBlock *InnerExit, BasicBlock *InnerHeader,
   InnerLatch->replacePhiUsesWith(InnerLatch, OuterLatch);
 }
 
+/// This deals with a corner case when an LCSSA phi node appears in a non-exit
+/// block: the outer loop latch block does not need to be an exit block of the
+/// inner loop. Consider a loop that was in LCSSA form, but then some
+/// transformation like loop-unswitch comes along and creates an empty block,
+/// where BB5 in this example is the outer loop latch block:
+///
+///   BB4:
+///     br label %BB5
+///   BB5:
+///     %old.cond.lcssa = phi i16 [ %cond, %BB4 ]
+///     br outer.header
+///
+/// Interchange then brings it into LCSSA form again, resulting in this chain
+/// of single-input phi nodes:
+///
+///   BB4:
+///     %new.cond.lcssa = phi i16 [ %cond, %BB3 ]
+///     br label %BB5
+///   BB5:
+///     %old.cond.lcssa = phi i16 [ %new.cond.lcssa, %BB4 ]
+///
+/// The problem is that interchange can reorder blocks BB4 and BB5, placing the
+/// use before the def if we don't check this. The solution is to simplify
+/// (i.e. remove) LCSSA phi nodes if they appear in non-exit blocks.
+///
+static void simplifyLCSSAPhis(Loop *OuterLoop, Loop *InnerLoop) {
+  BasicBlock *InnerLoopExit = InnerLoop->getExitBlock();
+  BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
+
+  // Do not modify LCSSA phis where they actually belong, i.e. in exit blocks.
+  if (OuterLoopLatch == InnerLoopExit)
+    return;
+
+  // Collect and remove phis in non-exit blocks if they have 1 input.
+  SmallVector<PHINode *, 8> Phis(
+      llvm::make_pointer_range(OuterLoopLatch->phis()));
+  for (PHINode *Phi : Phis) {
+    assert(Phi->getNumIncomingValues() == 1 && "Single input phi expected");
+    LLVM_DEBUG(dbgs() << "Removing 1-input phi in non-exit block: " << *Phi
+                      << "\n");
+    Phi->replaceAllUsesWith(Phi->getIncomingValue(0));
+    Phi->eraseFromParent();
+  }
+}
+
 bool LoopInterchangeTransform::adjustLoopBranches() {
   LLVM_DEBUG(dbgs() << "adjustLoopBranches called\n");
   std::vector<DominatorTree::UpdateType> DTUpdates;
@@ -1882,6 +1928,9 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
   assert(OuterLoopPreHeader != OuterLoop->getHeader() &&
          InnerLoopPreHeader != InnerLoop->getHeader() && OuterLoopPreHeader &&
          InnerLoopPreHeader && "Guaranteed by loop-simplify form");
+
+  simplifyLCSSAPhis(OuterLoop, InnerLoop);
+
   // Ensure that both preheaders do not contain PHI nodes and have single
   // predecessors. This allows us to move them easily. We use
   // InsertPreHeaderForLoop to create an 'extra' preheader, if the existing
diff --git a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index 42d6680..146e7d1 100644
--- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -111,7 +111,7 @@ static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
 }
 
 // Translate a masked load intrinsic like
-// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
+// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr,
 //                                <16 x i1> %mask, <16 x i32> %passthru)
 // to a chain of basic blocks, with loading element one-by-one if
 // the appropriate mask bit is set
@@ -146,11 +146,10 @@ static void scalarizeMaskedLoad(const DataLayout &DL, bool HasBranchDivergence,
                                 CallInst *CI, DomTreeUpdater *DTU,
                                 bool &ModifiedDT) {
   Value *Ptr = CI->getArgOperand(0);
-  Value *Alignment = CI->getArgOperand(1);
-  Value *Mask = CI->getArgOperand(2);
-  Value *Src0 = CI->getArgOperand(3);
+  Value *Mask = CI->getArgOperand(1);
+  Value *Src0 = CI->getArgOperand(2);
 
-  const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
+  const Align AlignVal = CI->getParamAlign(0).valueOrOne();
 
   VectorType *VecType = cast<FixedVectorType>(CI->getType());
   Type *EltTy = VecType->getElementType();
@@ -290,7 +289,7 @@ static void scalarizeMaskedLoad(const DataLayout &DL, bool HasBranchDivergence,
 }
 
 // Translate a masked store intrinsic, like
-// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
+// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr,
 //                         <16 x i1> %mask)
 // to a chain of basic blocks, that stores element one-by-one if
 // the appropriate mask bit is set
@@ -320,10 +319,9 @@ static void scalarizeMaskedStore(const DataLayout &DL, bool HasBranchDivergence,
                                  bool &ModifiedDT) {
   Value *Src = CI->getArgOperand(0);
   Value *Ptr = CI->getArgOperand(1);
-  Value *Alignment = CI->getArgOperand(2);
-  Value *Mask = CI->getArgOperand(3);
+  Value *Mask = CI->getArgOperand(2);
 
-  const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
+  const Align AlignVal = CI->getParamAlign(1).valueOrOne();
 
   auto *VecType = cast<VectorType>(Src->getType());
   Type *EltTy = VecType->getElementType();
@@ -472,9 +470,8 @@ static void scalarizeMaskedGather(const DataLayout &DL,
                                   bool HasBranchDivergence, CallInst *CI,
                                   DomTreeUpdater *DTU, bool &ModifiedDT) {
   Value *Ptrs = CI->getArgOperand(0);
-  Value *Alignment = CI->getArgOperand(1);
-  Value *Mask = CI->getArgOperand(2);
-  Value *Src0 = CI->getArgOperand(3);
+  Value *Mask = CI->getArgOperand(1);
+  Value *Src0 = CI->getArgOperand(2);
 
   auto *VecType = cast<FixedVectorType>(CI->getType());
   Type *EltTy = VecType->getElementType();
@@ -483,7 +480,7 @@ static void scalarizeMaskedGather(const DataLayout &DL,
   Instruction *InsertPt = CI;
   BasicBlock *IfBlock = CI->getParent();
   Builder.SetInsertPoint(InsertPt);
-  MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
+  Align AlignVal = CI->getParamAlign(0).valueOrOne();
 
   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
 
@@ -608,8 +605,7 @@ static void scalarizeMaskedScatter(const DataLayout &DL,
                                    DomTreeUpdater *DTU, bool &ModifiedDT) {
   Value *Src = CI->getArgOperand(0);
   Value *Ptrs = CI->getArgOperand(1);
-  Value *Alignment = CI->getArgOperand(2);
-  Value *Mask = CI->getArgOperand(3);
+  Value *Mask = CI->getArgOperand(2);
 
   auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
 
@@ -623,7 +619,7 @@ static void scalarizeMaskedScatter(const DataLayout &DL,
   Builder.SetInsertPoint(InsertPt);
   Builder.SetCurrentDebugLocation(CI->getDebugLoc());
 
-  MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
+  Align AlignVal = CI->getParamAlign(1).valueOrOne();
   unsigned VectorWidth = SrcFVTy->getNumElements();
 
   // Shorten the way if the mask is a vector of constants.
@@ -1125,8 +1121,7 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
   case Intrinsic::masked_load:
     // Scalarize unsupported vector masked load
     if (TTI.isLegalMaskedLoad(
-            CI->getType(),
-            cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue(),
+            CI->getType(), CI->getParamAlign(0).valueOrOne(),
            cast<PointerType>(CI->getArgOperand(0)->getType())
                ->getAddressSpace()))
      return false;
@@ -1135,18 +1130,15 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
   case Intrinsic::masked_store:
     if (TTI.isLegalMaskedStore(
             CI->getArgOperand(0)->getType(),
-            cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue(),
+            CI->getParamAlign(1).valueOrOne(),
            cast<PointerType>(CI->getArgOperand(1)->getType())
                ->getAddressSpace()))
      return false;
    scalarizeMaskedStore(DL, HasBranchDivergence, CI, DTU, ModifiedDT);
    return true;
  case Intrinsic::masked_gather: {
-    MaybeAlign MA =
-        cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue();
+    Align Alignment = CI->getParamAlign(0).valueOrOne();
     Type *LoadTy = CI->getType();
-    Align Alignment = DL.getValueOrABITypeAlignment(MA,
-                                                    LoadTy->getScalarType());
     if (TTI.isLegalMaskedGather(LoadTy, Alignment) &&
         !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment))
       return false;
@@ -1154,11 +1146,8 @@ static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
     return true;
   }
   case Intrinsic::masked_scatter: {
-    MaybeAlign MA =
-        cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue();
+    Align Alignment = CI->getParamAlign(1).valueOrOne();
     Type *StoreTy = CI->getArgOperand(0)->getType();
-    Align Alignment = DL.getValueOrABITypeAlignment(MA,
-                                                    StoreTy->getScalarType());
     if (TTI.isLegalMaskedScatter(StoreTy, Alignment) &&
         !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy),
                                          Alignment))
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index fa66a03..23e1243 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -227,6 +227,7 @@ static InstructionCost ComputeSpeculationCost(const Instruction *I,
     case Instruction::Call:
     case Instruction::BitCast:
     case Instruction::PtrToInt:
+    case Instruction::PtrToAddr:
     case Instruction::IntToPtr:
     case Instruction::AddrSpaceCast:
     case Instruction::FPToUI:
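
For orientation, the common thread in the masked-intrinsic changes above is that llvm.masked.load/store (and gather/scatter) no longer carry an explicit i32 alignment operand; the alignment is instead read from an align parameter attribute on the pointer argument via getParamAlign(), which shifts the mask and passthru operand indices down by one. A minimal IR sketch of a call pair under the new operand layout follows; the function name @copy_masked, the v16i32 element type, and the align 16 values are illustrative assumptions, not taken from this diff.

declare <16 x i32> @llvm.masked.load.v16i32.p0(ptr, <16 x i1>, <16 x i32>)
declare void @llvm.masked.store.v16i32.p0(<16 x i32>, ptr, <16 x i1>)

; Hypothetical example: masked copy of 16 i32 elements. Alignment is expressed
; as an `align` parameter attribute on the pointer operand rather than as a
; separate i32 argument, so the mask is operand 1 of the load and operand 2 of
; the store, matching the updated indices used by the passes above.
define <16 x i32> @copy_masked(ptr %src, ptr %dst, <16 x i1> %mask, <16 x i32> %passthru) {
  %v = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr align 16 %src, <16 x i1> %mask, <16 x i32> %passthru)
  call void @llvm.masked.store.v16i32.p0(<16 x i32> %v, ptr align 16 %dst, <16 x i1> %mask)
  ret <16 x i32> %v
}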