diff options
Diffstat (limited to 'llvm/lib/Transforms')
| -rw-r--r-- | llvm/lib/Transforms/IPO/SampleProfile.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 11 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp | 7 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 47 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp | 14 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanHelpers.h | 19 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 18 |
8 files changed, 78 insertions, 41 deletions
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index bd74388..8e76b79 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -83,7 +83,6 @@ #include <cstdint> #include <functional> #include <limits> -#include <map> #include <memory> #include <queue> #include <string> diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 27ec6c6..5bc9c28 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -5627,8 +5627,15 @@ bool InstCombinerImpl::run() { for (Use &U : I->uses()) { User *User = U.getUser(); - if (User->isDroppable()) - continue; + if (User->isDroppable()) { + // Do not sink if there are dereferenceable assumes that would be + // removed. + auto II = dyn_cast<IntrinsicInst>(User); + if (II->getIntrinsicID() != Intrinsic::assume || + !II->getOperandBundle("dereferenceable")) + continue; + } + if (NumUsers > MaxSinkNumUsers) return std::nullopt; diff --git a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp index a577f51..4a7144f 100644 --- a/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp +++ b/llvm/lib/Transforms/Scalar/DropUnnecessaryAssumes.cpp @@ -78,11 +78,16 @@ DropUnnecessaryAssumesPass::run(Function &F, FunctionAnalysisManager &FAM) { SmallVector<OperandBundleDef> KeptBundles; unsigned NumBundles = Assume->getNumOperandBundles(); for (unsigned I = 0; I != NumBundles; ++I) { - auto IsDead = [](OperandBundleUse Bundle) { + auto IsDead = [&](OperandBundleUse Bundle) { // "ignore" operand bundles are always dead. if (Bundle.getTagName() == "ignore") return true; + // "dereferenceable" operand bundles are only dropped if requested + // (e.g., after loop vectorization has run). + if (Bundle.getTagName() == "dereferenceable") + return DropDereferenceable; + // Bundles without arguments do not affect any specific values. // Always keep them for now. if (Bundle.Inputs.empty()) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 666033b..566d6ea 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1232,6 +1232,30 @@ public: /// Superset of instructions that return true for isScalarWithPredication. bool isPredicatedInst(Instruction *I) const; + /// A helper function that returns how much we should divide the cost of a + /// predicated block by. Typically this is the reciprocal of the block + /// probability, i.e. if we return X we are assuming the predicated block will + /// execute once for every X iterations of the loop header so the block should + /// only contribute 1/X of its cost to the total cost calculation, but when + /// optimizing for code size it will just be 1 as code size costs don't depend + /// on execution probabilities. + /// + /// TODO: We should use actual block probability here, if available. + /// Currently, we always assume predicated blocks have a 50% chance of + /// executing, apart from blocks that are only predicated due to tail folding. + inline unsigned + getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind, + BasicBlock *BB) const { + // If a block wasn't originally predicated but was predicated due to + // e.g. tail folding, don't divide the cost. Tail folded loops may still be + // predicated in the final vector loop iteration, but for most loops that + // don't have low trip counts we can expect their probability to be close to + // zero. + if (!Legal->blockNeedsPredication(BB)) + return 1; + return CostKind == TTI::TCK_CodeSize ? 1 : 2; + } + /// Return the costs for our two available strategies for lowering a /// div/rem operation which requires speculating at least one lane. /// First result is for scalarization (will be invalid for scalable @@ -2887,7 +2911,8 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I, // Scale the cost by the probability of executing the predicated blocks. // This assumes the predicated block for each vector lane is equally // likely. - ScalarizationCost = ScalarizationCost / getPredBlockCostDivisor(CostKind); + ScalarizationCost = + ScalarizationCost / getPredBlockCostDivisor(CostKind, I->getParent()); } InstructionCost SafeDivisorCost = 0; @@ -5032,7 +5057,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount( } // Scale the total scalar cost by block probability. - ScalarCost /= getPredBlockCostDivisor(CostKind); + ScalarCost /= getPredBlockCostDivisor(CostKind, I->getParent()); // Compute the discount. A non-negative discount means the vector version // of the instruction costs more, and scalarizing would be beneficial. @@ -5082,10 +5107,11 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) { // stores and instructions that may divide by zero) will now be // unconditionally executed. For the scalar case, we may not always execute // the predicated block, if it is an if-else block. Thus, scale the block's - // cost by the probability of executing it. blockNeedsPredication from - // Legal is used so as to not include all blocks in tail folded loops. - if (VF.isScalar() && Legal->blockNeedsPredication(BB)) - BlockCost /= getPredBlockCostDivisor(CostKind); + // cost by the probability of executing it. + // getPredBlockCostDivisor will return 1 for blocks that are only predicated + // by the header mask when folding the tail. + if (VF.isScalar()) + BlockCost /= getPredBlockCostDivisor(CostKind, BB); Cost += BlockCost; } @@ -5164,7 +5190,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, // conditional branches, but may not be executed for each vector lane. Scale // the cost by the probability of executing the predicated block. if (isPredicatedInst(I)) { - Cost /= getPredBlockCostDivisor(CostKind); + Cost /= getPredBlockCostDivisor(CostKind, I->getParent()); // Add the cost of an i1 extract and a branch auto *VecI1Ty = @@ -6732,6 +6758,10 @@ bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const { SkipCostComputation.contains(UI); } +unsigned VPCostContext::getPredBlockCostDivisor(BasicBlock *BB) const { + return CM.getPredBlockCostDivisor(CostKind, BB); +} + InstructionCost LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF, VPCostContext &CostCtx) const { @@ -8157,9 +8187,10 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, return new VPWidenSelectRecipe(*cast<SelectInst>(Instr), R->operands()); if (Instruction::isCast(VPI->getOpcode())) { + auto *CastR = cast<VPInstructionWithType>(R); auto *CI = cast<CastInst>(Instr); return new VPWidenCastRecipe(CI->getOpcode(), VPI->getOperand(0), - CI->getType(), *CI); + CastR->getResultType(), *CI); } return tryToWiden(VPI); diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index f405c40..663e31a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/InstrTypes.h" #include "llvm/IR/MDBuilder.h" #define DEBUG_TYPE "vplan" @@ -233,10 +234,15 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB, for (Value *Op : Inst->operands()) VPOperands.push_back(getOrCreateVPOperand(Op)); - // Build VPInstruction for any arbitrary Instruction without specific - // representation in VPlan. - NewR = cast<VPInstruction>( - VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst)); + if (auto *CI = dyn_cast<CastInst>(Inst)) { + NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0], + CI->getType(), CI->getDebugLoc()); + NewR->setUnderlyingValue(CI); + } else { + // Build VPInstruction for any arbitrary Instruction without specific + // representation in VPlan. + NewR = VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst); + } } IRDef2VPValue[Inst] = NewR; diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h index 965426f..caabfa7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h +++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h @@ -50,21 +50,6 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF); Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step); -/// A helper function that returns how much we should divide the cost of a -/// predicated block by. Typically this is the reciprocal of the block -/// probability, i.e. if we return X we are assuming the predicated block will -/// execute once for every X iterations of the loop header so the block should -/// only contribute 1/X of its cost to the total cost calculation, but when -/// optimizing for code size it will just be 1 as code size costs don't depend -/// on execution probabilities. -/// -/// TODO: We should use actual block probability here, if available. Currently, -/// we always assume predicated blocks have a 50% chance of executing. -inline unsigned -getPredBlockCostDivisor(TargetTransformInfo::TargetCostKind CostKind) { - return CostKind == TTI::TCK_CodeSize ? 1 : 2; -} - /// A range of powers-of-2 vectorization factors with fixed start and /// adjustable end. The range includes start and excludes end, e.g.,: /// [1, 16) = {1, 2, 4, 8} @@ -367,6 +352,10 @@ struct VPCostContext { /// has already been pre-computed. bool skipCostComputation(Instruction *UI, bool IsVector) const; + /// \returns how much the cost of a predicated block should be divided by. + /// Forwards to LoopVectorizationCostModel::getPredBlockCostDivisor. + unsigned getPredBlockCostDivisor(BasicBlock *BB) const; + /// Returns the OperandInfo for \p V, if it is a live-in. TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const; diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 80cd112..707886f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3349,7 +3349,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, // Scale the cost by the probability of executing the predicated blocks. // This assumes the predicated block for each vector lane is equally // likely. - ScalarCost /= getPredBlockCostDivisor(Ctx.CostKind); + ScalarCost /= Ctx.getPredBlockCostDivisor(UI->getParent()); return ScalarCost; } case Instruction::Load: diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 634df51..b319fbc7 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1745,17 +1745,17 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF, if (match(Term, m_BranchOnCount()) || match(Term, m_BranchOnCond(m_Not(m_ActiveLaneMask( m_VPValue(), m_VPValue(), m_VPValue()))))) { - // Try to simplify the branch condition if TC <= VF * UF when the latch - // terminator is BranchOnCount or BranchOnCond where the input is - // Not(ActiveLaneMask). - const SCEV *TripCount = - vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE); - assert(!isa<SCEVCouldNotCompute>(TripCount) && + // Try to simplify the branch condition if VectorTC <= VF * UF when the + // latch terminator is BranchOnCount or BranchOnCond(Not(ActiveLaneMask)). + const SCEV *VectorTripCount = + vputils::getSCEVExprForVPValue(&Plan.getVectorTripCount(), SE); + if (isa<SCEVCouldNotCompute>(VectorTripCount)) + VectorTripCount = vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE); + assert(!isa<SCEVCouldNotCompute>(VectorTripCount) && "Trip count SCEV must be computable"); ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF); - const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements); - if (TripCount->isZero() || - !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C)) + const SCEV *C = SE.getElementCount(VectorTripCount->getType(), NumElements); + if (!SE.isKnownPredicate(CmpInst::ICMP_ULE, VectorTripCount, C)) return false; } else if (match(Term, m_BranchOnCond(m_VPValue(Cond)))) { // For BranchOnCond, check if we can prove the condition to be true using VF |
