diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 185 |
1 files changed, 154 insertions, 31 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 957c97cd..1256e4d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -472,6 +472,26 @@ static void removeRedundantCanonicalIVs(VPlan &Plan) { } } +/// Returns true if \p R is dead and can be removed. +static bool isDeadRecipe(VPRecipeBase &R) { + using namespace llvm::PatternMatch; + // Do remove conditional assume instructions as their conditions may be + // flattened. + auto *RepR = dyn_cast<VPReplicateRecipe>(&R); + bool IsConditionalAssume = + RepR && RepR->isPredicated() && + match(RepR->getUnderlyingInstr(), m_Intrinsic<Intrinsic::assume>()); + if (IsConditionalAssume) + return true; + + if (R.mayHaveSideEffects()) + return false; + + // Recipe is dead if no user keeps the recipe alive. + return all_of(R.definedValues(), + [](VPValue *V) { return V->getNumUsers() == 0; }); +} + static void removeDeadRecipes(VPlan &Plan) { ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT( Plan.getEntry()); @@ -480,22 +500,8 @@ static void removeDeadRecipes(VPlan &Plan) { // The recipes in the block are processed in reverse order, to catch chains // of dead recipes. for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) { - // A user keeps R alive: - if (any_of(R.definedValues(), - [](VPValue *V) { return V->getNumUsers(); })) - continue; - - using namespace llvm::PatternMatch; - // Having side effects keeps R alive, but do remove conditional assume - // instructions as their conditions may be flattened. 
- auto *RepR = dyn_cast<VPReplicateRecipe>(&R); - bool IsConditionalAssume = - RepR && RepR->isPredicated() && - match(RepR->getUnderlyingInstr(), m_Intrinsic<Intrinsic::assume>()); - if (R.mayHaveSideEffects() && !IsConditionalAssume) - continue; - - R.eraseFromParent(); + if (isDeadRecipe(R)) + R.eraseFromParent(); } } } @@ -635,6 +641,25 @@ static void removeRedundantExpandSCEVRecipes(VPlan &Plan) { } } +static void recursivelyDeleteDeadRecipes(VPValue *V) { + SmallVector<VPValue *> WorkList; + SmallPtrSet<VPValue *, 8> Seen; + WorkList.push_back(V); + + while (!WorkList.empty()) { + VPValue *Cur = WorkList.pop_back_val(); + if (!Seen.insert(Cur).second) + continue; + VPRecipeBase *R = Cur->getDefiningRecipe(); + if (!R) + continue; + if (!isDeadRecipe(*R)) + continue; + WorkList.append(R->op_begin(), R->op_end()); + R->eraseFromParent(); + } +} + void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE) { @@ -668,7 +693,11 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, auto *BOC = new VPInstruction(VPInstruction::BranchOnCond, {Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))}); + + SmallVector<VPValue *> PossiblyDead(Term->operands()); Term->eraseFromParent(); + for (VPValue *Op : PossiblyDead) + recursivelyDeleteDeadRecipes(Op); ExitingVPBB->appendRecipe(BOC); Plan.setVF(BestVF); Plan.setUF(BestUF); @@ -1180,6 +1209,45 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( return LaneMaskPhi; } +/// Replaces (ICMP_ULE, WideCanonicalIV, backedge-taken-count) pattern using +/// the given \p Idiom. 
+static void +replaceHeaderPredicateWith(VPlan &Plan, VPValue &Idiom, + function_ref<bool(VPUser &, unsigned)> Cond = {}) { + auto *FoundWidenCanonicalIVUser = + find_if(Plan.getCanonicalIV()->users(), + [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); }); + if (FoundWidenCanonicalIVUser == Plan.getCanonicalIV()->users().end()) + return; + auto *WideCanonicalIV = + cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser); + // Walk users of WideCanonicalIV and replace all compares of the form + // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with + // the given idiom VPValue. + VPValue *BTC = Plan.getOrCreateBackedgeTakenCount(); + for (VPUser *U : SmallVector<VPUser *>(WideCanonicalIV->users())) { + auto *CompareToReplace = dyn_cast<VPInstruction>(U); + if (!CompareToReplace || + CompareToReplace->getOpcode() != Instruction::ICmp || + CompareToReplace->getPredicate() != CmpInst::ICMP_ULE || + CompareToReplace->getOperand(1) != BTC) + continue; + + assert(CompareToReplace->getOperand(0) == WideCanonicalIV && + "WidenCanonicalIV must be the first operand of the compare"); + if (Cond) { + CompareToReplace->replaceUsesWithIf(&Idiom, Cond); + if (!CompareToReplace->getNumUsers()) + CompareToReplace->eraseFromParent(); + } else { + CompareToReplace->replaceAllUsesWith(&Idiom); + CompareToReplace->eraseFromParent(); + } + } + if (!WideCanonicalIV->getNumUsers()) + WideCanonicalIV->eraseFromParent(); +} + void VPlanTransforms::addActiveLaneMask( VPlan &Plan, bool UseActiveLaneMaskForControlFlow, bool DataAndControlFlowWithoutRuntimeCheck) { @@ -1209,20 +1277,77 @@ void VPlanTransforms::addActiveLaneMask( // Walk users of WideCanonicalIV and replace all compares of the form // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an // active-lane-mask. 
- VPValue *BTC = Plan.getOrCreateBackedgeTakenCount(); - for (VPUser *U : SmallVector<VPUser *>(WideCanonicalIV->users())) { - auto *CompareToReplace = dyn_cast<VPInstruction>(U); - if (!CompareToReplace || - CompareToReplace->getOpcode() != Instruction::ICmp || - CompareToReplace->getPredicate() != CmpInst::ICMP_ULE || - CompareToReplace->getOperand(1) != BTC) - continue; + replaceHeaderPredicateWith(Plan, *LaneMask); +} - assert(CompareToReplace->getOperand(0) == WideCanonicalIV && - "WidenCanonicalIV must be the first operand of the compare"); - CompareToReplace->replaceAllUsesWith(LaneMask); - CompareToReplace->eraseFromParent(); +/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and +/// replace all uses except the canonical IV increment of +/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe +/// is used only for loop iteration counting after this transformation. +/// +/// The function uses the following definitions: +/// %StartV is the canonical induction start value. +/// +/// The function adds the following recipes: +/// +/// vector.ph: +/// ... +/// +/// vector.body: +/// ... +/// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ], +/// [ %NextEVLIV, %vector.body ] +/// %VPEVL = EXPLICIT-VECTOR-LENGTH %EVLPhi, original TC +/// ... +/// %NextEVLIV = add IVSize (cast i32 %VPEVL to IVSize), %EVLPhi +/// ... +/// +void VPlanTransforms::addExplicitVectorLength(VPlan &Plan) { + VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + auto *CanonicalIVPHI = Plan.getCanonicalIV(); + VPValue *StartV = CanonicalIVPHI->getStartValue(); + + // TODO: revisit this and try to remove the mask operand. + // Walk VPWidenMemoryInstructionRecipe users of WideCanonicalIV and replace + // all compares of the form (ICMP_ULE, WideCanonicalIV, backedge-taken-count), + // used as mask in VPWidenMemoryInstructionRecipe, with an all-true-mask. 
+ Value *TrueMask = + ConstantInt::getTrue(CanonicalIVPHI->getScalarType()->getContext()); + VPValue *VPTrueMask = Plan.getOrAddLiveIn(TrueMask); + replaceHeaderPredicateWith(Plan, *VPTrueMask, [](VPUser &U, unsigned) { + return isa<VPWidenMemoryInstructionRecipe>(U); + }); + // Now create the ExplicitVectorLengthPhi recipe in the main loop. + auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc()); + EVLPhi->insertAfter(CanonicalIVPHI); + auto *VPEVL = new VPInstruction(VPInstruction::ExplicitVectorLength, + {EVLPhi, Plan.getTripCount()}); + VPEVL->insertBefore(*Header, Header->getFirstNonPhi()); + + auto *CanonicalIVIncrement = + cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue()); + VPSingleDefRecipe *OpVPEVL = VPEVL; + if (unsigned IVSize = CanonicalIVPHI->getScalarType()->getScalarSizeInBits(); + IVSize != 32) { + OpVPEVL = new VPScalarCastRecipe(IVSize < 32 ? Instruction::Trunc + : Instruction::ZExt, + OpVPEVL, CanonicalIVPHI->getScalarType()); + OpVPEVL->insertBefore(CanonicalIVIncrement); } + auto *NextEVLIV = + new VPInstruction(Instruction::Add, {OpVPEVL, EVLPhi}, + {CanonicalIVIncrement->hasNoUnsignedWrap(), + CanonicalIVIncrement->hasNoSignedWrap()}, + CanonicalIVIncrement->getDebugLoc(), "index.evl.next"); + NextEVLIV->insertBefore(CanonicalIVIncrement); + EVLPhi->addOperand(NextEVLIV); + + // Replace all uses of VPCanonicalIVPHIRecipe by + // VPEVLBasedIVPHIRecipe except for the canonical IV increment. + CanonicalIVPHI->replaceAllUsesWith(EVLPhi); + CanonicalIVIncrement->setOperand(0, CanonicalIVPHI); + // TODO: support unroll factor > 1. + Plan.setUF(1); } void VPlanTransforms::dropPoisonGeneratingRecipes( @@ -1248,9 +1373,7 @@ void VPlanTransforms::dropPoisonGeneratingRecipes( // handled. 
if (isa<VPWidenMemoryInstructionRecipe>(CurRec) || isa<VPInterleaveRecipe>(CurRec) || - isa<VPScalarIVStepsRecipe>(CurRec) || - isa<VPCanonicalIVPHIRecipe>(CurRec) || - isa<VPActiveLaneMaskPHIRecipe>(CurRec)) + isa<VPScalarIVStepsRecipe>(CurRec) || isa<VPHeaderPHIRecipe>(CurRec)) continue; // This recipe contributes to the address computation of a widen |