diff options
author | Sanjay Patel <spatel@rotateright.com> | 2019-11-06 17:24:09 -0500 |
---|---|---|
committer | Sanjay Patel <spatel@rotateright.com> | 2019-11-07 06:08:42 -0500 |
commit | 7ff57705ba196ce649d6034614b3b9df57e1f84f (patch) | |
tree | f64f7a3809df4ab32e919b62c531e0a2b7dacccd /llvm/lib | |
parent | 55b445150da9101fda07a4c28ee6a4e4bc9fc89a (diff) | |
download | llvm-7ff57705ba196ce649d6034614b3b9df57e1f84f.zip llvm-7ff57705ba196ce649d6034614b3b9df57e1f84f.tar.gz llvm-7ff57705ba196ce649d6034614b3b9df57e1f84f.tar.bz2 |
[SLP] allow forming 2-way reduction patterns
We have a vector compare reduction problem seen in PR39665 comment 2:
https://bugs.llvm.org/show_bug.cgi?id=39665#c2
Or slightly reduced here:
define i1 @cmp2(<2 x double> %a0) {
%a = fcmp ogt <2 x double> %a0, <double 1.0, double 1.0>
%b = extractelement <2 x i1> %a, i32 0
%c = extractelement <2 x i1> %a, i32 1
%d = and i1 %b, %c
ret i1 %d
}
SLP would not attempt to turn this into a vector reduction because there is an
artificial lower limit on that transform. We cannot completely remove that limit
without inducing regressions, though, so this patch just hacks an extra attempt at
creating a 2-way reduction onto the end of the analysis.
As shown in the test file, we are still not getting some of the motivating cases,
so follow-on patches will be needed to solve those cases.
Differential Revision: https://reviews.llvm.org/D59710
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 37 |
1 files changed, 29 insertions, 8 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index c0c1a45..33e388a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6397,7 +6397,7 @@ public: /// Attempt to vectorize the tree found by /// matchAssociativeReduction. - bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) { + bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI, bool Try2WayRdx) { if (ReducedVals.empty()) return false; @@ -6405,11 +6405,14 @@ public: // to a nearby power-of-2. Can safely generate oversized // vectors and rely on the backend to split them to legal sizes. unsigned NumReducedVals = ReducedVals.size(); - if (NumReducedVals < 4) + if (Try2WayRdx && NumReducedVals != 2) + return false; + unsigned MinRdxVals = Try2WayRdx ? 2 : 4; + if (NumReducedVals < MinRdxVals) return false; unsigned ReduxWidth = PowerOf2Floor(NumReducedVals); - + unsigned MinRdxWidth = Log2_32(MinRdxVals); Value *VectorizedTree = nullptr; // FIXME: Fast-math-flags should be set based on the instructions in the @@ -6433,7 +6436,7 @@ public: SmallVector<Value *, 16> IgnoreList; for (auto &V : ReductionOps) IgnoreList.append(V.begin(), V.end()); - while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) { + while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > MinRdxWidth) { auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth); V.buildTree(VL, ExternallyUsedValues, IgnoreList); Optional<ArrayRef<unsigned>> Order = V.bestOrder(); @@ -6759,7 +6762,7 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P, /// performed. 
static bool tryToVectorizeHorReductionOrInstOperands( PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R, - TargetTransformInfo *TTI, + TargetTransformInfo *TTI, bool Try2WayRdx, const function_ref<bool(Instruction *, BoUpSLP &)> Vectorize) { if (!ShouldVectorizeHor) return false; @@ -6790,7 +6793,7 @@ static bool tryToVectorizeHorReductionOrInstOperands( if (BI || SI) { HorizontalReduction HorRdx; if (HorRdx.matchAssociativeReduction(P, Inst)) { - if (HorRdx.tryToReduce(R, TTI)) { + if (HorRdx.tryToReduce(R, TTI, Try2WayRdx)) { Res = true; // Set P to nullptr to avoid re-analysis of phi node in // matchAssociativeReduction function unless this is the root node. @@ -6833,7 +6836,8 @@ static bool tryToVectorizeHorReductionOrInstOperands( bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V, BasicBlock *BB, BoUpSLP &R, - TargetTransformInfo *TTI) { + TargetTransformInfo *TTI, + bool Try2WayRdx) { if (!V) return false; auto *I = dyn_cast<Instruction>(V); @@ -6846,7 +6850,7 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V, auto &&ExtraVectorization = [this](Instruction *I, BoUpSLP &R) -> bool { return tryToVectorize(I, R); }; - return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI, + return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI, Try2WayRdx, ExtraVectorization); } @@ -7042,6 +7046,23 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { PostProcessInstructions.push_back(&*it); } + // Make a final attempt to match a 2-way reduction if nothing else worked. + // We do not try this above because it may interfere with other vectorization + // attempts. + // TODO: The constraints are copied from the above call to + // vectorizeRootInstruction(), but that might be too restrictive? 
+ BasicBlock::iterator LastInst = --BB->end(); + if (!Changed && LastInst->use_empty() && + (LastInst->getType()->isVoidTy() || isa<CallInst>(LastInst) || + isa<InvokeInst>(LastInst))) { + if (ShouldStartVectorizeHorAtStore || !isa<StoreInst>(LastInst)) { + for (auto *V : LastInst->operand_values()) { + Changed |= vectorizeRootInstruction(nullptr, V, BB, R, TTI, + /* Try2WayRdx */ true); + } + } + } + return Changed; } |