aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
author: Sanjay Patel <spatel@rotateright.com> 2019-11-06 17:24:09 -0500
committer: Sanjay Patel <spatel@rotateright.com> 2019-11-07 06:08:42 -0500
commit: 7ff57705ba196ce649d6034614b3b9df57e1f84f (patch)
tree: f64f7a3809df4ab32e919b62c531e0a2b7dacccd /llvm/lib
parent: 55b445150da9101fda07a4c28ee6a4e4bc9fc89a (diff)
download: llvm-7ff57705ba196ce649d6034614b3b9df57e1f84f.zip
llvm-7ff57705ba196ce649d6034614b3b9df57e1f84f.tar.gz
llvm-7ff57705ba196ce649d6034614b3b9df57e1f84f.tar.bz2
[SLP] allow forming 2-way reduction patterns
We have a vector compare reduction problem seen in PR39665 comment 2:
https://bugs.llvm.org/show_bug.cgi?id=39665#c2

Or slightly reduced here:

define i1 @cmp2(<2 x double> %a0) {
  %a = fcmp ogt <2 x double> %a0, <double 1.0, double 1.0>
  %b = extractelement <2 x i1> %a, i32 0
  %c = extractelement <2 x i1> %a, i32 1
  %d = and i1 %b, %c
  ret i1 %d
}

SLP would not attempt to turn this into a vector reduction because there is an artificial lower limit on that transform. We cannot completely remove that limit without inducing regressions though, so this patch just adds an extra attempt at creating a 2-way reduction at the end of the analysis.

As shown in the test file, we are still not getting some of the motivating cases, so follow-on patches will be needed to solve those cases.

Differential Revision: https://reviews.llvm.org/D59710
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 37
1 file changed, 29 insertions, 8 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c0c1a45..33e388a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6397,7 +6397,7 @@ public:
/// Attempt to vectorize the tree found by
/// matchAssociativeReduction.
- bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
+ bool tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI, bool Try2WayRdx) {
if (ReducedVals.empty())
return false;
@@ -6405,11 +6405,14 @@ public:
// to a nearby power-of-2. Can safely generate oversized
// vectors and rely on the backend to split them to legal sizes.
unsigned NumReducedVals = ReducedVals.size();
- if (NumReducedVals < 4)
+ if (Try2WayRdx && NumReducedVals != 2)
+ return false;
+ unsigned MinRdxVals = Try2WayRdx ? 2 : 4;
+ if (NumReducedVals < MinRdxVals)
return false;
unsigned ReduxWidth = PowerOf2Floor(NumReducedVals);
-
+ unsigned MinRdxWidth = Log2_32(MinRdxVals);
Value *VectorizedTree = nullptr;
// FIXME: Fast-math-flags should be set based on the instructions in the
@@ -6433,7 +6436,7 @@ public:
SmallVector<Value *, 16> IgnoreList;
for (auto &V : ReductionOps)
IgnoreList.append(V.begin(), V.end());
- while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > 2) {
+ while (i < NumReducedVals - ReduxWidth + 1 && ReduxWidth > MinRdxWidth) {
auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
V.buildTree(VL, ExternallyUsedValues, IgnoreList);
Optional<ArrayRef<unsigned>> Order = V.bestOrder();
@@ -6759,7 +6762,7 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
/// performed.
static bool tryToVectorizeHorReductionOrInstOperands(
PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
- TargetTransformInfo *TTI,
+ TargetTransformInfo *TTI, bool Try2WayRdx,
const function_ref<bool(Instruction *, BoUpSLP &)> Vectorize) {
if (!ShouldVectorizeHor)
return false;
@@ -6790,7 +6793,7 @@ static bool tryToVectorizeHorReductionOrInstOperands(
if (BI || SI) {
HorizontalReduction HorRdx;
if (HorRdx.matchAssociativeReduction(P, Inst)) {
- if (HorRdx.tryToReduce(R, TTI)) {
+ if (HorRdx.tryToReduce(R, TTI, Try2WayRdx)) {
Res = true;
// Set P to nullptr to avoid re-analysis of phi node in
// matchAssociativeReduction function unless this is the root node.
@@ -6833,7 +6836,8 @@ static bool tryToVectorizeHorReductionOrInstOperands(
bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
BasicBlock *BB, BoUpSLP &R,
- TargetTransformInfo *TTI) {
+ TargetTransformInfo *TTI,
+ bool Try2WayRdx) {
if (!V)
return false;
auto *I = dyn_cast<Instruction>(V);
@@ -6846,7 +6850,7 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
auto &&ExtraVectorization = [this](Instruction *I, BoUpSLP &R) -> bool {
return tryToVectorize(I, R);
};
- return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI,
+ return tryToVectorizeHorReductionOrInstOperands(P, I, BB, R, TTI, Try2WayRdx,
ExtraVectorization);
}
@@ -7042,6 +7046,23 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
PostProcessInstructions.push_back(&*it);
}
+ // Make a final attempt to match a 2-way reduction if nothing else worked.
+ // We do not try this above because it may interfere with other vectorization
+ // attempts.
+ // TODO: The constraints are copied from the above call to
+ // vectorizeRootInstruction(), but that might be too restrictive?
+ BasicBlock::iterator LastInst = --BB->end();
+ if (!Changed && LastInst->use_empty() &&
+ (LastInst->getType()->isVoidTy() || isa<CallInst>(LastInst) ||
+ isa<InvokeInst>(LastInst))) {
+ if (ShouldStartVectorizeHorAtStore || !isa<StoreInst>(LastInst)) {
+ for (auto *V : LastInst->operand_values()) {
+ Changed |= vectorizeRootInstruction(nullptr, V, BB, R, TTI,
+ /* Try2WayRdx */ true);
+ }
+ }
+ }
+
return Changed;
}