aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp18
-rw-r--r--llvm/test/Transforms/SLPVectorizer/X86/phi-removed-on-operand-vectorization.ll37
2 files changed, 52 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4dc9241..71f742e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -18732,6 +18732,11 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
},
/*MaxVFOnly=*/true, R);
Changed |= HaveVectorizedPhiNodes;
+ if (HaveVectorizedPhiNodes && any_of(PHIToOpcodes, [&](const auto &P) {
+ auto *PHI = dyn_cast<PHINode>(P.first);
+ return !PHI || R.isDeleted(PHI);
+ }))
+ PHIToOpcodes.clear();
VisitedInstrs.insert(Incoming.begin(), Incoming.end());
} while (HaveVectorizedPhiNodes);
@@ -18804,7 +18809,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
}
// Try to vectorize the incoming values of the PHI, to catch reductions
// that feed into PHIs.
- for (unsigned I = 0, E = P->getNumIncomingValues(); I != E; I++) {
+ for (unsigned I : seq<unsigned>(P->getNumIncomingValues())) {
// Skip if the incoming block is the current BB for now. Also, bypass
// unreachable IR for efficiency and to avoid crashing.
// TODO: Collect the skipped incoming values and try to vectorize them
@@ -18816,9 +18821,16 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Postponed instructions should not be vectorized here, delay their
// vectorization.
if (auto *PI = dyn_cast<Instruction>(P->getIncomingValue(I));
- PI && !IsInPostProcessInstrs(PI))
- Changed |= vectorizeRootInstruction(nullptr, PI,
+ PI && !IsInPostProcessInstrs(PI)) {
+ bool Res = vectorizeRootInstruction(nullptr, PI,
P->getIncomingBlock(I), R, TTI);
+ Changed |= Res;
+ if (Res && R.isDeleted(P)) {
+ It = BB->begin();
+ E = BB->end();
+ break;
+ }
+ }
}
continue;
}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-removed-on-operand-vectorization.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-removed-on-operand-vectorization.ll
new file mode 100644
index 0000000..23f64b1e
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-removed-on-operand-vectorization.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer -mtriple=i386-pc-windows-msvc19.34.0 -mcpu=pentium4 < %s | FileCheck %s
+
+define i32 @test(double %mul321.i) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: double [[MUL321_I:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[MUL321_I]], i32 0
+; CHECK-NEXT: br label %[[DO_BODY220_I:.*]]
+; CHECK: [[DO_BODY220_I]]:
+; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ [[TMP6:%.*]], %[[DO_BODY221_I:.*]] ], [ zeroinitializer, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[DO_BODY221_I]]
+; CHECK: [[DO_BODY221_I]]:
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
+; CHECK-NEXT: [[ADD318_I:%.*]] = fadd double [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[TMP6]] = insertelement <2 x double> [[TMP0]], double [[ADD318_I]], i32 1
+; CHECK-NEXT: br label %[[DO_BODY220_I]]
+;
+entry:
+ br label %do.body220.i
+
+do.body220.i:
+ %c1.2.i = phi double [ %mul321.i, %do.body221.i ], [ 0.000000e+00, %entry ]
+ %s1.1.i = phi double [ %add318.i, %do.body221.i ], [ 0.000000e+00, %entry ]
+ br label %do.body221.i
+
+do.body221.i: ; preds = %do.body220.i
+ %sub311.i1 = fadd double %c1.2.i, 0.000000e+00
+ %add315.i = fadd double %s1.1.i, 0.000000e+00
+ %mul316.i = fmul double %sub311.i1, 0.000000e+00
+ %mul317.i = fmul double %add315.i, 0.000000e+00
+ %add318.i = fadd double %mul316.i, %mul317.i
+ br label %do.body220.i
+}