diff options
-rw-r--r-- | llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 18 | ||||
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/phi-removed-on-operand-vectorization.ll | 37 |
2 files changed, 52 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4dc9241..71f742e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -18732,6 +18732,11 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { }, /*MaxVFOnly=*/true, R); Changed |= HaveVectorizedPhiNodes; + if (HaveVectorizedPhiNodes && any_of(PHIToOpcodes, [&](const auto &P) { + auto *PHI = dyn_cast<PHINode>(P.first); + return !PHI || R.isDeleted(PHI); + })) + PHIToOpcodes.clear(); VisitedInstrs.insert(Incoming.begin(), Incoming.end()); } while (HaveVectorizedPhiNodes); @@ -18804,7 +18809,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { } // Try to vectorize the incoming values of the PHI, to catch reductions // that feed into PHIs. - for (unsigned I = 0, E = P->getNumIncomingValues(); I != E; I++) { + for (unsigned I : seq<unsigned>(P->getNumIncomingValues())) { // Skip if the incoming block is the current BB for now. Also, bypass // unreachable IR for efficiency and to avoid crashing. // TODO: Collect the skipped incoming values and try to vectorize them @@ -18816,9 +18821,16 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { // Postponed instructions should not be vectorized here, delay their // vectorization. if (auto *PI = dyn_cast<Instruction>(P->getIncomingValue(I)); - PI && !IsInPostProcessInstrs(PI)) - Changed |= vectorizeRootInstruction(nullptr, PI, + PI && !IsInPostProcessInstrs(PI)) { + bool Res = vectorizeRootInstruction(nullptr, PI, P->getIncomingBlock(I), R, TTI); + Changed |= Res; + if (Res && R.isDeleted(P)) { + It = BB->begin(); + E = BB->end(); + break; + } + } } continue; } diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-removed-on-operand-vectorization.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-removed-on-operand-vectorization.ll new file mode 100644 index 0000000..23f64b1e --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-removed-on-operand-vectorization.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -mtriple=i386-pc-windows-msvc19.34.0 -mcpu=pentium4 < %s | FileCheck %s + +define i32 @test(double %mul321.i) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: double [[MUL321_I:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> poison, double [[MUL321_I]], i32 0 +; CHECK-NEXT: br label %[[DO_BODY220_I:.*]] +; CHECK: [[DO_BODY220_I]]: +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ [[TMP6:%.*]], %[[DO_BODY221_I:.*]] ], [ zeroinitializer, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[DO_BODY221_I]] +; CHECK: [[DO_BODY221_I]]: +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 +; CHECK-NEXT: [[ADD318_I:%.*]] = fadd double [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP6]] = insertelement <2 x double> [[TMP0]], double [[ADD318_I]], i32 1 +; CHECK-NEXT: br label %[[DO_BODY220_I]] +; +entry: + br label %do.body220.i + +do.body220.i: + %c1.2.i = phi double [ %mul321.i, %do.body221.i ], [ 0.000000e+00, %entry ] + %s1.1.i = phi double [ %add318.i, %do.body221.i ], [ 0.000000e+00, %entry ] + br label %do.body221.i + +do.body221.i: ; preds = %do.body220.i + %sub311.i1 = fadd double %c1.2.i, 0.000000e+00 + %add315.i = fadd double %s1.1.i, 0.000000e+00 + %mul316.i = fmul double %sub311.i1, 0.000000e+00 + %mul317.i = fmul double %add315.i, 0.000000e+00 + %add318.i = fadd double %mul316.i, %mul317.i + br label %do.body220.i +} |