[VP][fix] Don't discard masks in reductions

When expanding VP reductions to non-VP code, the expansion pass previously ignored the mask. Fix this by keeping the mask and selecting neutral elements where the mask is zero.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D126362
author: Simon Moll <moll@cs.uni-saarland.de> 2022-05-25 15:08:31 +0200
committer: Simon Moll <moll@cs.uni-saarland.de> 2022-05-25 15:54:45 +0200
commit: 6e12711081d76fea8dc2f4d9579cfdf379497bbf (patch)
tree: bdcb111867463c3caf9a17a0aa6af3e372c40559 /llvm/lib/CodeGen/ExpandVectorPredication.cpp
parent: d8dda57ae7798a052e52ef5979980e815d2e4bc5 (diff)
download: llvm-6e12711081d76fea8dc2f4d9579cfdf379497bbf.zip
llvm-6e12711081d76fea8dc2f4d9579cfdf379497bbf.tar.gz
llvm-6e12711081d76fea8dc2f4d9579cfdf379497bbf.tar.bz2
1 files changed, 16 insertions, 7 deletions
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 088e683..aa52914 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -113,6 +113,17 @@ static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
   OldOp.eraseFromParent();
 }
 
+static bool maySpeculateLanes(VPIntrinsic &VPI) {
+  // VP reduction results depend on the mask and evl: report not speculatable.
+  if (isa<VPReductionIntrinsic>(VPI))
+    return false;
+  // Otherwise, fall back to whether the intrinsic itself is speculatable.
+  // FIXME: Check whether the replacing non-VP code will be speculatable
+  //        instead. VP intrinsics themselves are never speculatable because of
+  //        UB if %evl is greater than the runtime vector length.
+  return isSafeToSpeculativelyExecute(cast<Operator>(&VPI));
+}
+
 //// } Helpers
 
 namespace {
@@ -216,8 +227,7 @@ Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
 Value *
 CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
                                                      VPIntrinsic &VPI) {
-  assert((isSafeToSpeculativelyExecute(&VPI) ||
-          VPI.canIgnoreVectorLengthParam()) &&
+  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
          "Implicitly dropping %evl in non-speculatable operator!");
 
   auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
@@ -296,8 +306,7 @@ static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
 Value *
 CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder,
                                                 VPReductionIntrinsic &VPI) {
-  assert((isSafeToSpeculativelyExecute(&VPI) ||
-          VPI.canIgnoreVectorLengthParam()) &&
+  assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
          "Implicitly dropping %evl in non-speculatable operator!");
 
   Value *Mask = VPI.getMaskParam();
@@ -471,9 +480,9 @@ struct TransformJob {
   bool isDone() const { return Strategy.shouldDoNothing(); }
 };
 
-void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) {
+void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) {
   // Speculatable instructions do not strictly need predication.
-  if (isSafeToSpeculativelyExecute(&I)) {
+  if (maySpeculateLanes(VPI)) {
     // Converting a speculatable VP intrinsic means dropping %mask and %evl.
     // No need to expand %evl into the %mask only to ignore that code.
     if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
@@ -518,7 +527,7 @@ bool CachingVPExpander::expandVectorPredication() {
     if (!VPI)
       continue;
     auto VPStrat = getVPLegalizationStrategy(*VPI);
-    sanitizeStrategy(I, VPStrat);
+    sanitizeStrategy(*VPI, VPStrat);
     if (!VPStrat.shouldDoNothing())
       Worklist.emplace_back(VPI, VPStrat);
   }
author	Simon Moll <moll@cs.uni-saarland.de>	2022-05-25 15:08:31 +0200
committer	Simon Moll <moll@cs.uni-saarland.de>	2022-05-25 15:54:45 +0200
commit	6e12711081d76fea8dc2f4d9579cfdf379497bbf (patch)
tree	bdcb111867463c3caf9a17a0aa6af3e372c40559 /llvm/lib/CodeGen/ExpandVectorPredication.cpp
parent	d8dda57ae7798a052e52ef5979980e815d2e4bc5 (diff)
download	llvm-6e12711081d76fea8dc2f4d9579cfdf379497bbf.zip llvm-6e12711081d76fea8dc2f4d9579cfdf379497bbf.tar.gz llvm-6e12711081d76fea8dc2f4d9579cfdf379497bbf.tar.bz2