From d57d73daed3057ff48a1e6476a681b2ad46c268c Mon Sep 17 00:00:00 2001 From: Dorit Nuzman Date: Thu, 15 Aug 2019 07:12:14 +0000 Subject: [LV] fold-tail predication should be respected even with assume_safety assume_safety implies that loads under "if's" can be safely executed speculatively (unguarded, unmasked). However this assumption holds only for the original user "if's", not those introduced by the compiler, such as the fold-tail "if" that guards us from loading beyond the original loop trip-count. Currently the combination of fold-tail and assume-safety pragmas results in ignoring the fold-tail predicate that guards the loads, generating unmasked loads. This patch fixes this behavior. Differential Revision: https://reviews.llvm.org/D66106 Reviewers: Ayal, hsaito, fhahn llvm-svn: 368973 --- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp | 8 ++++---- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'llvm/lib') diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 8b311fa..3ac1234 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -869,7 +869,7 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { } bool LoopVectorizationLegality::blockCanBePredicated( - BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) { + BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) { const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); for (Instruction &I : *BB) { @@ -888,7 +888,7 @@ bool LoopVectorizationLegality::blockCanBePredicated( // !llvm.mem.parallel_loop_access implies if-conversion safety. // Otherwise, record that the load needs (real or emulated) masking // and let the cost model decide. 
- if (!IsAnnotatedParallel) + if (!IsAnnotatedParallel || PreserveGuards) MaskedOp.insert(LI); continue; } @@ -1159,7 +1159,7 @@ bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { return Result; } -bool LoopVectorizationLegality::canFoldTailByMasking() { +bool LoopVectorizationLegality::prepareToFoldTailByMasking() { LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); @@ -1202,7 +1202,7 @@ bool LoopVectorizationLegality::canFoldTailByMasking() { // Check and mark all blocks for predication, including those that ordinarily // do not need predication such as the header block. for (BasicBlock *BB : TheLoop->blocks()) { - if (!blockCanBePredicated(BB, SafePointers)) { + if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) { reportVectorizationFailure( "Cannot fold tail by masking as required", "control flow cannot be substituted for a select", diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 2030541..dac48a1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4853,7 +4853,7 @@ Optional<unsigned> LoopVectorizationCostModel::computeMaxVF() { // found modulo the vectorization factor is not zero, try to fold the tail // by masking. // FIXME: look for a smaller MaxVF that does divide TC rather than masking. - if (Legal->canFoldTailByMasking()) { + if (Legal->prepareToFoldTailByMasking()) { FoldTailByMasking = true; return MaxVF; } -- cgit v1.1