aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
diff options
context:
space:
mode:
author: Roman Lebedev <lebedev.ri@gmail.com> 2021-10-27 19:13:49 +0300
committer: Roman Lebedev <lebedev.ri@gmail.com> 2021-10-27 19:45:55 +0300
commit: 156f10c840a07034a6bd638b5912054100365741 (patch)
tree: 694118dd41eaefea154051c458c8306b9b2a27e8 /llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
parent: 35c3f5610c969091359a92f31e5a2745f5d49739 (diff)
downloadllvm-156f10c840a07034a6bd638b5912054100365741.zip
llvm-156f10c840a07034a6bd638b5912054100365741.tar.gz
llvm-156f10c840a07034a6bd638b5912054100365741.tar.bz2
[IR] `SCEVExpander::generateOverflowCheck()`: short-circuit `umul_with_overflow`-by-one
It's a no-op; no overflow ever happens: https://alive2.llvm.org/ce/z/Zw89rZ While I generally don't like such hacks, we have a very good reason to do this: here we are expanding a run-time correctness check for the vectorization, and said `umul_with_overflow` will not be optimized out before we query the cost of the checks we've generated. This means the cost of the run-time checks would be artificially inflated, and after https://reviews.llvm.org/D109368 that will affect the minimal trip count for which these checks are even evaluated. And if they aren't even evaluated, then the vectorized code certainly won't be run. We could consider doing this in IRBuilder, but then we'd also need to teach `CreateExtractValue()` to look into a chain of `insertvalue`s, and I'm not sure there's precedent for that. Refs. https://reviews.llvm.org/D109368#3089809
Diffstat (limited to 'llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp')
-rw-r--r-- llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 19
1 files changed, 14 insertions, 5 deletions
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 252eea6..3abd189 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -2494,13 +2494,22 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
// Get the backedge taken count and truncate or extended to the AR type.
Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
- auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
- Intrinsic::umul_with_overflow, Ty);
// Compute |Step| * Backedge
- CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
- Value *MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
- Value *OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
+ Value *MulV, *OfMul;
+ if (Step->isOne()) {
+ // Special-case Step of one. Potentially-costly `umul_with_overflow` isn't
+ // needed, there is never an overflow, so to avoid artificially inflating
+ // the cost of the check, directly emit the optimized IR.
+ MulV = TruncTripCount;
+ OfMul = ConstantInt::getFalse(MulV->getContext());
+ } else {
+ auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
+ Intrinsic::umul_with_overflow, Ty);
+ CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
+ MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
+ OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
+ }
// Compute:
// Start + |Step| * Backedge < Start