diff options
author | Philip Reames <preames@rivosinc.com> | 2024-06-19 08:40:04 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-19 08:40:04 -0700 |
commit | cb76896d6e45e2c9b7ef5e47b6ec37aeca43f7a8 (patch) | |
tree | 8ab6670e0c45fa701ba92b8fa58aeea3c0829288 | |
parent | 1003f5b93e0ab0518e285b861573181942e41930 (diff) | |
download | llvm-cb76896d6e45e2c9b7ef5e47b6ec37aeca43f7a8.zip llvm-cb76896d6e45e2c9b7ef5e47b6ec37aeca43f7a8.tar.gz llvm-cb76896d6e45e2c9b7ef5e47b6ec37aeca43f7a8.tar.bz2 |
[SCEVExpander] Recognize urem idiom during expansion (#96005)
If we have a urem expression, emitting it as a urem is significantly
better than letting the full expansion kick in. We risk no longer sharing a
udiv or mul which could previously have been shared, but losing that
seems like a reasonable tradeoff for being able to round trip a urem w/o
modification.
4 files changed, 16 insertions, 9 deletions
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 9808308..7fb7074 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -14972,6 +14972,9 @@ void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const { // 4, A / B becomes X / 8). bool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS) { + if (Expr->getType()->isPointerTy()) + return false; + // Try to match 'zext (trunc A to iB) to iY', which is used // for URem with constant power-of-2 second operands. Make sure the size of // the operand A matches the size of the whole expressions. diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index c437a44..c7d758a 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -491,6 +491,16 @@ public: } Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { + // Recognize the canonical representation of an unsimplifed urem. + const SCEV *URemLHS = nullptr; + const SCEV *URemRHS = nullptr; + if (SE.matchURem(S, URemLHS, URemRHS)) { + Value *LHS = expand(URemLHS); + Value *RHS = expand(URemRHS); + return InsertBinop(Instruction::URem, LHS, RHS, SCEV::FlagAnyWrap, + /*IsSafeToHoist*/ false); + } + // Collect all the add operands in a loop, along with their associated loops. 
// Iterate in reverse so that constants are emitted last, all else equal, and // so that pointer operands are inserted first, which the code below relies on diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/postinc-iv-used-by-urem-and-udiv.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/postinc-iv-used-by-urem-and-udiv.ll index 95b064b..838b48aa 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/postinc-iv-used-by-urem-and-udiv.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/postinc-iv-used-by-urem-and-udiv.ll @@ -22,9 +22,7 @@ define i32 @test_pr38847() { ; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i8 [[LSR]], -1 ; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[TMP0:%.*]] = udiv i32 [[LSR_IV_NEXT2]], 9 -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i32 [[TMP0]], 9 -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[LSR_IV_NEXT2]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = urem i32 [[LSR_IV_NEXT2]], 9 ; CHECK-NEXT: ret i32 [[TMP2]] ; entry: @@ -109,9 +107,7 @@ define i32 @test_pr62852() { ; CHECK: exit: ; CHECK-NEXT: call void @use(i64 [[LSR_IV_NEXT]]) ; CHECK-NEXT: call void @use(i64 [[LSR_IV_NEXT2]]) -; CHECK-NEXT: [[TMP1:%.*]] = udiv i32 [[DEC_1]], 53 -; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i32 [[TMP1]], 53 -; CHECK-NEXT: [[TMP3:%.*]] = sub i32 [[DEC_1]], [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = urem i32 [[DEC_1]], 53 ; CHECK-NEXT: ret i32 [[TMP3]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll index ce5d442..bf687a3 100644 --- a/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll +++ b/llvm/test/Transforms/LoopVectorize/trip-count-expansion-may-introduce-ub.ll @@ -723,9 +723,7 @@ define i64 @multi_exit_4_exit_count_with_urem_by_constant_in_latch(ptr %dst, i64 ; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SMAX:%.*]] = call i64 
@llvm.smax.i64(i64 [[N]], i64 0) -; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 [[N]], 42 -; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 42 -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[N]], [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = urem i64 [[N]], 42 ; CHECK-NEXT: [[SMAX1:%.*]] = call i64 @llvm.smax.i64(i64 [[TMP2]], i64 0) ; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[SMAX]], i64 [[SMAX1]]) ; CHECK-NEXT: [[TMP3:%.*]] = add nuw i64 [[UMIN]], 1 |