diff options
| -rw-r--r-- | llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 12 | ||||
| -rw-r--r-- | llvm/test/Transforms/MemCpyOpt/stack-move.ll | 26 |
2 files changed, 36 insertions, 2 deletions
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 8c9d9c6..a21ee61 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1486,6 +1486,15 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, Instruction *I = Worklist.back(); Worklist.pop_back(); for (const Use &U : I->uses()) { + auto *UI = cast<Instruction>(U.getUser()); + // TODO: We can perform the transformation if we move src alloca to + // before the dominator of all uses. If any use that isn't dominated by + // SrcAlloca exists, non-dominating uses will be produced. + if (!DT->dominates(SrcAlloca, UI)) { + LLVM_DEBUG(dbgs() << "Stack Move: SrcAlloca doesn't dominate all " + "uses for the location, bailing\n"); + return false; + } if (Visited.size() >= MaxUsesToExplore) { LLVM_DEBUG( dbgs() @@ -1499,10 +1508,9 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, return false; case UseCaptureKind::PASSTHROUGH: // Instructions cannot have non-instruction users. - Worklist.push_back(cast<Instruction>(U.getUser())); + Worklist.push_back(UI); continue; case UseCaptureKind::NO_CAPTURE: { - auto *UI = cast<Instruction>(U.getUser()); if (UI->isLifetimeStartOrEnd()) { // We note the locations of these intrinsic calls so that we can // delete them later if the optimization succeeds, this is safe diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move.ll b/llvm/test/Transforms/MemCpyOpt/stack-move.ll index 161a732..f6be486 100644 --- a/llvm/test/Transforms/MemCpyOpt/stack-move.ll +++ b/llvm/test/Transforms/MemCpyOpt/stack-move.ll @@ -981,6 +981,32 @@ bb2: ; Optimization failures follow: +; TODO: we can merge those alloca if we move src alloca to the start of the BB. +; Tests that a the optimization isn't performed, +; when any use that isn't dominated by SrcAlloca exists. +define i32 @use_not_dominated_by_src_alloca() { +; CHECK-LABEL: define i32 @use_not_dominated_by_src_alloca() { +; CHECK-NEXT: [[DEST:%.*]] = alloca i1, align 1 +; CHECK-NEXT: [[DEST_GEP:%.*]] = getelementptr i64, ptr [[DEST]], i64 -1 +; CHECK-NEXT: [[DEST_USE:%.*]] = load i8, ptr [[DEST_GEP]], align 1 +; CHECK-NEXT: [[SRC:%.*]] = alloca i8, align 4 +; CHECK-NEXT: [[SRC_VAL:%.*]] = load i1, ptr [[SRC]], align 4 +; CHECK-NEXT: store i1 [[SRC_VAL]], ptr [[DEST]], align 1 +; CHECK-NEXT: ret i32 0 +; + %dest = alloca i1, align 1 + ; Replacing the use of dest with src causes no domination uses. + %dest.gep = getelementptr i64, ptr %dest, i64 -1 + %dest.use = load i8, ptr %dest.gep, align 1 + %src = alloca i8, align 4 + %src.val = load i1, ptr %src, align 4 + + store i1 %src.val, ptr %dest, align 1 + + ret i32 0 +} + + ; Tests that a memcpy that doesn't completely overwrite a stack value is a use ; for the purposes of liveness analysis, not a definition. define void @incomplete_memcpy() { |
