commit    89f53af3fffed3e41167fbb7bc10d4885cd97c7f
tree      419fad91cc9717e3f87b108c9e3fc9895273da3e /llvm
parent    417bdb6672b891000bfa1ec3613074acf03f2616
author    Yingwei Zheng <dtcxzyw2333@gmail.com>  2025-09-02 21:41:02 +0800
committer GitHub <noreply@github.com>            2025-09-02 21:41:02 +0800
[ConstraintElim] Use constraints from bounded memory accesses (#155253)
This patch removes bounds checks that are dominated by bounded memory
accesses. For example, if we have an array `int A[5]` and an access `A[idx]`
completes successfully, we know that `idx u< 5` holds after the load.
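
As a rough illustration (this snippet is not part of the patch, the names are
made up, and whether the fold actually fires depends on the GEP flags the
front end emits), the source-level pattern this enables looks like:

```c++
// Illustrative C++ only.
int A[5];

int get(unsigned idx) {
  int v = A[idx];   // if this access did not invoke UB, idx must be < 5
  if (idx < 5)      // ...so ConstraintElimination can fold this check to true
    return v + 1;
  return v;
}
```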
compile-time impact (+0.1%):
https://llvm-compile-time-tracker.com/compare.php?from=f0e9bba024d44b55d54b02025623ce4a3ba5a37c&to=5227b08a4a514159ec524d1b1ca18ed8ab5407df&stat=instructions%3Au
llvm-opt-benchmark:
https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2709
Proof: https://alive2.llvm.org/ce/z/JEyjA2
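
The derived fact follows from `Index * Scale + ConstOffset + AccessSize <=
AllocSize` (see the comment in `getConstraintFromMemoryAccess` in the diff
below). A minimal plain-integer sketch of that arithmetic, using `uint64_t`
instead of `APInt` and a made-up helper name, with the wrap-around case
handled by an explicit early return:

```c++
#include <cstdint>
#include <optional>

// Largest index I such that I * Scale + ConstOffset + AccessSize <= AllocSize.
// The pass records the fact "Index u<= MaxIndex" after a successful access.
// Scale is a GEP type size and assumed to be non-zero.
std::optional<uint64_t> maxValidIndex(uint64_t AllocSize, uint64_t AccessSize,
                                      uint64_t ConstOffset, uint64_t Scale) {
  // If the fixed part already exceeds the allocation, no index is in bounds.
  if (ConstOffset + AccessSize > AllocSize)
    return std::nullopt;
  return (AllocSize - ConstOffset - AccessSize) / Scale;
}
```

For the `int A[5]` example above (assuming a 4-byte `int`): `AllocSize = 20`,
`AccessSize = 4`, `ConstOffset = 0`, `Scale = 4`, so `MaxIndex = 4`, i.e.
`idx u<= 4`, which is equivalent to `idx u< 5`.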
Diffstat (limited to 'llvm')
llvm/lib/Transforms/Scalar/ConstraintElimination.cpp                           | 112
llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll | 373
2 files changed, 478 insertions, 7 deletions
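
Note that the implementation below only derives a bound when the GEP carries
the `nuw` flag (see the `load_global_without_nuw` negative test): without
no-unsigned-wrap, the address computation may wrap and still land in valid
memory, so a successful access tells us nothing about the index. A toy model
with 8-bit "addresses" (values chosen purely for illustration, not taken from
the patch):

```c++
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t Base = 250; // pretend an object of size 5 lives at address 250
  const uint8_t Idx = 10;   // clearly out of bounds for that object
  // Unsigned wrap: 250 + 10 == 260 truncates to 4, which could be a perfectly
  // valid address of some other object, so the access may still "succeed".
  uint8_t Addr = static_cast<uint8_t>(Base + Idx);
  assert(Addr == 4);
  return 0;
}
```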
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 1ddb8ae..1b4d8c7 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -19,9 +19,11 @@
 #include "llvm/Analysis/ConstraintSystem.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfo.h"
@@ -170,10 +172,12 @@ struct State {
   DominatorTree &DT;
   LoopInfo &LI;
   ScalarEvolution &SE;
+  TargetLibraryInfo &TLI;
   SmallVector<FactOrCheck, 64> WorkList;
 
-  State(DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE)
-      : DT(DT), LI(LI), SE(SE) {}
+  State(DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE,
+        TargetLibraryInfo &TLI)
+      : DT(DT), LI(LI), SE(SE), TLI(TLI) {}
 
   /// Process block \p BB and add known facts to work-list.
   void addInfoFor(BasicBlock &BB);
@@ -1109,10 +1113,50 @@ void State::addInfoForInductions(BasicBlock &BB) {
   }
 }
 
+static bool getConstraintFromMemoryAccess(GetElementPtrInst &GEP,
+                                          uint64_t AccessSize,
+                                          CmpPredicate &Pred, Value *&A,
+                                          Value *&B, const DataLayout &DL,
+                                          const TargetLibraryInfo &TLI) {
+  auto Offset = collectOffsets(cast<GEPOperator>(GEP), DL);
+  if (!Offset.NW.hasNoUnsignedWrap())
+    return false;
+
+  if (Offset.VariableOffsets.size() != 1)
+    return false;
+
+  ObjectSizeOpts Opts;
+  // Workaround for gep inbounds, ptr null, idx.
+  Opts.NullIsUnknownSize = true;
+  // Be conservative since we are not clear on whether an out of bounds access
+  // to the padding is UB or not.
+  Opts.RoundToAlign = true;
+  std::optional<TypeSize> Size =
+      getBaseObjectSize(Offset.BasePtr, DL, &TLI, Opts);
+  if (!Size || Size->isScalable())
+    return false;
+
+  // Index * Scale + ConstOffset + AccessSize <= AllocSize
+  // With nuw flag, we know that the index addition doesn't have unsigned wrap.
+  // If (AllocSize - (ConstOffset + AccessSize)) wraps around, there is no
+  // valid value for Index.
+  uint64_t BitWidth = Offset.ConstantOffset.getBitWidth();
+  auto &[Index, Scale] = Offset.VariableOffsets.front();
+  APInt MaxIndex = (APInt(BitWidth, Size->getFixedValue() - AccessSize,
+                          /*isSigned=*/false, /*implicitTrunc=*/true) -
+                    Offset.ConstantOffset)
+                       .udiv(Scale);
+  Pred = ICmpInst::ICMP_ULE;
+  A = Index;
+  B = ConstantInt::get(Index->getType(), MaxIndex);
+  return true;
+}
+
 void State::addInfoFor(BasicBlock &BB) {
   addInfoForInductions(BB);
+  auto &DL = BB.getDataLayout();
 
-  // True as long as long as the current instruction is guaranteed to execute.
+  // True as long as the current instruction is guaranteed to execute.
   bool GuaranteedToExecute = true;
   // Queue conditions and assumes.
   for (Instruction &I : BB) {
@@ -1127,6 +1171,38 @@ void State::addInfoFor(BasicBlock &BB) {
       continue;
     }
 
+    auto AddFactFromMemoryAccess = [&](Value *Ptr, Type *AccessType) {
+      auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+      if (!GEP)
+        return;
+      TypeSize AccessSize = DL.getTypeStoreSize(AccessType);
+      if (!AccessSize.isFixed())
+        return;
+      if (GuaranteedToExecute) {
+        CmpPredicate Pred;
+        Value *A, *B;
+        if (getConstraintFromMemoryAccess(*GEP, AccessSize.getFixedValue(),
+                                          Pred, A, B, DL, TLI)) {
+          // The memory access is guaranteed to execute when BB is entered,
+          // hence the constraint holds on entry to BB.
+          WorkList.emplace_back(FactOrCheck::getConditionFact(
+              DT.getNode(I.getParent()), Pred, A, B));
+        }
+      } else {
+        WorkList.emplace_back(
+            FactOrCheck::getInstFact(DT.getNode(I.getParent()), &I));
+      }
+    };
+
+    if (auto *LI = dyn_cast<LoadInst>(&I)) {
+      if (!LI->isVolatile())
+        AddFactFromMemoryAccess(LI->getPointerOperand(), LI->getAccessType());
+    }
+    if (auto *SI = dyn_cast<StoreInst>(&I)) {
+      if (!SI->isVolatile())
+        AddFactFromMemoryAccess(SI->getPointerOperand(), SI->getAccessType());
+    }
+
     auto *II = dyn_cast<IntrinsicInst>(&I);
     Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic;
     switch (ID) {
@@ -1420,7 +1496,7 @@ static std::optional<bool> checkCondition(CmpInst::Predicate Pred, Value *A,
   LLVM_DEBUG(dbgs() << "Checking " << *CheckInst << "\n");
 
   auto R = Info.getConstraintForSolving(Pred, A, B);
-  if (R.empty() || !R.isValid(Info)){
+  if (R.empty() || !R.isValid(Info)) {
     LLVM_DEBUG(dbgs() << "   failed to decompose condition\n");
     return std::nullopt;
   }
@@ -1785,12 +1861,13 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
 
 static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
                                  ScalarEvolution &SE,
-                                 OptimizationRemarkEmitter &ORE) {
+                                 OptimizationRemarkEmitter &ORE,
+                                 TargetLibraryInfo &TLI) {
   bool Changed = false;
   DT.updateDFSNumbers();
   SmallVector<Value *> FunctionArgs(llvm::make_pointer_range(F.args()));
   ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
-  State S(DT, LI, SE);
+  State S(DT, LI, SE, TLI);
   std::unique_ptr<Module> ReproducerModule(
       DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
@@ -1960,6 +2037,26 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
        }
        continue;
      }
+
+      auto &DL = F.getDataLayout();
+      auto AddFactsAboutIndices = [&](Value *Ptr, Type *AccessType) {
+        CmpPredicate Pred;
+        Value *A, *B;
+        if (getConstraintFromMemoryAccess(
+                *cast<GetElementPtrInst>(Ptr),
+                DL.getTypeStoreSize(AccessType).getFixedValue(), Pred, A, B, DL,
+                TLI))
+          AddFact(Pred, A, B);
+      };
+
+      if (auto *LI = dyn_cast<LoadInst>(CB.Inst)) {
+        AddFactsAboutIndices(LI->getPointerOperand(), LI->getAccessType());
+        continue;
+      }
+      if (auto *SI = dyn_cast<StoreInst>(CB.Inst)) {
+        AddFactsAboutIndices(SI->getPointerOperand(), SI->getAccessType());
+        continue;
+      }
     }
 
     Value *A = nullptr, *B = nullptr;
@@ -2018,7 +2115,8 @@ PreservedAnalyses ConstraintEliminationPass::run(Function &F,
   auto &LI = AM.getResult<LoopAnalysis>(F);
   auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-  if (!eliminateConstraints(F, DT, LI, SE, ORE))
+  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+  if (!eliminateConstraints(F, DT, LI, SE, ORE, TLI))
     return PreservedAnalyses::all();
 
   PreservedAnalyses PA;
diff --git a/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
new file mode 100644
index 0000000..8e3862b
--- /dev/null
+++ b/llvm/test/Transforms/ConstraintElimination/implied-by-bounded-memory-access.ll
@@ -0,0 +1,373 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=constraint-elimination -S %s | FileCheck %s
+
+@g = private unnamed_addr constant [5 x i8] c"test\00"
+@g_overaligned = private unnamed_addr constant [5 x i8] c"test\00", align 8
+@g_external = external global [5 x i8]
+
+declare void @free(ptr allocptr noundef captures(none)) mustprogress nounwind willreturn allockind("free") memory(argmem: readwrite, inaccessiblemem: readwrite) "alloc-family"="malloc"
+declare ptr @malloc(i64) mustprogress nofree nounwind willreturn allockind("alloc,uninitialized") allocsize(0) memory(inaccessiblemem: readwrite) "alloc-family"="malloc"
+declare void @may_not_return(i1)
+
+define i8 @load_global(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_global_const_offset(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_const_offset(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr nuw i8, ptr @g, i64 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[GEP1]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %gep1 = getelementptr nuw i8, ptr @g, i64 1
+  %gep = getelementptr nuw i8, ptr %gep1, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 4
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_global_atomic(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_atomic(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load atomic i8, ptr [[GEP]] unordered, align 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
+  %load = load atomic i8, ptr %gep unordered, align 1
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i1 @store_global(i64 %idx) {
+; CHECK-LABEL: define i1 @store_global(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: store i8 0, ptr [[GEP]], align 1
+; CHECK-NEXT: ret i1 true
+;
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
+  store i8 0, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  ret i1 %cmp
+}
+
+define i1 @store_global_atomic(i64 %idx) {
+; CHECK-LABEL: define i1 @store_global_atomic(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: store atomic i8 0, ptr [[GEP]] release, align 1
+; CHECK-NEXT: ret i1 true
+;
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
+  store atomic i8 0, ptr %gep release, align 1
+  %cmp = icmp ult i64 %idx, 5
+  ret i1 %cmp
+}
+
+define i8 @load_byval(ptr byval([5 x i8]) %p, i64 %idx) {
+; CHECK-LABEL: define i8 @load_byval(
+; CHECK-SAME: ptr byval([5 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %gep = getelementptr nuw i8, ptr %p, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_alloca(i64 %idx) {
+; CHECK-LABEL: define i8 @load_alloca(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[ALLOC:%.*]] = alloca [5 x i8], align 1
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[ALLOC]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %alloc = alloca [5 x i8], align 1
+  call void @llvm.memcpy.p0.p0.i64(ptr %alloc, ptr @g, i64 5, i1 false)
+  %gep = getelementptr nuw i8, ptr %alloc, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_malloc(i64 %idx) {
+; CHECK-LABEL: define i8 @load_malloc(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[ALLOC:%.*]] = call ptr @malloc(i64 5)
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[ALLOC]], ptr @g, i64 5, i1 false)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[ALLOC]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: call void @free(ptr [[ALLOC]])
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %alloc = call ptr @malloc(i64 5)
+  call void @llvm.memcpy.p0.p0.i64(ptr %alloc, ptr @g, i64 5, i1 false)
+  %gep = getelementptr nuw i8, ptr %alloc, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  call void @free(ptr %alloc)
+  ret i8 %add
+}
+
+define i32 @load_byval_i32(ptr byval([10 x i8]) %p, i64 %idx) {
+; CHECK-LABEL: define i32 @load_byval_i32(
+; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i32
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+  %gep = getelementptr nuw i8, ptr %p, i64 %idx
+  %load = load i32, ptr %gep
+  %cmp = icmp ult i64 %idx, 7
+  %zext = zext i1 %cmp to i32
+  %add = add i32 %load, %zext
+  ret i32 %add
+}
+
+define i8 @load_global_may_noreturn_dom_bb(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_may_noreturn_dom_bb(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: call void @may_not_return(i1 [[CMP1]])
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: br label %[[NEXT:.*]]
+; CHECK: [[NEXT]]:
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 true to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
+  %cmp1 = icmp ult i64 %idx, 5
+  call void @may_not_return(i1 %cmp1) ; %cmp1 should not be simplified.
+  %load = load i8, ptr %gep
+  br label %next
+
+next:
+  %cmp2 = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp2 to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+; Negative tests.
+
+define i8 @load_global_overaligned(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_overaligned(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g_overaligned, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %gep = getelementptr nuw i8, ptr @g_overaligned, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_global_external(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_external(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g_external, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %gep = getelementptr nuw i8, ptr @g_external, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_from_non_gep(ptr %p, i64 %idx) {
+; CHECK-LABEL: define i8 @load_from_non_gep(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[P]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %load = load i8, ptr %p
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_global_multi_indices(i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: define i8 @load_global_multi_indices(
+; CHECK-SAME: i64 [[IDX1:%.*]], i64 [[IDX2:%.*]]) {
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX1]]
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr nuw i8, ptr [[GEP1]], i64 [[IDX2]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP2]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX1]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %gep1 = getelementptr nuw i8, ptr @g, i64 %idx1
+  %gep2 = getelementptr nuw i8, ptr %gep1, i64 %idx2
+  %load = load i8, ptr %gep2
+  %cmp = icmp ult i64 %idx1, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i8 @load_global_without_nuw(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_without_nuw(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %gep = getelementptr i8, ptr @g, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i32 @load_byval_i32_smaller_range(ptr byval([10 x i8]) %p, i64 %idx) {
+; CHECK-LABEL: define i32 @load_byval_i32_smaller_range(
+; CHECK-SAME: ptr byval([10 x i8]) [[P:%.*]], i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr [[P]], i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 6
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i32
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+  %gep = getelementptr nuw i8, ptr %p, i64 %idx
+  %load = load i32, ptr %gep
+  %cmp = icmp ult i64 %idx, 6
+  %zext = zext i1 %cmp to i32
+  %add = add i32 %load, %zext
+  ret i32 %add
+}
+
+define i8 @load_global_volatile(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_volatile(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load volatile i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
+  %load = load volatile i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}
+
+define i1 @store_global_volatile(i64 %idx) {
+; CHECK-LABEL: define i1 @store_global_volatile(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: store volatile i8 0, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
+  store volatile i8 0, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  ret i1 %cmp
+}
+
+define i8 @load_global_vscale(i64 %idx) {
+; CHECK-LABEL: define i8 @load_global_vscale(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr @g, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load <vscale x 1 x i8>, ptr [[GEP]], align 1
+; CHECK-NEXT: [[EXT:%.*]] = extractelement <vscale x 1 x i8> [[LOAD]], i64 0
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[EXT]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %gep = getelementptr nuw i8, ptr @g, i64 %idx
+  %load = load <vscale x 1 x i8>, ptr %gep
+  %ext = extractelement <vscale x 1 x i8> %load, i64 0
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %ext, %zext
+  ret i8 %add
+}
+
+define i8 @load_from_null(i64 %idx) {
+; CHECK-LABEL: define i8 @load_from_null(
+; CHECK-SAME: i64 [[IDX:%.*]]) {
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr nuw i8, ptr null, i64 [[IDX]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IDX]], 5
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[CMP]] to i8
+; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD]], [[ZEXT]]
+; CHECK-NEXT: ret i8 [[ADD]]
+;
+  %gep = getelementptr nuw i8, ptr null, i64 %idx
+  %load = load i8, ptr %gep
+  %cmp = icmp ult i64 %idx, 5
+  %zext = zext i1 %cmp to i8
+  %add = add i8 %load, %zext
+  ret i8 %add
+}