diff options
author | Nikita Popov <npopov@redhat.com> | 2024-07-01 09:26:01 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-07-01 09:26:01 +0200 |
commit | 77eb05683082dd3751ccfab963f5160f1852058d (patch) | |
tree | 6a89cd8f3c7ef764d97ce50436d78c0a4ebcff45 /llvm | |
parent | 6859e5a169aa235ac04005aaa86ed5ae11372c4c (diff) | |
download | llvm-77eb05683082dd3751ccfab963f5160f1852058d.zip llvm-77eb05683082dd3751ccfab963f5160f1852058d.tar.gz llvm-77eb05683082dd3751ccfab963f5160f1852058d.tar.bz2 |
[InstCombine] Simplify select using KnownBits of condition (#95923)
Simplify the arms of a select based on the KnownBits implied by its condition.
For now, this only handles the case where a select arm folds to a constant,
but it can be generalized to handle other patterns by using
SimplifyDemandedBits instead (in that case we would also have to restrict the
transform to conditions that are known not to be undef).
This is implemented by adding a new member to SimplifyQuery (a CondContext
pointer, CC, set via getWithCondContext()) that can be used to inject an
additional condition. The values affected by the condition are pre-computed, and
computeKnownBits() is not called if the select arms don't contain any affected
values. This reduces the cost in some pathological cases.
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/include/llvm/Analysis/SimplifyQuery.h | 17 | ||||
-rw-r--r-- | llvm/lib/Analysis/ValueTracking.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 55 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/select-binop-cmp.ll | 5 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/select-of-bittest.ll | 8 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/select.ll | 22 | ||||
-rw-r--r-- | llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll | 164 |
7 files changed, 165 insertions, 110 deletions
diff --git a/llvm/include/llvm/Analysis/SimplifyQuery.h b/llvm/include/llvm/Analysis/SimplifyQuery.h index 25b8f9b..a560744 100644 --- a/llvm/include/llvm/Analysis/SimplifyQuery.h +++ b/llvm/include/llvm/Analysis/SimplifyQuery.h @@ -9,6 +9,7 @@ #ifndef LLVM_ANALYSIS_SIMPLIFYQUERY_H #define LLVM_ANALYSIS_SIMPLIFYQUERY_H +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/Operator.h" namespace llvm { @@ -57,6 +58,15 @@ struct InstrInfoQuery { } }; +/// Evaluate query assuming this condition holds. +struct CondContext { + Value *Cond; + bool Invert = false; + SmallPtrSet<Value *, 4> AffectedValues; + + CondContext(Value *Cond) : Cond(Cond) {} +}; + struct SimplifyQuery { const DataLayout &DL; const TargetLibraryInfo *TLI = nullptr; @@ -64,6 +74,7 @@ struct SimplifyQuery { AssumptionCache *AC = nullptr; const Instruction *CxtI = nullptr; const DomConditionCache *DC = nullptr; + const CondContext *CC = nullptr; // Wrapper to query additional information for instructions like metadata or // keywords like nsw, which provides conservative results if those cannot @@ -113,6 +124,12 @@ struct SimplifyQuery { Copy.DC = nullptr; return Copy; } + + SimplifyQuery getWithCondContext(const CondContext &CC) const { + SimplifyQuery Copy(*this); + Copy.CC = &CC; + return Copy; + } }; } // end namespace llvm diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 7deb7bd..c0d49ca 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -771,6 +771,10 @@ static void computeKnownBitsFromCond(const Value *V, Value *Cond, void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known, unsigned Depth, const SimplifyQuery &Q) { + // Handle injected condition. 
+ if (Q.CC && Q.CC->AffectedValues.contains(V)) + computeKnownBitsFromCond(V, Q.CC->Cond, Known, Depth, Q, Q.CC->Invert); + if (!Q.CxtI) return; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index aff691b..979495d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3519,6 +3519,33 @@ static bool matchFMulByZeroIfResultEqZero(InstCombinerImpl &IC, Value *Cmp0, return false; } +/// Check whether the KnownBits of a select arm may be affected by the +/// select condition. +static bool hasAffectedValue(Value *V, SmallPtrSetImpl<Value *> &Affected, + unsigned Depth) { + if (Depth == MaxAnalysisRecursionDepth) + return false; + + // Ignore the case where the select arm itself is affected. These cases + // are handled more efficiently by other optimizations. + if (Depth != 0 && Affected.contains(V)) + return true; + + if (auto *I = dyn_cast<Instruction>(V)) { + if (isa<PHINode>(I)) { + if (Depth == MaxAnalysisRecursionDepth - 1) + return false; + Depth = MaxAnalysisRecursionDepth - 2; + } + return any_of(I->operands(), [&](Value *Op) { + return Op->getType()->isIntOrIntVectorTy() && + hasAffectedValue(Op, Affected, Depth + 1); + }); + } + + return false; +} + Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); @@ -4016,5 +4043,33 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (CondVal->getType() == SI.getType() && isKnownInversion(FalseVal, TrueVal)) return BinaryOperator::CreateXor(CondVal, FalseVal); + if (SelType->isIntOrIntVectorTy() && + (!isa<Constant>(TrueVal) || !isa<Constant>(FalseVal))) { + // Try to simplify select arms based on KnownBits implied by the condition. 
+ CondContext CC(CondVal); + findValuesAffectedByCondition(CondVal, /*IsAssume=*/false, [&](Value *V) { + CC.AffectedValues.insert(V); + }); + SimplifyQuery Q = SQ.getWithInstruction(&SI).getWithCondContext(CC); + if (!CC.AffectedValues.empty()) { + if (!isa<Constant>(TrueVal) && + hasAffectedValue(TrueVal, CC.AffectedValues, /*Depth=*/0)) { + KnownBits Known = llvm::computeKnownBits(TrueVal, /*Depth=*/0, Q); + if (Known.isConstant()) + return replaceOperand(SI, 1, + ConstantInt::get(SelType, Known.getConstant())); + } + + CC.Invert = true; + if (!isa<Constant>(FalseVal) && + hasAffectedValue(FalseVal, CC.AffectedValues, /*Depth=*/0)) { + KnownBits Known = llvm::computeKnownBits(FalseVal, /*Depth=*/0, Q); + if (Known.isConstant()) + return replaceOperand(SI, 2, + ConstantInt::get(SelType, Known.getConstant())); + } + } + } + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll index 1fa0c09a..9ee2bc5 100644 --- a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll +++ b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll @@ -571,10 +571,7 @@ define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z) define <2 x i32> @vec_select_no_equivalence(<2 x i32> %x) { ; CHECK-LABEL: @vec_select_no_equivalence( -; CHECK-NEXT: [[X10:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0> -; CHECK-NEXT: [[COND:%.*]] = icmp eq <2 x i32> [[X]], zeroinitializer -; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[COND]], <2 x i32> [[X10]], <2 x i32> [[X]] -; CHECK-NEXT: ret <2 x i32> [[S]] +; CHECK-NEXT: ret <2 x i32> [[X:%.*]] ; %x10 = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> <i32 1, i32 0> %cond = icmp eq <2 x i32> %x, zeroinitializer diff --git a/llvm/test/Transforms/InstCombine/select-of-bittest.ll b/llvm/test/Transforms/InstCombine/select-of-bittest.ll index e3eb76d..50d3c87 100644 --- 
a/llvm/test/Transforms/InstCombine/select-of-bittest.ll +++ b/llvm/test/Transforms/InstCombine/select-of-bittest.ll @@ -588,11 +588,9 @@ define i32 @n4(i32 %arg) { define i32 @n5(i32 %arg) { ; CHECK-LABEL: @n5( -; CHECK-NEXT: [[T:%.*]] = and i32 [[ARG:%.*]], 2 -; CHECK-NEXT: [[T1:%.*]] = icmp eq i32 [[T]], 0 -; CHECK-NEXT: [[T2:%.*]] = and i32 [[ARG]], 2 -; CHECK-NEXT: [[T3:%.*]] = select i1 [[T1]], i32 [[T2]], i32 1 -; CHECK-NEXT: ret i32 [[T3]] +; CHECK-NEXT: [[T:%.*]] = lshr i32 [[ARG:%.*]], 1 +; CHECK-NEXT: [[T_LOBIT:%.*]] = and i32 [[T]], 1 +; CHECK-NEXT: ret i32 [[T_LOBIT]] ; %t = and i32 %arg, 2 %t1 = icmp eq i32 %t, 0 diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index b37e917..192d7a9 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -3807,9 +3807,8 @@ define i32 @src_and_eq_neg1_or_xor(i32 %x, i32 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], -1 -; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y]], [[X]] ; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 [[XOR]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[XOR]] ; CHECK-NEXT: ret i32 [[COND]] ; entry: @@ -3827,9 +3826,8 @@ define i32 @src_and_eq_neg1_xor_or(i32 %x, i32 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], -1 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] ; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y]], [[X]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[OR]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 0, i32 [[OR]] ; CHECK-NEXT: ret i32 [[COND]] ; entry: @@ -3942,9 +3940,8 @@ define i32 @src_or_eq_0_and_xor(i32 %x, i32 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: 
[[CMP:%.*]] = icmp eq i32 [[OR]], 0 -; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y]], [[X]] ; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[AND]], i32 [[XOR]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 0, i32 [[XOR]] ; CHECK-NEXT: ret i32 [[COND]] ; entry: @@ -3962,9 +3959,8 @@ define i32 @src_or_eq_0_xor_and(i32 %x, i32 %y) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], 0 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] ; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y]], [[X]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[AND]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 0, i32 [[AND]] ; CHECK-NEXT: ret i32 [[COND]] ; entry: @@ -4474,10 +4470,7 @@ define i32 @src_no_trans_select_or_eq0_or_xor(i32 %x, i32 %y) { define i32 @src_no_trans_select_or_eq0_and_or(i32 %x, i32 %y) { ; CHECK-LABEL: @src_no_trans_select_or_eq0_and_or( ; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[OR0:%.*]] = icmp eq i32 [[OR]], 0 -; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR0]], i32 [[AND]], i32 [[OR]] -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: ret i32 [[OR]] ; %or = or i32 %x, %y %or0 = icmp eq i32 %or, 0 @@ -4489,10 +4482,7 @@ define i32 @src_no_trans_select_or_eq0_and_or(i32 %x, i32 %y) { define i32 @src_no_trans_select_or_eq0_xor_or(i32 %x, i32 %y) { ; CHECK-LABEL: @src_no_trans_select_or_eq0_xor_or( ; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[OR0:%.*]] = icmp eq i32 [[OR]], 0 -; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X]], [[Y]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR0]], i32 [[XOR]], i32 [[OR]] -; CHECK-NEXT: ret i32 [[COND]] +; CHECK-NEXT: ret i32 [[OR]] ; %or = or i32 %x, %y %or0 = icmp eq i32 %or, 0 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll 
b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll index 86ca122..fb4545c 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll @@ -20,39 +20,37 @@ define void @interleaved_with_cond_store_0(ptr %p, i64 %x, i64 %n) { ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N:%.*]], 3 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[N]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 2, i64 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[TMP1]] +; CHECK-NEXT: [[DOTNEG:%.*]] = or i64 [[N]], -2 +; CHECK-NEXT: [[N_VEC:%.*]] = add nsw i64 [[DOTNEG]], [[N]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> -; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i64 0 -; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label 
[[PRED_STORE_CONTINUE:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0 -; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP5]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0 +; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP3]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i64 1 -; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i64 1 +; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.if1: -; CHECK-NEXT: [[TMP8:%.*]] = or disjoint i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2 -; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP6]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2 +; CHECK-NEXT: store i64 [[TMP8]], ptr [[TMP7]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -61,11 +59,11 @@ define void @interleaved_with_cond_store_0(ptr %p, i64 %x, i64 %n) { ; CHECK: for.body: ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[IF_MERGE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[P_1:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[I]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[P_1]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[TMP12]], [[X]] -; CHECK-NEXT: br i1 [[TMP13]], label [[IF_THEN:%.*]], label [[IF_MERGE]] +; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[P_1]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[TMP10]], [[X]] +; CHECK-NEXT: br i1 [[TMP11]], label [[IF_THEN:%.*]], label [[IF_MERGE]] ; CHECK: if.then: -; CHECK-NEXT: store i64 [[TMP12]], ptr [[P_1]], align 8 +; CHECK-NEXT: store i64 [[TMP10]], ptr [[P_1]], align 8 ; CHECK-NEXT: br label [[IF_MERGE]] ; CHECK: if.merge: ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 @@ -114,46 +112,44 @@ define void @interleaved_with_cond_store_1(ptr %p, i64 %x, i64 %n) { ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N:%.*]], 3 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[N]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 2, i64 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[TMP1]] +; CHECK-NEXT: [[DOTNEG:%.*]] = or i64 [[N]], -2 +; CHECK-NEXT: [[N_VEC:%.*]] = add nsw i64 [[DOTNEG]], [[N]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0 ; 
CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 1 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 1 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i64> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i64 0 -; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i64> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0 -; CHECK-NEXT: store i64 
[[TMP9]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP6]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP6]], i64 1 -; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.if1: -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2 -; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP11]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2 +; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: -; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[WIDE_VEC3]], i64 0 -; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[WIDE_VEC3]], i64 2 -; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP5]], align 8 +; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[WIDE_VEC3]], i64 0 +; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[WIDE_VEC3]], i64 2 +; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP3]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP15:%.*]] = 
icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -163,15 +159,15 @@ define void @interleaved_with_cond_store_1(ptr %p, i64 %x, i64 %n) { ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[IF_MERGE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[P_0:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[I]], i32 0 ; CHECK-NEXT: [[P_1:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[I]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[P_1]], align 8 -; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[TMP16]], [[X]] -; CHECK-NEXT: br i1 [[TMP17]], label [[IF_THEN:%.*]], label [[IF_MERGE]] +; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[P_1]], align 8 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[TMP14]], [[X]] +; CHECK-NEXT: br i1 [[TMP15]], label [[IF_THEN:%.*]], label [[IF_MERGE]] ; CHECK: if.then: -; CHECK-NEXT: store i64 [[TMP16]], ptr [[P_0]], align 8 +; CHECK-NEXT: store i64 [[TMP14]], ptr [[P_0]], align 8 ; CHECK-NEXT: br label [[IF_MERGE]] ; CHECK: if.merge: -; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[P_0]], align 8 -; CHECK-NEXT: store i64 [[TMP18]], ptr [[P_1]], align 8 +; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[P_0]], align 8 +; CHECK-NEXT: store i64 [[TMP16]], ptr [[P_1]], align 8 ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP5:![0-9]+]] @@ -220,43 +216,41 @@ define void @interleaved_with_cond_store_2(ptr %p, i64 %x, i64 %n) { ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N:%.*]], 3 ; CHECK-NEXT: br i1 
[[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[N]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 2, i64 [[N_MOD_VF]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[TMP1]] +; CHECK-NEXT: [[DOTNEG:%.*]] = or i64 [[N]], -2 +; CHECK-NEXT: [[N_VEC:%.*]] = add nsw i64 [[DOTNEG]], [[N]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2> -; CHECK-NEXT: store i64 [[X]], ptr [[TMP3]], align 8 -; CHECK-NEXT: store i64 [[X]], ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i64> [[STRIDED_VEC]], 
[[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i64 0 -; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK-NEXT: store i64 [[X]], ptr [[TMP1]], align 8 +; CHECK-NEXT: store i64 [[X]], ptr [[TMP2]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i64> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1 -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0 -; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0 +; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP6]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP6]], i64 1 -; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.if1: -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 1 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2 -; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP11]], align 8 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2 +; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 8 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.continue2: ; CHECK-NEXT: 
[[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -266,12 +260,12 @@ define void @interleaved_with_cond_store_2(ptr %p, i64 %x, i64 %n) { ; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[IF_MERGE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] ; CHECK-NEXT: [[P_0:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[I]], i32 0 ; CHECK-NEXT: [[P_1:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[I]], i32 1 -; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[P_1]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[P_1]], align 8 ; CHECK-NEXT: store i64 [[X]], ptr [[P_0]], align 8 -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[TMP14]], [[X]] -; CHECK-NEXT: br i1 [[TMP15]], label [[IF_THEN:%.*]], label [[IF_MERGE]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[TMP12]], [[X]] +; CHECK-NEXT: br i1 [[TMP13]], label [[IF_THEN:%.*]], label [[IF_MERGE]] ; CHECK: if.then: -; CHECK-NEXT: store i64 [[TMP14]], ptr [[P_1]], align 8 +; CHECK-NEXT: store i64 [[TMP12]], ptr [[P_1]], align 8 ; CHECK-NEXT: br label [[IF_MERGE]] ; CHECK: if.merge: ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1 |