aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
authorNikita Popov <npopov@redhat.com>2024-07-01 09:26:01 +0200
committerGitHub <noreply@github.com>2024-07-01 09:26:01 +0200
commit77eb05683082dd3751ccfab963f5160f1852058d (patch)
tree6a89cd8f3c7ef764d97ce50436d78c0a4ebcff45 /llvm
parent6859e5a169aa235ac04005aaa86ed5ae11372c4c (diff)
downloadllvm-77eb05683082dd3751ccfab963f5160f1852058d.zip
llvm-77eb05683082dd3751ccfab963f5160f1852058d.tar.gz
llvm-77eb05683082dd3751ccfab963f5160f1852058d.tar.bz2
[InstCombine] Simplify select using KnownBits of condition (#95923)
Simplify the arms of a select based on the KnownBits implied by its condition. For now this only handles the case where the select arm folds to a constant, but this can be generalized to handle other patterns by using SimplifyDemandedBits instead (in that case we would also have to limit to non-undef conditions). This is implemented by adding a new member to SimplifyQuery that can be used to inject an additional condition. The affected values are pre-computed and we don't call computeKnownBits() if the select arms don't contain affected values. This reduces the cost in some pathological cases.
Diffstat (limited to 'llvm')
-rw-r--r--llvm/include/llvm/Analysis/SimplifyQuery.h17
-rw-r--r--llvm/lib/Analysis/ValueTracking.cpp4
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp55
-rw-r--r--llvm/test/Transforms/InstCombine/select-binop-cmp.ll5
-rw-r--r--llvm/test/Transforms/InstCombine/select-of-bittest.ll8
-rw-r--r--llvm/test/Transforms/InstCombine/select.ll22
-rw-r--r--llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll164
7 files changed, 165 insertions, 110 deletions
diff --git a/llvm/include/llvm/Analysis/SimplifyQuery.h b/llvm/include/llvm/Analysis/SimplifyQuery.h
index 25b8f9b..a560744 100644
--- a/llvm/include/llvm/Analysis/SimplifyQuery.h
+++ b/llvm/include/llvm/Analysis/SimplifyQuery.h
@@ -9,6 +9,7 @@
#ifndef LLVM_ANALYSIS_SIMPLIFYQUERY_H
#define LLVM_ANALYSIS_SIMPLIFYQUERY_H
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Operator.h"
namespace llvm {
@@ -57,6 +58,15 @@ struct InstrInfoQuery {
}
};
+/// Evaluate query assuming this condition holds.
+struct CondContext {
+ Value *Cond;
+ bool Invert = false;
+ SmallPtrSet<Value *, 4> AffectedValues;
+
+ CondContext(Value *Cond) : Cond(Cond) {}
+};
+
struct SimplifyQuery {
const DataLayout &DL;
const TargetLibraryInfo *TLI = nullptr;
@@ -64,6 +74,7 @@ struct SimplifyQuery {
AssumptionCache *AC = nullptr;
const Instruction *CxtI = nullptr;
const DomConditionCache *DC = nullptr;
+ const CondContext *CC = nullptr;
// Wrapper to query additional information for instructions like metadata or
// keywords like nsw, which provides conservative results if those cannot
@@ -113,6 +124,12 @@ struct SimplifyQuery {
Copy.DC = nullptr;
return Copy;
}
+
+ SimplifyQuery getWithCondContext(const CondContext &CC) const {
+ SimplifyQuery Copy(*this);
+ Copy.CC = &CC;
+ return Copy;
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 7deb7bd..c0d49ca 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -771,6 +771,10 @@ static void computeKnownBitsFromCond(const Value *V, Value *Cond,
void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
unsigned Depth, const SimplifyQuery &Q) {
+ // Handle injected condition.
+ if (Q.CC && Q.CC->AffectedValues.contains(V))
+ computeKnownBitsFromCond(V, Q.CC->Cond, Known, Depth, Q, Q.CC->Invert);
+
if (!Q.CxtI)
return;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index aff691b..979495d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3519,6 +3519,33 @@ static bool matchFMulByZeroIfResultEqZero(InstCombinerImpl &IC, Value *Cmp0,
return false;
}
+/// Check whether the KnownBits of a select arm may be affected by the
+/// select condition.
+static bool hasAffectedValue(Value *V, SmallPtrSetImpl<Value *> &Affected,
+ unsigned Depth) {
+ if (Depth == MaxAnalysisRecursionDepth)
+ return false;
+
+ // Ignore the case where the select arm itself is affected. These cases
+ // are handled more efficiently by other optimizations.
+ if (Depth != 0 && Affected.contains(V))
+ return true;
+
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (isa<PHINode>(I)) {
+ if (Depth == MaxAnalysisRecursionDepth - 1)
+ return false;
+ Depth = MaxAnalysisRecursionDepth - 2;
+ }
+ return any_of(I->operands(), [&](Value *Op) {
+ return Op->getType()->isIntOrIntVectorTy() &&
+ hasAffectedValue(Op, Affected, Depth + 1);
+ });
+ }
+
+ return false;
+}
+
Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
Value *TrueVal = SI.getTrueValue();
@@ -4016,5 +4043,33 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (CondVal->getType() == SI.getType() && isKnownInversion(FalseVal, TrueVal))
return BinaryOperator::CreateXor(CondVal, FalseVal);
+ if (SelType->isIntOrIntVectorTy() &&
+ (!isa<Constant>(TrueVal) || !isa<Constant>(FalseVal))) {
+ // Try to simplify select arms based on KnownBits implied by the condition.
+ CondContext CC(CondVal);
+ findValuesAffectedByCondition(CondVal, /*IsAssume=*/false, [&](Value *V) {
+ CC.AffectedValues.insert(V);
+ });
+ SimplifyQuery Q = SQ.getWithInstruction(&SI).getWithCondContext(CC);
+ if (!CC.AffectedValues.empty()) {
+ if (!isa<Constant>(TrueVal) &&
+ hasAffectedValue(TrueVal, CC.AffectedValues, /*Depth=*/0)) {
+ KnownBits Known = llvm::computeKnownBits(TrueVal, /*Depth=*/0, Q);
+ if (Known.isConstant())
+ return replaceOperand(SI, 1,
+ ConstantInt::get(SelType, Known.getConstant()));
+ }
+
+ CC.Invert = true;
+ if (!isa<Constant>(FalseVal) &&
+ hasAffectedValue(FalseVal, CC.AffectedValues, /*Depth=*/0)) {
+ KnownBits Known = llvm::computeKnownBits(FalseVal, /*Depth=*/0, Q);
+ if (Known.isConstant())
+ return replaceOperand(SI, 2,
+ ConstantInt::get(SelType, Known.getConstant()));
+ }
+ }
+ }
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll
index 1fa0c09a..9ee2bc5 100644
--- a/llvm/test/Transforms/InstCombine/select-binop-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/select-binop-cmp.ll
@@ -571,10 +571,7 @@ define <2 x i8> @select_xor_icmp_vec_bad(<2 x i8> %x, <2 x i8> %y, <2 x i8> %z)
define <2 x i32> @vec_select_no_equivalence(<2 x i32> %x) {
; CHECK-LABEL: @vec_select_no_equivalence(
-; CHECK-NEXT: [[X10:%.*]] = shufflevector <2 x i32> [[X:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT: [[COND:%.*]] = icmp eq <2 x i32> [[X]], zeroinitializer
-; CHECK-NEXT: [[S:%.*]] = select <2 x i1> [[COND]], <2 x i32> [[X10]], <2 x i32> [[X]]
-; CHECK-NEXT: ret <2 x i32> [[S]]
+; CHECK-NEXT: ret <2 x i32> [[X:%.*]]
;
%x10 = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> <i32 1, i32 0>
%cond = icmp eq <2 x i32> %x, zeroinitializer
diff --git a/llvm/test/Transforms/InstCombine/select-of-bittest.ll b/llvm/test/Transforms/InstCombine/select-of-bittest.ll
index e3eb76d..50d3c87 100644
--- a/llvm/test/Transforms/InstCombine/select-of-bittest.ll
+++ b/llvm/test/Transforms/InstCombine/select-of-bittest.ll
@@ -588,11 +588,9 @@ define i32 @n4(i32 %arg) {
define i32 @n5(i32 %arg) {
; CHECK-LABEL: @n5(
-; CHECK-NEXT: [[T:%.*]] = and i32 [[ARG:%.*]], 2
-; CHECK-NEXT: [[T1:%.*]] = icmp eq i32 [[T]], 0
-; CHECK-NEXT: [[T2:%.*]] = and i32 [[ARG]], 2
-; CHECK-NEXT: [[T3:%.*]] = select i1 [[T1]], i32 [[T2]], i32 1
-; CHECK-NEXT: ret i32 [[T3]]
+; CHECK-NEXT: [[T:%.*]] = lshr i32 [[ARG:%.*]], 1
+; CHECK-NEXT: [[T_LOBIT:%.*]] = and i32 [[T]], 1
+; CHECK-NEXT: ret i32 [[T_LOBIT]]
;
%t = and i32 %arg, 2
%t1 = icmp eq i32 %t, 0
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index b37e917..192d7a9 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -3807,9 +3807,8 @@ define i32 @src_and_eq_neg1_or_xor(i32 %x, i32 %y) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], -1
-; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y]], [[X]]
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]]
-; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 [[XOR]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 -1, i32 [[XOR]]
; CHECK-NEXT: ret i32 [[COND]]
;
entry:
@@ -3827,9 +3826,8 @@ define i32 @src_and_eq_neg1_xor_or(i32 %x, i32 %y) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], -1
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]]
; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y]], [[X]]
-; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[OR]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 0, i32 [[OR]]
; CHECK-NEXT: ret i32 [[COND]]
;
entry:
@@ -3942,9 +3940,8 @@ define i32 @src_or_eq_0_and_xor(i32 %x, i32 %y) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], 0
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y]], [[X]]
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]]
-; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[AND]], i32 [[XOR]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 0, i32 [[XOR]]
; CHECK-NEXT: ret i32 [[COND]]
;
entry:
@@ -3962,9 +3959,8 @@ define i32 @src_or_eq_0_xor_and(i32 %x, i32 %y) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], 0
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]]
; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y]], [[X]]
-; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[AND]]
+; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 0, i32 [[AND]]
; CHECK-NEXT: ret i32 [[COND]]
;
entry:
@@ -4474,10 +4470,7 @@ define i32 @src_no_trans_select_or_eq0_or_xor(i32 %x, i32 %y) {
define i32 @src_no_trans_select_or_eq0_and_or(i32 %x, i32 %y) {
; CHECK-LABEL: @src_no_trans_select_or_eq0_and_or(
; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[OR0:%.*]] = icmp eq i32 [[OR]], 0
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]]
-; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR0]], i32 [[AND]], i32 [[OR]]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: ret i32 [[OR]]
;
%or = or i32 %x, %y
%or0 = icmp eq i32 %or, 0
@@ -4489,10 +4482,7 @@ define i32 @src_no_trans_select_or_eq0_and_or(i32 %x, i32 %y) {
define i32 @src_no_trans_select_or_eq0_xor_or(i32 %x, i32 %y) {
; CHECK-LABEL: @src_no_trans_select_or_eq0_xor_or(
; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[OR0:%.*]] = icmp eq i32 [[OR]], 0
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X]], [[Y]]
-; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR0]], i32 [[XOR]], i32 [[OR]]
-; CHECK-NEXT: ret i32 [[COND]]
+; CHECK-NEXT: ret i32 [[OR]]
;
%or = or i32 %x, %y
%or0 = icmp eq i32 %or, 0
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
index 86ca122..fb4545c 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
@@ -20,39 +20,37 @@ define void @interleaved_with_cond_store_0(ptr %p, i64 %x, i64 %n) {
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N:%.*]], 3
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[N]], 1
-; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 2, i64 [[N_MOD_VF]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[TMP1]]
+; CHECK-NEXT: [[DOTNEG:%.*]] = or i64 [[N]], -2
+; CHECK-NEXT: [[N_VEC:%.*]] = add nsw i64 [[DOTNEG]], [[N]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP0]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i64 0
-; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0
+; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP5]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
+; CHECK-NEXT: store i64 [[TMP4]], ptr [[TMP3]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP3]], i64 1
-; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i64 1
+; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.if1:
-; CHECK-NEXT: [[TMP8:%.*]] = or disjoint i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP8]], i32 1
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2
-; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = or disjoint i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP6]], i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2
+; CHECK-NEXT: store i64 [[TMP8]], ptr [[TMP7]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -61,11 +59,11 @@ define void @interleaved_with_cond_store_0(ptr %p, i64 %x, i64 %n) {
; CHECK: for.body:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[IF_MERGE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[P_1:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[I]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[P_1]], align 8
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[TMP12]], [[X]]
-; CHECK-NEXT: br i1 [[TMP13]], label [[IF_THEN:%.*]], label [[IF_MERGE]]
+; CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[P_1]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[TMP10]], [[X]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[IF_THEN:%.*]], label [[IF_MERGE]]
; CHECK: if.then:
-; CHECK-NEXT: store i64 [[TMP12]], ptr [[P_1]], align 8
+; CHECK-NEXT: store i64 [[TMP10]], ptr [[P_1]], align 8
; CHECK-NEXT: br label [[IF_MERGE]]
; CHECK: if.merge:
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
@@ -114,46 +112,44 @@ define void @interleaved_with_cond_store_1(ptr %p, i64 %x, i64 %n) {
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N:%.*]], 3
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[N]], 1
-; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 2, i64 [[N_MOD_VF]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[TMP1]]
+; CHECK-NEXT: [[DOTNEG:%.*]] = or i64 [[N]], -2
+; CHECK-NEXT: [[N_VEC:%.*]] = add nsw i64 [[DOTNEG]], [[N]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 1
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 1
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i64> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i64 0
-; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i64> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
+; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP6]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP6]], i64 1
-; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1
+; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.if1:
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2
-; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP11]], align 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2
+; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.continue2:
-; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[WIDE_VEC3]], i64 0
-; CHECK-NEXT: store i64 [[TMP13]], ptr [[TMP4]], align 8
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[WIDE_VEC3]], i64 2
-; CHECK-NEXT: store i64 [[TMP14]], ptr [[TMP5]], align 8
+; CHECK-NEXT: [[WIDE_VEC3:%.*]] = load <4 x i64>, ptr [[TMP1]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i64> [[WIDE_VEC3]], i64 0
+; CHECK-NEXT: store i64 [[TMP11]], ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[WIDE_VEC3]], i64 2
+; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP3]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -163,15 +159,15 @@ define void @interleaved_with_cond_store_1(ptr %p, i64 %x, i64 %n) {
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[IF_MERGE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[P_0:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[I]], i32 0
; CHECK-NEXT: [[P_1:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[I]], i32 1
-; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[P_1]], align 8
-; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[TMP16]], [[X]]
-; CHECK-NEXT: br i1 [[TMP17]], label [[IF_THEN:%.*]], label [[IF_MERGE]]
+; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[P_1]], align 8
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[TMP14]], [[X]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[IF_THEN:%.*]], label [[IF_MERGE]]
; CHECK: if.then:
-; CHECK-NEXT: store i64 [[TMP16]], ptr [[P_0]], align 8
+; CHECK-NEXT: store i64 [[TMP14]], ptr [[P_0]], align 8
; CHECK-NEXT: br label [[IF_MERGE]]
; CHECK: if.merge:
-; CHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[P_0]], align 8
-; CHECK-NEXT: store i64 [[TMP18]], ptr [[P_1]], align 8
+; CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr [[P_0]], align 8
+; CHECK-NEXT: store i64 [[TMP16]], ptr [[P_1]], align 8
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -220,43 +216,41 @@ define void @interleaved_with_cond_store_2(ptr %p, i64 %x, i64 %n) {
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N:%.*]], 3
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = and i64 [[N]], 1
-; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[TMP0]], i64 2, i64 [[N_MOD_VF]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub nsw i64 [[N]], [[TMP1]]
+; CHECK-NEXT: [[DOTNEG:%.*]] = or i64 [[N]], -2
+; CHECK-NEXT: [[N_VEC:%.*]] = add nsw i64 [[DOTNEG]], [[N]]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = or disjoint i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
+; CHECK-NEXT: [[TMP0:%.*]] = or disjoint i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP3]], align 8
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: store i64 [[X]], ptr [[TMP3]], align 8
-; CHECK-NEXT: store i64 [[X]], ptr [[TMP4]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i64> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP6]], i64 0
-; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK-NEXT: store i64 [[X]], ptr [[TMP1]], align 8
+; CHECK-NEXT: store i64 [[X]], ptr [[TMP2]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i64> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
-; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP8]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[INDEX]], i32 1
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 0
+; CHECK-NEXT: store i64 [[TMP7]], ptr [[TMP6]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP6]], i64 1
-; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1
+; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.if1:
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP2]], i32 1
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2
-; CHECK-NEXT: store i64 [[TMP12]], ptr [[TMP11]], align 8
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP0]], i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2
+; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 8
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -266,12 +260,12 @@ define void @interleaved_with_cond_store_2(ptr %p, i64 %x, i64 %n) {
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[IF_MERGE:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[P_0:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[I]], i32 0
; CHECK-NEXT: [[P_1:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[I]], i32 1
-; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[P_1]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[P_1]], align 8
; CHECK-NEXT: store i64 [[X]], ptr [[P_0]], align 8
-; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[TMP14]], [[X]]
-; CHECK-NEXT: br i1 [[TMP15]], label [[IF_THEN:%.*]], label [[IF_MERGE]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[TMP12]], [[X]]
+; CHECK-NEXT: br i1 [[TMP13]], label [[IF_THEN:%.*]], label [[IF_MERGE]]
; CHECK: if.then:
-; CHECK-NEXT: store i64 [[TMP14]], ptr [[P_1]], align 8
+; CHECK-NEXT: store i64 [[TMP12]], ptr [[P_1]], align 8
; CHECK-NEXT: br label [[IF_MERGE]]
; CHECK: if.merge:
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1