-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAG.h                                 |  11
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp                          |   9
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp                              |  18
-rw-r--r--  llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll                 |  81
-rw-r--r--  llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll              |   8
-rw-r--r--  llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll                | 108
-rw-r--r--  llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll              |   6
-rw-r--r--  llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll  |  21
-rw-r--r--  llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll |  28
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfw-web-simplification.ll      |  90
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vw-web-simplification.ll       |  55
11 files changed, 93 insertions(+), 342 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index a98e46c5..3abdafa 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -953,8 +953,17 @@ public:
}
/// Insert \p SubVec at the \p Idx element of \p Vec.
+ /// If \p SkipUndef is true and \p SubVec is UNDEF/POISON, then \p Vec is
+ /// returned.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec,
- unsigned Idx) {
+ unsigned Idx, bool SkipUndef = false) {
+ // Skipping the insert of an UNDEF subvector can leave POISON elements in
+ // the resulting vector. SkipUndef is useful in situations where getNode
+ // can't reason well enough to ignore the insert on its own, e.g. with
+ // scalable vectors, and where the caller knows that the subvector being
+ // replaced isn't POISON.
+ if (SkipUndef && SubVec.isUndef())
+ return Vec;
return getNode(ISD::INSERT_SUBVECTOR, DL, Vec.getValueType(), Vec, SubVec,
getVectorIdxConstant(Idx, DL));
}
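
For illustration, a minimal caller-side sketch of the new flag (hypothetical names Parts, NumParts, and NumPartElts; not part of this patch): a caller assembling a wider vector from pieces, which knows the replaced lanes are not POISON, can let the helper drop no-op inserts instead of checking each piece by hand:

    SDValue Vec = DAG.getUNDEF(VT);
    for (unsigned I = 0; I != NumParts; ++I)
      Vec = DAG.getInsertSubvector(DL, Vec, Parts[I], I * NumPartElts,
                                   /*SkipUndef=*/true);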
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fb8bd81..761f7ea 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15125,11 +15125,14 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (PreferDUPAndInsert) {
// First, build a constant vector with the common element.
- SmallVector<SDValue, 8> Ops(NumElts, Value);
+ // Make sure to freeze the common element first, since it will also be used
+ // for lanes that should be UNDEF (we want to avoid making those elements
+ // more poisonous).
+ SmallVector<SDValue, 8> Ops(NumElts, DAG.getFreeze(Value));
SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, DL, Ops), DAG);
// Next, insert the elements that do not match the common value.
for (unsigned I = 0; I < NumElts; ++I)
- if (Op.getOperand(I) != Value)
+ if (Op.getOperand(I) != Value && !Op.getOperand(I).isUndef())
NewVector =
DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NewVector,
Op.getOperand(I), DAG.getConstant(I, DL, MVT::i64));
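
The freeze is needed because the splatted common element now also populates lanes whose original operands were UNDEF; if Value were poison, those lanes would silently become poison as well. A minimal sketch of the pattern, using the names from this function:

    // Freeze once, then splat: every lane, including the ones standing in for
    // UNDEF operands, holds the same frozen value, so no lane ends up more
    // poisonous than in the original BUILD_VECTOR.
    SDValue Frozen = DAG.getFreeze(Value);
    SmallVector<SDValue, 8> Ops(NumElts, Frozen);
    SDValue Splat = DAG.getBuildVector(VT, DL, Ops);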
@@ -28721,7 +28724,7 @@ static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
"Expected a fixed length vector operand!");
SDLoc DL(V);
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
- return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getPOISON(VT), V, Zero);
}
// Shrink V so it's just big enough to maintain a VT's worth of data.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4f280c3..55e352a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2859,7 +2859,7 @@ static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
assert(V.getValueType().isFixedLengthVector() &&
"Expected a fixed length vector operand!");
SDLoc DL(V);
- return DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), V, 0);
+ return DAG.getInsertSubvector(DL, DAG.getPOISON(VT), V, 0);
}
// Shrink V so it's just big enough to maintain a VT's worth of data.
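
The switch from UNDEF to POISON here is deliberate: only the low lanes holding V are ever read, and a POISON base constrains later folds less than UNDEF does. A sketch of the resulting pattern, names as in the function above:

    // Widen a fixed-length value into a scalable container; the lanes above
    // V's width carry no value, so the least-constrained base is POISON.
    SDValue Container = DAG.getPOISON(VT);
    SDValue Widened = DAG.getInsertSubvector(DL, Container, V, /*Idx=*/0);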
@@ -4347,7 +4347,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
DAG.getNode(ISD::BUILD_VECTOR, DL, OneRegVT, OneVRegOfOps);
SubBV = convertToScalableVector(M1VT, SubBV, DAG, Subtarget);
unsigned InsertIdx = (i / ElemsPerVReg) * NumOpElts;
- Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx);
+ Vec = DAG.getInsertSubvector(DL, Vec, SubBV, InsertIdx,
+ /*SkipUndef=*/true);
}
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
@@ -7849,10 +7850,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SDValue Vec = DAG.getUNDEF(VT);
for (const auto &OpIdx : enumerate(Op->ops())) {
SDValue SubVec = OpIdx.value();
- // Don't insert undef subvectors.
- if (SubVec.isUndef())
- continue;
- Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts);
+ Vec = DAG.getInsertSubvector(DL, Vec, SubVec, OpIdx.index() * NumOpElts,
+ /*SkipUndef=*/true);
}
return Vec;
}
@@ -12272,9 +12271,10 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
// Reassemble the low and high pieces reversed.
// FIXME: This is a CONCAT_VECTORS.
- SDValue Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(VecVT), Hi, 0);
- return DAG.getInsertSubvector(DL, Res, Lo,
- LoVT.getVectorMinNumElements());
+ SDValue Res = DAG.getInsertSubvector(DL, DAG.getUNDEF(VecVT), Hi, 0,
+ /*SkipUndef=*/true);
+ return DAG.getInsertSubvector(DL, Res, Lo, LoVT.getVectorMinNumElements(),
+ /*SkipUndef=*/true);
}
// Just promote the int type to i16 which will double the LMUL.
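
As the FIXME notes, the two inserts express a concatenation of the reversed halves; a sketch of the direct single-node form (assuming a CONCAT_VECTORS of the two scalable halves is acceptable at this point in lowering):

    SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Hi, Lo);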
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll
index 2905d70..9efe0b3 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll
@@ -37,10 +37,6 @@ define void @select_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
@@ -63,15 +59,8 @@ define void @select_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_256-NEXT: fcmeq p2.h, p0/z, z2.h, z3.h
-; VBITS_GE_256-NEXT: mov z4.h, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: ptrue p1.h
-; VBITS_GE_256-NEXT: mov z5.h, p2/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: and z4.h, z4.h, #0x1
-; VBITS_GE_256-NEXT: and z5.h, z5.h, #0x1
-; VBITS_GE_256-NEXT: cmpne p2.h, p1/z, z4.h, #0
-; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z5.h, #0
-; VBITS_GE_256-NEXT: sel z0.h, p2, z0.h, z1.h
-; VBITS_GE_256-NEXT: sel z1.h, p1, z2.h, z3.h
+; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z1.h
+; VBITS_GE_256-NEXT: sel z1.h, p2, z2.h, z3.h
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
@@ -82,10 +71,6 @@ define void @select_v32f16(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
-; VBITS_GE_512-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_512-NEXT: ptrue p1.h
-; VBITS_GE_512-NEXT: and z2.h, z2.h, #0x1
-; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z2.h, #0
; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
@@ -104,10 +89,6 @@ define void @select_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
@@ -126,10 +107,6 @@ define void @select_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
@@ -173,10 +150,6 @@ define void @select_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and z2.s, z2.s, #0x1
-; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
@@ -199,15 +172,8 @@ define void @select_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z2.s, z3.s
-; VBITS_GE_256-NEXT: mov z4.s, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: ptrue p1.s
-; VBITS_GE_256-NEXT: mov z5.s, p2/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: and z4.s, z4.s, #0x1
-; VBITS_GE_256-NEXT: and z5.s, z5.s, #0x1
-; VBITS_GE_256-NEXT: cmpne p2.s, p1/z, z4.s, #0
-; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z5.s, #0
-; VBITS_GE_256-NEXT: sel z0.s, p2, z0.s, z1.s
-; VBITS_GE_256-NEXT: sel z1.s, p1, z2.s, z3.s
+; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z1.s
+; VBITS_GE_256-NEXT: sel z1.s, p2, z2.s, z3.s
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
@@ -218,10 +184,6 @@ define void @select_v16f32(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
-; VBITS_GE_512-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_512-NEXT: ptrue p1.s
-; VBITS_GE_512-NEXT: and z2.s, z2.s, #0x1
-; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z2.s, #0
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
@@ -240,10 +202,6 @@ define void @select_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and z2.s, z2.s, #0x1
-; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
@@ -262,10 +220,6 @@ define void @select_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and z2.s, z2.s, #0x1
-; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
@@ -310,10 +264,6 @@ define void @select_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: and z2.d, z2.d, #0x1
-; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
@@ -336,15 +286,8 @@ define void @select_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z2.d, z3.d
-; VBITS_GE_256-NEXT: mov z4.d, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: ptrue p1.d
-; VBITS_GE_256-NEXT: mov z5.d, p2/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: and z4.d, z4.d, #0x1
-; VBITS_GE_256-NEXT: and z5.d, z5.d, #0x1
-; VBITS_GE_256-NEXT: cmpne p2.d, p1/z, z4.d, #0
-; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z5.d, #0
-; VBITS_GE_256-NEXT: sel z0.d, p2, z0.d, z1.d
-; VBITS_GE_256-NEXT: sel z1.d, p1, z2.d, z3.d
+; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z1.d
+; VBITS_GE_256-NEXT: sel z1.d, p2, z2.d, z3.d
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
@@ -355,10 +298,6 @@ define void @select_v8f64(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
-; VBITS_GE_512-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_512-NEXT: ptrue p1.d
-; VBITS_GE_512-NEXT: and z2.d, z2.d, #0x1
-; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z2.d, #0
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
@@ -377,10 +316,6 @@ define void @select_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: and z2.d, z2.d, #0x1
-; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
@@ -399,10 +334,6 @@ define void @select_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: and z2.d, z2.d, #0x1
-; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll
index 2f76be6..5e94007 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll
@@ -12,12 +12,12 @@ define void @foo(ptr %a) #0 {
; CHECK: SelectionDAG has 13 nodes:
; CHECK-NEXT: t0: ch,glue = EntryToken
; CHECK-NEXT: t2: i64,ch = CopyFromReg t0, Register:i64 %0
-; CHECK-NEXT: t21: nxv2i64,ch = LDR_ZXI<Mem:(volatile load (<vscale x 1 x s128>) from %ir.a, align 64)> t2, TargetConstant:i64<0>, t0
+; CHECK-NEXT: t22: nxv2i64,ch = LDR_ZXI<Mem:(volatile load (<vscale x 1 x s128>) from %ir.a, align 64)> t2, TargetConstant:i64<0>, t0
; CHECK-NEXT: t8: i64 = ADDXri TargetFrameIndex:i64<1>, TargetConstant:i32<0>, TargetConstant:i32<0>
; CHECK-NEXT: t6: i64 = ADDXri TargetFrameIndex:i64<0>, TargetConstant:i32<0>, TargetConstant:i32<0>
-; CHECK-NEXT: t22: ch = STR_ZXI<Mem:(volatile store (<vscale x 1 x s128>) into %ir.r0, align 64)> t21, t6, TargetConstant:i64<0>, t21:1
-; CHECK-NEXT: t23: ch = STR_ZXI<Mem:(volatile store (<vscale x 1 x s128>) into %ir.r1, align 64)> t21, t8, TargetConstant:i64<0>, t22
-; CHECK-NEXT: t10: ch = RET_ReallyLR t23
+; CHECK-NEXT: t23: ch = STR_ZXI<Mem:(volatile store (<vscale x 1 x s128>) into %ir.r0, align 64)> t22, t6, TargetConstant:i64<0>, t22:1
+; CHECK-NEXT: t24: ch = STR_ZXI<Mem:(volatile store (<vscale x 1 x s128>) into %ir.r1, align 64)> t22, t8, TargetConstant:i64<0>, t23
+; CHECK-NEXT: t10: ch = RET_ReallyLR t24
; CHECK-EMPTY:
entry:
%r0 = alloca <8 x i64>
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll
index 0e95da3..9cebbc4 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll
@@ -36,10 +36,6 @@ define void @select_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: mov z2.b, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: and z2.b, z2.b, #0x1
-; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
@@ -62,15 +58,8 @@ define void @select_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
; VBITS_GE_256-NEXT: cmpeq p2.b, p0/z, z2.b, z3.b
-; VBITS_GE_256-NEXT: mov z4.b, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: ptrue p1.b
-; VBITS_GE_256-NEXT: mov z5.b, p2/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: and z4.b, z4.b, #0x1
-; VBITS_GE_256-NEXT: and z5.b, z5.b, #0x1
-; VBITS_GE_256-NEXT: cmpne p2.b, p1/z, z4.b, #0
-; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z5.b, #0
-; VBITS_GE_256-NEXT: sel z0.b, p2, z0.b, z1.b
-; VBITS_GE_256-NEXT: sel z1.b, p1, z2.b, z3.b
+; VBITS_GE_256-NEXT: sel z0.b, p1, z0.b, z1.b
+; VBITS_GE_256-NEXT: sel z1.b, p2, z2.b, z3.b
; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT: ret
@@ -81,10 +70,6 @@ define void @select_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
-; VBITS_GE_512-NEXT: mov z2.b, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_512-NEXT: ptrue p1.b
-; VBITS_GE_512-NEXT: and z2.b, z2.b, #0x1
-; VBITS_GE_512-NEXT: cmpne p1.b, p1/z, z2.b, #0
; VBITS_GE_512-NEXT: sel z0.b, p1, z0.b, z1.b
; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT: ret
@@ -103,10 +88,6 @@ define void @select_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: mov z2.b, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: and z2.b, z2.b, #0x1
-; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
@@ -125,10 +106,6 @@ define void @select_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b
-; CHECK-NEXT: mov z2.b, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.b
-; CHECK-NEXT: and z2.b, z2.b, #0x1
-; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0
; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
@@ -172,10 +149,6 @@ define void @select_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
@@ -198,15 +171,8 @@ define void @select_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_256-NEXT: cmpeq p2.h, p0/z, z2.h, z3.h
-; VBITS_GE_256-NEXT: mov z4.h, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: ptrue p1.h
-; VBITS_GE_256-NEXT: mov z5.h, p2/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: and z4.h, z4.h, #0x1
-; VBITS_GE_256-NEXT: and z5.h, z5.h, #0x1
-; VBITS_GE_256-NEXT: cmpne p2.h, p1/z, z4.h, #0
-; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z5.h, #0
-; VBITS_GE_256-NEXT: sel z0.h, p2, z0.h, z1.h
-; VBITS_GE_256-NEXT: sel z1.h, p1, z2.h, z3.h
+; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z1.h
+; VBITS_GE_256-NEXT: sel z1.h, p2, z2.h, z3.h
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT: ret
@@ -217,10 +183,6 @@ define void @select_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
-; VBITS_GE_512-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_512-NEXT: ptrue p1.h
-; VBITS_GE_512-NEXT: and z2.h, z2.h, #0x1
-; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z2.h, #0
; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT: ret
@@ -239,10 +201,6 @@ define void @select_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
@@ -261,10 +219,6 @@ define void @select_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.h
-; CHECK-NEXT: and z2.h, z2.h, #0x1
-; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
@@ -308,10 +262,6 @@ define void @select_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and z2.s, z2.s, #0x1
-; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
@@ -334,15 +284,8 @@ define void @select_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z2.s, z3.s
-; VBITS_GE_256-NEXT: mov z4.s, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: ptrue p1.s
-; VBITS_GE_256-NEXT: mov z5.s, p2/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: and z4.s, z4.s, #0x1
-; VBITS_GE_256-NEXT: and z5.s, z5.s, #0x1
-; VBITS_GE_256-NEXT: cmpne p2.s, p1/z, z4.s, #0
-; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z5.s, #0
-; VBITS_GE_256-NEXT: sel z0.s, p2, z0.s, z1.s
-; VBITS_GE_256-NEXT: sel z1.s, p1, z2.s, z3.s
+; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z1.s
+; VBITS_GE_256-NEXT: sel z1.s, p2, z2.s, z3.s
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT: ret
@@ -353,10 +296,6 @@ define void @select_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
-; VBITS_GE_512-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_512-NEXT: ptrue p1.s
-; VBITS_GE_512-NEXT: and z2.s, z2.s, #0x1
-; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z2.s, #0
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT: ret
@@ -375,10 +314,6 @@ define void @select_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and z2.s, z2.s, #0x1
-; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
@@ -397,10 +332,6 @@ define void @select_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s
-; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.s
-; CHECK-NEXT: and z2.s, z2.s, #0x1
-; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
@@ -445,10 +376,6 @@ define void @select_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: and z2.d, z2.d, #0x1
-; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
@@ -471,15 +398,8 @@ define void @select_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z2.d, z3.d
-; VBITS_GE_256-NEXT: mov z4.d, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: ptrue p1.d
-; VBITS_GE_256-NEXT: mov z5.d, p2/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: and z4.d, z4.d, #0x1
-; VBITS_GE_256-NEXT: and z5.d, z5.d, #0x1
-; VBITS_GE_256-NEXT: cmpne p2.d, p1/z, z4.d, #0
-; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z5.d, #0
-; VBITS_GE_256-NEXT: sel z0.d, p2, z0.d, z1.d
-; VBITS_GE_256-NEXT: sel z1.d, p1, z2.d, z3.d
+; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z1.d
+; VBITS_GE_256-NEXT: sel z1.d, p2, z2.d, z3.d
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT: ret
@@ -490,10 +410,6 @@ define void @select_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
-; VBITS_GE_512-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_512-NEXT: ptrue p1.d
-; VBITS_GE_512-NEXT: and z2.d, z2.d, #0x1
-; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z2.d, #0
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT: ret
@@ -512,10 +428,6 @@ define void @select_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: and z2.d, z2.d, #0x1
-; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
@@ -534,10 +446,6 @@ define void @select_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: and z2.d, z2.d, #0x1
-; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
index ebd32c7..093e6cd 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
@@ -1198,15 +1198,11 @@ define void @masked_gather_passthru(ptr %a, ptr %b, ptr %c) vscale_range(16,0) #
; CHECK-NEXT: ptrue p0.s, vl32
; CHECK-NEXT: ptrue p2.d, vl32
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x2]
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: ld1d { z0.d }, p2/z, [x1]
; CHECK-NEXT: punpklo p2.h, p1.b
-; CHECK-NEXT: mov z1.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1w { z0.d }, p2/z, [z0.d]
-; CHECK-NEXT: and z1.s, z1.s, #0x1
-; CHECK-NEXT: cmpne p1.s, p1/z, z1.s, #0
-; CHECK-NEXT: ld1w { z1.s }, p0/z, [x2]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
index 8b845df..ec0693a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
@@ -199,13 +199,6 @@ define void @select_v16f16(ptr %a, ptr %b) {
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fcmeq p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT: fcmeq p0.h, p0/z, z2.h, z3.h
-; CHECK-NEXT: mov z4.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z5.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: and z4.h, z4.h, #0x1
-; CHECK-NEXT: and z5.h, z5.h, #0x1
-; CHECK-NEXT: cmpne p1.h, p0/z, z4.h, #0
-; CHECK-NEXT: cmpne p0.h, p0/z, z5.h, #0
; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: sel z1.h, p0, z2.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
@@ -441,13 +434,6 @@ define void @select_v8f32(ptr %a, ptr %b) {
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fcmeq p1.s, p0/z, z1.s, z0.s
; CHECK-NEXT: fcmeq p0.s, p0/z, z2.s, z3.s
-; CHECK-NEXT: mov z4.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z5.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: and z4.s, z4.s, #0x1
-; CHECK-NEXT: and z5.s, z5.s, #0x1
-; CHECK-NEXT: cmpne p1.s, p0/z, z4.s, #0
-; CHECK-NEXT: cmpne p0.s, p0/z, z5.s, #0
; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: sel z1.s, p0, z2.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
@@ -572,13 +558,6 @@ define void @select_v4f64(ptr %a, ptr %b) {
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: fcmeq p1.d, p0/z, z1.d, z0.d
; CHECK-NEXT: fcmeq p0.d, p0/z, z2.d, z3.d
-; CHECK-NEXT: mov z4.d, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z5.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: and z4.d, z4.d, #0x1
-; CHECK-NEXT: and z5.d, z5.d, #0x1
-; CHECK-NEXT: cmpne p1.d, p0/z, z4.d, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z5.d, #0
; CHECK-NEXT: mov z0.d, p1/m, z1.d
; CHECK-NEXT: sel z1.d, p0, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
index 12b7886..3970113 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll
@@ -293,13 +293,6 @@ define void @select_v32i8(ptr %a, ptr %b) {
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z0.b
; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z3.b
-; CHECK-NEXT: mov z4.b, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z5.b, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: and z4.b, z4.b, #0x1
-; CHECK-NEXT: and z5.b, z5.b, #0x1
-; CHECK-NEXT: cmpne p1.b, p0/z, z4.b, #0
-; CHECK-NEXT: cmpne p0.b, p0/z, z5.b, #0
; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: sel z1.b, p0, z2.b, z3.b
; CHECK-NEXT: stp q0, q1, [x0]
@@ -704,13 +697,6 @@ define void @select_v16i16(ptr %a, ptr %b) {
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z0.h
; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
-; CHECK-NEXT: mov z4.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z5.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: and z4.h, z4.h, #0x1
-; CHECK-NEXT: and z5.h, z5.h, #0x1
-; CHECK-NEXT: cmpne p1.h, p0/z, z4.h, #0
-; CHECK-NEXT: cmpne p0.h, p0/z, z5.h, #0
; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: sel z1.h, p0, z2.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
@@ -925,13 +911,6 @@ define void @select_v8i32(ptr %a, ptr %b) {
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z0.s
; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
-; CHECK-NEXT: mov z4.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z5.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: and z4.s, z4.s, #0x1
-; CHECK-NEXT: and z5.s, z5.s, #0x1
-; CHECK-NEXT: cmpne p1.s, p0/z, z4.s, #0
-; CHECK-NEXT: cmpne p0.s, p0/z, z5.s, #0
; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: sel z1.s, p0, z2.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
@@ -1065,13 +1044,6 @@ define void @select_v4i64(ptr %a, ptr %b) {
; CHECK-NEXT: ldp q1, q2, [x0]
; CHECK-NEXT: cmpeq p1.d, p0/z, z1.d, z0.d
; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
-; CHECK-NEXT: mov z4.d, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z5.d, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: and z4.d, z4.d, #0x1
-; CHECK-NEXT: and z5.d, z5.d, #0x1
-; CHECK-NEXT: cmpne p1.d, p0/z, z4.d, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z5.d, #0
; CHECK-NEXT: mov z0.d, p1/m, z1.d
; CHECK-NEXT: sel z1.d, p0, z2.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfw-web-simplification.ll
index 5aa3a24..aba9056 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfw-web-simplification.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfw-web-simplification.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING,NO_FOLDING1
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING,NO_FOLDING2
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING,ZVFHMIN
; Check that the default value enables the web folding and
@@ -8,35 +8,20 @@
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+f,+d -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=FOLDING
define void @vfwmul_v2f116_multiple_users(ptr %x, ptr %y, ptr %z, <2 x half> %a, <2 x half> %b, <2 x half> %b2) {
-; NO_FOLDING1-LABEL: vfwmul_v2f116_multiple_users:
-; NO_FOLDING1: # %bb.0:
-; NO_FOLDING1-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; NO_FOLDING1-NEXT: vfwcvt.f.f.v v11, v8
-; NO_FOLDING1-NEXT: vfwcvt.f.f.v v8, v9
-; NO_FOLDING1-NEXT: vfwcvt.f.f.v v9, v10
-; NO_FOLDING1-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; NO_FOLDING1-NEXT: vfmul.vv v10, v11, v8
-; NO_FOLDING1-NEXT: vfadd.vv v11, v11, v9
-; NO_FOLDING1-NEXT: vfsub.vv v8, v8, v9
-; NO_FOLDING1-NEXT: vse32.v v10, (a0)
-; NO_FOLDING1-NEXT: vse32.v v11, (a1)
-; NO_FOLDING1-NEXT: vse32.v v8, (a2)
-; NO_FOLDING1-NEXT: ret
-;
-; NO_FOLDING2-LABEL: vfwmul_v2f116_multiple_users:
-; NO_FOLDING2: # %bb.0:
-; NO_FOLDING2-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; NO_FOLDING2-NEXT: vfwcvt.f.f.v v11, v8
-; NO_FOLDING2-NEXT: vfwcvt.f.f.v v8, v9
-; NO_FOLDING2-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; NO_FOLDING2-NEXT: vfmul.vv v9, v11, v8
-; NO_FOLDING2-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; NO_FOLDING2-NEXT: vfwadd.wv v11, v11, v10
-; NO_FOLDING2-NEXT: vfwsub.wv v8, v8, v10
-; NO_FOLDING2-NEXT: vse32.v v9, (a0)
-; NO_FOLDING2-NEXT: vse32.v v11, (a1)
-; NO_FOLDING2-NEXT: vse32.v v8, (a2)
-; NO_FOLDING2-NEXT: ret
+; NO_FOLDING-LABEL: vfwmul_v2f116_multiple_users:
+; NO_FOLDING: # %bb.0:
+; NO_FOLDING-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; NO_FOLDING-NEXT: vfwcvt.f.f.v v11, v8
+; NO_FOLDING-NEXT: vfwcvt.f.f.v v8, v9
+; NO_FOLDING-NEXT: vfwcvt.f.f.v v9, v10
+; NO_FOLDING-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; NO_FOLDING-NEXT: vfmul.vv v10, v11, v8
+; NO_FOLDING-NEXT: vfadd.vv v11, v11, v9
+; NO_FOLDING-NEXT: vfsub.vv v8, v8, v9
+; NO_FOLDING-NEXT: vse32.v v10, (a0)
+; NO_FOLDING-NEXT: vse32.v v11, (a1)
+; NO_FOLDING-NEXT: vse32.v v8, (a2)
+; NO_FOLDING-NEXT: ret
;
; ZVFH-LABEL: vfwmul_v2f116_multiple_users:
; ZVFH: # %bb.0:
@@ -76,35 +61,20 @@ define void @vfwmul_v2f116_multiple_users(ptr %x, ptr %y, ptr %z, <2 x half> %a,
}
define void @vfwmul_v2f32_multiple_users(ptr %x, ptr %y, ptr %z, <2 x float> %a, <2 x float> %b, <2 x float> %b2) {
-; NO_FOLDING1-LABEL: vfwmul_v2f32_multiple_users:
-; NO_FOLDING1: # %bb.0:
-; NO_FOLDING1-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; NO_FOLDING1-NEXT: vfwcvt.f.f.v v11, v8
-; NO_FOLDING1-NEXT: vfwcvt.f.f.v v8, v9
-; NO_FOLDING1-NEXT: vfwcvt.f.f.v v9, v10
-; NO_FOLDING1-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; NO_FOLDING1-NEXT: vfmul.vv v10, v11, v8
-; NO_FOLDING1-NEXT: vfadd.vv v11, v11, v9
-; NO_FOLDING1-NEXT: vfsub.vv v8, v8, v9
-; NO_FOLDING1-NEXT: vse64.v v10, (a0)
-; NO_FOLDING1-NEXT: vse64.v v11, (a1)
-; NO_FOLDING1-NEXT: vse64.v v8, (a2)
-; NO_FOLDING1-NEXT: ret
-;
-; NO_FOLDING2-LABEL: vfwmul_v2f32_multiple_users:
-; NO_FOLDING2: # %bb.0:
-; NO_FOLDING2-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; NO_FOLDING2-NEXT: vfwcvt.f.f.v v11, v8
-; NO_FOLDING2-NEXT: vfwcvt.f.f.v v8, v9
-; NO_FOLDING2-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; NO_FOLDING2-NEXT: vfmul.vv v9, v11, v8
-; NO_FOLDING2-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; NO_FOLDING2-NEXT: vfwadd.wv v11, v11, v10
-; NO_FOLDING2-NEXT: vfwsub.wv v8, v8, v10
-; NO_FOLDING2-NEXT: vse64.v v9, (a0)
-; NO_FOLDING2-NEXT: vse64.v v11, (a1)
-; NO_FOLDING2-NEXT: vse64.v v8, (a2)
-; NO_FOLDING2-NEXT: ret
+; NO_FOLDING-LABEL: vfwmul_v2f32_multiple_users:
+; NO_FOLDING: # %bb.0:
+; NO_FOLDING-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; NO_FOLDING-NEXT: vfwcvt.f.f.v v11, v8
+; NO_FOLDING-NEXT: vfwcvt.f.f.v v8, v9
+; NO_FOLDING-NEXT: vfwcvt.f.f.v v9, v10
+; NO_FOLDING-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; NO_FOLDING-NEXT: vfmul.vv v10, v11, v8
+; NO_FOLDING-NEXT: vfadd.vv v11, v11, v9
+; NO_FOLDING-NEXT: vfsub.vv v8, v8, v9
+; NO_FOLDING-NEXT: vse64.v v10, (a0)
+; NO_FOLDING-NEXT: vse64.v v11, (a1)
+; NO_FOLDING-NEXT: vse64.v v8, (a2)
+; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vfwmul_v2f32_multiple_users:
; FOLDING: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vw-web-simplification.ll
index b093e9e3..227a428 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vw-web-simplification.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vw-web-simplification.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING1
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING1
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING2
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING2
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=1 | FileCheck %s --check-prefixes=NO_FOLDING
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=2 | FileCheck %s --check-prefixes=NO_FOLDING
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs %s -o - --riscv-lower-ext-max-web-size=3 | FileCheck %s --check-prefixes=FOLDING
; Check that the default value enables the web folding and
@@ -16,38 +16,21 @@
; We need the web size to be at least 3 for the folding to happen, because
; %c has 3 uses.
define <2 x i16> @vwmul_v2i16_multiple_users(ptr %x, ptr %y, ptr %z) {
-; NO_FOLDING1-LABEL: vwmul_v2i16_multiple_users:
-; NO_FOLDING1: # %bb.0:
-; NO_FOLDING1-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; NO_FOLDING1-NEXT: vle8.v v8, (a0)
-; NO_FOLDING1-NEXT: vle8.v v9, (a1)
-; NO_FOLDING1-NEXT: vle8.v v10, (a2)
-; NO_FOLDING1-NEXT: vsext.vf2 v11, v8
-; NO_FOLDING1-NEXT: vsext.vf2 v8, v9
-; NO_FOLDING1-NEXT: vsext.vf2 v9, v10
-; NO_FOLDING1-NEXT: vmul.vv v8, v11, v8
-; NO_FOLDING1-NEXT: vadd.vv v10, v11, v9
-; NO_FOLDING1-NEXT: vsub.vv v9, v11, v9
-; NO_FOLDING1-NEXT: vor.vv v8, v8, v10
-; NO_FOLDING1-NEXT: vor.vv v8, v8, v9
-; NO_FOLDING1-NEXT: ret
-;
-; NO_FOLDING2-LABEL: vwmul_v2i16_multiple_users:
-; NO_FOLDING2: # %bb.0:
-; NO_FOLDING2-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; NO_FOLDING2-NEXT: vle8.v v8, (a0)
-; NO_FOLDING2-NEXT: vle8.v v9, (a1)
-; NO_FOLDING2-NEXT: vle8.v v10, (a2)
-; NO_FOLDING2-NEXT: vsext.vf2 v11, v8
-; NO_FOLDING2-NEXT: vsext.vf2 v8, v9
-; NO_FOLDING2-NEXT: vmul.vv v8, v11, v8
-; NO_FOLDING2-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; NO_FOLDING2-NEXT: vwadd.wv v9, v11, v10
-; NO_FOLDING2-NEXT: vwsub.wv v11, v11, v10
-; NO_FOLDING2-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; NO_FOLDING2-NEXT: vor.vv v8, v8, v9
-; NO_FOLDING2-NEXT: vor.vv v8, v8, v11
-; NO_FOLDING2-NEXT: ret
+; NO_FOLDING-LABEL: vwmul_v2i16_multiple_users:
+; NO_FOLDING: # %bb.0:
+; NO_FOLDING-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; NO_FOLDING-NEXT: vle8.v v8, (a0)
+; NO_FOLDING-NEXT: vle8.v v9, (a1)
+; NO_FOLDING-NEXT: vle8.v v10, (a2)
+; NO_FOLDING-NEXT: vsext.vf2 v11, v8
+; NO_FOLDING-NEXT: vsext.vf2 v8, v9
+; NO_FOLDING-NEXT: vsext.vf2 v9, v10
+; NO_FOLDING-NEXT: vmul.vv v8, v11, v8
+; NO_FOLDING-NEXT: vadd.vv v10, v11, v9
+; NO_FOLDING-NEXT: vsub.vv v9, v11, v9
+; NO_FOLDING-NEXT: vor.vv v8, v8, v10
+; NO_FOLDING-NEXT: vor.vv v8, v8, v9
+; NO_FOLDING-NEXT: ret
;
; FOLDING-LABEL: vwmul_v2i16_multiple_users:
; FOLDING: # %bb.0: