about summary refs log tree commit diff
diff options
context:
space:
mode:
author Philip Reames <preames@rivosinc.com> 2024-02-08 11:28:06 -0800
committer GitHub <noreply@github.com> 2024-02-08 11:28:06 -0800
commit d0f72f88606b78447fb7b61214651854c787c26f (patch)
tree 1239cad761991a183f49e5eaa1a716702035fa38
parent 88e52511ca71165f1ff3d7c42229aeacb2c16db3 (diff)
download llvm-d0f72f88606b78447fb7b61214651854c787c26f.zip
llvm-d0f72f88606b78447fb7b61214651854c787c26f.tar.gz
llvm-d0f72f88606b78447fb7b61214651854c787c26f.tar.bz2
[RISCV] Consider truncate semantics in performBUILD_VECTORCombine (#81168)
Fixes https://github.com/llvm/llvm-project/issues/80910. Per the documentation in ISDOpcodes.h, for BUILD_VECTOR "The types of the operands must match the vector element type, except that integer types are allowed to be larger than the element type, in which case the operands are implicitly truncated." This transform was assuming that the scalar operand type matched the result type. This resulted in essentially performing a truncate before a binop, instead of after. As demonstrated by the test case changes, this is often not legal.
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.cpp 6
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll 6
-rw-r--r-- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll 513
-rw-r--r-- llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll 53
4 files changed, 399 insertions, 179 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 27037f4..0799cc2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14956,6 +14956,11 @@ static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
return SDValue();
+ // This BUILD_VECTOR involves an implicit truncation, and sinking
+ // truncates through binops is non-trivial.
+ if (N->op_begin()->getValueType() != VT.getVectorElementType())
+ return SDValue();
+
SmallVector<SDValue> LHSOps;
SmallVector<SDValue> RHSOps;
for (SDValue Op : N->ops()) {
@@ -14983,6 +14988,7 @@ static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
// have different LHS and RHS types.
if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType())
return SDValue();
+
RHSOps.push_back(Op.getOperand(1));
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
index e376688..af7d7f7a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
@@ -589,7 +589,8 @@ define <8 x i32> @add_constant_rhs_8xi32_partial(<8 x i32> %vin, i32 %a, i32 %b,
ret <8 x i32> %v3
}
-; FIXME: This is currently showing a miscompile, we effectively
+; Here we can not pull the ashr through into the vector domain due to
+; the truncate semantics of the build_vector. Doing so would
; truncate before the ashr instead of after it, so if %a or %b
; is e.g. UINT32_MAX+1 we get different result.
define <2 x i32> @build_vec_of_trunc_op(i64 %a, i64 %b) {
@@ -608,10 +609,11 @@ define <2 x i32> @build_vec_of_trunc_op(i64 %a, i64 %b) {
;
; RV64-LABEL: build_vec_of_trunc_op:
; RV64: # %bb.0: # %entry
+; RV64-NEXT: srli a0, a0, 1
+; RV64-NEXT: srli a1, a1, 1
; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vsrl.vi v8, v8, 1
; RV64-NEXT: ret
entry:
%conv11.i = ashr i64 %a, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
index cd47720..ead41b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
@@ -3,30 +3,65 @@
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64
define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
-; CHECK-LABEL: vselect_vv_v6i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; CHECK-NEXT: lbu a2, 0(a2)
-; CHECK-NEXT: vle32.v v8, (a1)
-; CHECK-NEXT: srli a1, a2, 1
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a2
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: srli a1, a2, 2
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: srli a1, a2, 3
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: srli a1, a2, 4
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: srli a2, a2, 5
-; CHECK-NEXT: vslide1down.vx v10, v10, a2
-; CHECK-NEXT: vslidedown.vi v10, v10, 2
-; CHECK-NEXT: vand.vi v10, v10, 1
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, mu
-; CHECK-NEXT: vle32.v v8, (a0), v0.t
-; CHECK-NEXT: vse32.v v8, (a3)
-; CHECK-NEXT: ret
+; RV32-LABEL: vselect_vv_v6i32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT: lbu a2, 0(a2)
+; RV32-NEXT: vle32.v v8, (a1)
+; RV32-NEXT: slli a1, a2, 30
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: andi a4, a2, 1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a4
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: slli a1, a2, 29
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: slli a1, a2, 28
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: slli a1, a2, 27
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: srli a2, a2, 5
+; RV32-NEXT: vslide1down.vx v10, v10, a2
+; RV32-NEXT: vslidedown.vi v10, v10, 2
+; RV32-NEXT: vand.vi v10, v10, 1
+; RV32-NEXT: vmsne.vi v0, v10, 0
+; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu
+; RV32-NEXT: vle32.v v8, (a0), v0.t
+; RV32-NEXT: vse32.v v8, (a3)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vselect_vv_v6i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT: lbu a2, 0(a2)
+; RV64-NEXT: vle32.v v8, (a1)
+; RV64-NEXT: slli a1, a2, 62
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: andi a4, a2, 1
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a4
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: slli a1, a2, 61
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: slli a1, a2, 60
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: slli a1, a2, 59
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: srli a2, a2, 5
+; RV64-NEXT: vslide1down.vx v10, v10, a2
+; RV64-NEXT: vslidedown.vi v10, v10, 2
+; RV64-NEXT: vand.vi v10, v10, 1
+; RV64-NEXT: vmsne.vi v0, v10, 0
+; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu
+; RV64-NEXT: vle32.v v8, (a0), v0.t
+; RV64-NEXT: vse32.v v8, (a3)
+; RV64-NEXT: ret
%va = load <6 x i32>, ptr %a
%vb = load <6 x i32>, ptr %b
%vcc = load <6 x i1>, ptr %cc
@@ -36,31 +71,67 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
}
define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
-; CHECK-LABEL: vselect_vx_v6i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; CHECK-NEXT: lbu a2, 0(a2)
-; CHECK-NEXT: vle32.v v8, (a1)
-; CHECK-NEXT: srli a1, a2, 1
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a2
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: srli a1, a2, 2
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: srli a1, a2, 3
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: srli a1, a2, 4
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: srli a2, a2, 5
-; CHECK-NEXT: vslide1down.vx v10, v10, a2
-; CHECK-NEXT: vslidedown.vi v10, v10, 2
-; CHECK-NEXT: vand.vi v10, v10, 1
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; CHECK-NEXT: vse32.v v8, (a3)
-; CHECK-NEXT: ret
+; RV32-LABEL: vselect_vx_v6i32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT: lbu a2, 0(a2)
+; RV32-NEXT: vle32.v v8, (a1)
+; RV32-NEXT: slli a1, a2, 30
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: andi a4, a2, 1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a4
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: slli a1, a2, 29
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: slli a1, a2, 28
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: slli a1, a2, 27
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: srli a2, a2, 5
+; RV32-NEXT: vslide1down.vx v10, v10, a2
+; RV32-NEXT: vslidedown.vi v10, v10, 2
+; RV32-NEXT: vand.vi v10, v10, 1
+; RV32-NEXT: vmsne.vi v0, v10, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmerge.vxm v8, v8, a0, v0
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT: vse32.v v8, (a3)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vselect_vx_v6i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT: lbu a2, 0(a2)
+; RV64-NEXT: vle32.v v8, (a1)
+; RV64-NEXT: slli a1, a2, 62
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: andi a4, a2, 1
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a4
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: slli a1, a2, 61
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: slli a1, a2, 60
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: slli a1, a2, 59
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: srli a2, a2, 5
+; RV64-NEXT: vslide1down.vx v10, v10, a2
+; RV64-NEXT: vslidedown.vi v10, v10, 2
+; RV64-NEXT: vand.vi v10, v10, 1
+; RV64-NEXT: vmsne.vi v0, v10, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vmerge.vxm v8, v8, a0, v0
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT: vse32.v v8, (a3)
+; RV64-NEXT: ret
%vb = load <6 x i32>, ptr %b
%ahead = insertelement <6 x i32> poison, i32 %a, i32 0
%va = shufflevector <6 x i32> %ahead, <6 x i32> poison, <6 x i32> zeroinitializer
@@ -71,31 +142,67 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
}
define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
-; CHECK-LABEL: vselect_vi_v6i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; CHECK-NEXT: lbu a1, 0(a1)
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: srli a0, a1, 1
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a1
-; CHECK-NEXT: vslide1down.vx v10, v10, a0
-; CHECK-NEXT: srli a0, a1, 2
-; CHECK-NEXT: vslide1down.vx v10, v10, a0
-; CHECK-NEXT: srli a0, a1, 3
-; CHECK-NEXT: vslide1down.vx v10, v10, a0
-; CHECK-NEXT: srli a0, a1, 4
-; CHECK-NEXT: vslide1down.vx v10, v10, a0
-; CHECK-NEXT: srli a1, a1, 5
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: vslidedown.vi v10, v10, 2
-; CHECK-NEXT: vand.vi v10, v10, 1
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; CHECK-NEXT: vse32.v v8, (a2)
-; CHECK-NEXT: ret
+; RV32-LABEL: vselect_vi_v6i32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT: lbu a1, 0(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: slli a0, a1, 30
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: andi a3, a1, 1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a3
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: slli a0, a1, 29
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: slli a0, a1, 28
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: slli a0, a1, 27
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: srli a1, a1, 5
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: vslidedown.vi v10, v10, 2
+; RV32-NEXT: vand.vi v10, v10, 1
+; RV32-NEXT: vmsne.vi v0, v10, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmerge.vim v8, v8, -1, v0
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT: vse32.v v8, (a2)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vselect_vi_v6i32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT: lbu a1, 0(a1)
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: slli a0, a1, 62
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: andi a3, a1, 1
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a3
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: slli a0, a1, 61
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: slli a0, a1, 60
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: slli a0, a1, 59
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: srli a1, a1, 5
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: vslidedown.vi v10, v10, 2
+; RV64-NEXT: vand.vi v10, v10, 1
+; RV64-NEXT: vmsne.vi v0, v10, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vmerge.vim v8, v8, -1, v0
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT: vse32.v v8, (a2)
+; RV64-NEXT: ret
%vb = load <6 x i32>, ptr %b
%a = insertelement <6 x i32> poison, i32 -1, i32 0
%va = shufflevector <6 x i32> %a, <6 x i32> poison, <6 x i32> zeroinitializer
@@ -107,30 +214,65 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
-; CHECK-LABEL: vselect_vv_v6f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; CHECK-NEXT: lbu a2, 0(a2)
-; CHECK-NEXT: vle32.v v8, (a1)
-; CHECK-NEXT: srli a1, a2, 1
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a2
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: srli a1, a2, 2
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: srli a1, a2, 3
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: srli a1, a2, 4
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: srli a2, a2, 5
-; CHECK-NEXT: vslide1down.vx v10, v10, a2
-; CHECK-NEXT: vslidedown.vi v10, v10, 2
-; CHECK-NEXT: vand.vi v10, v10, 1
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, mu
-; CHECK-NEXT: vle32.v v8, (a0), v0.t
-; CHECK-NEXT: vse32.v v8, (a3)
-; CHECK-NEXT: ret
+; RV32-LABEL: vselect_vv_v6f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT: lbu a2, 0(a2)
+; RV32-NEXT: vle32.v v8, (a1)
+; RV32-NEXT: slli a1, a2, 30
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: andi a4, a2, 1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a4
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: slli a1, a2, 29
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: slli a1, a2, 28
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: slli a1, a2, 27
+; RV32-NEXT: srli a1, a1, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: srli a2, a2, 5
+; RV32-NEXT: vslide1down.vx v10, v10, a2
+; RV32-NEXT: vslidedown.vi v10, v10, 2
+; RV32-NEXT: vand.vi v10, v10, 1
+; RV32-NEXT: vmsne.vi v0, v10, 0
+; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu
+; RV32-NEXT: vle32.v v8, (a0), v0.t
+; RV32-NEXT: vse32.v v8, (a3)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vselect_vv_v6f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT: lbu a2, 0(a2)
+; RV64-NEXT: vle32.v v8, (a1)
+; RV64-NEXT: slli a1, a2, 62
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: andi a4, a2, 1
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a4
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: slli a1, a2, 61
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: slli a1, a2, 60
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: slli a1, a2, 59
+; RV64-NEXT: srli a1, a1, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: srli a2, a2, 5
+; RV64-NEXT: vslide1down.vx v10, v10, a2
+; RV64-NEXT: vslidedown.vi v10, v10, 2
+; RV64-NEXT: vand.vi v10, v10, 1
+; RV64-NEXT: vmsne.vi v0, v10, 0
+; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu
+; RV64-NEXT: vle32.v v8, (a0), v0.t
+; RV64-NEXT: vse32.v v8, (a3)
+; RV64-NEXT: ret
%va = load <6 x float>, ptr %a
%vb = load <6 x float>, ptr %b
%vcc = load <6 x i1>, ptr %cc
@@ -140,31 +282,67 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
}
define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
-; CHECK-LABEL: vselect_vx_v6f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; CHECK-NEXT: lbu a1, 0(a1)
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: srli a0, a1, 1
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a1
-; CHECK-NEXT: vslide1down.vx v10, v10, a0
-; CHECK-NEXT: srli a0, a1, 2
-; CHECK-NEXT: vslide1down.vx v10, v10, a0
-; CHECK-NEXT: srli a0, a1, 3
-; CHECK-NEXT: vslide1down.vx v10, v10, a0
-; CHECK-NEXT: srli a0, a1, 4
-; CHECK-NEXT: vslide1down.vx v10, v10, a0
-; CHECK-NEXT: srli a1, a1, 5
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: vslidedown.vi v10, v10, 2
-; CHECK-NEXT: vand.vi v10, v10, 1
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; CHECK-NEXT: vse32.v v8, (a2)
-; CHECK-NEXT: ret
+; RV32-LABEL: vselect_vx_v6f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT: lbu a1, 0(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: slli a0, a1, 30
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: andi a3, a1, 1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a3
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: slli a0, a1, 29
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: slli a0, a1, 28
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: slli a0, a1, 27
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: srli a1, a1, 5
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: vslidedown.vi v10, v10, 2
+; RV32-NEXT: vand.vi v10, v10, 1
+; RV32-NEXT: vmsne.vi v0, v10, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT: vse32.v v8, (a2)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vselect_vx_v6f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT: lbu a1, 0(a1)
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: slli a0, a1, 62
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: andi a3, a1, 1
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a3
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: slli a0, a1, 61
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: slli a0, a1, 60
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: slli a0, a1, 59
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: srli a1, a1, 5
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: vslidedown.vi v10, v10, 2
+; RV64-NEXT: vand.vi v10, v10, 1
+; RV64-NEXT: vmsne.vi v0, v10, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT: vse32.v v8, (a2)
+; RV64-NEXT: ret
%vb = load <6 x float>, ptr %b
%ahead = insertelement <6 x float> poison, float %a, i32 0
%va = shufflevector <6 x float> %ahead, <6 x float> poison, <6 x i32> zeroinitializer
@@ -175,31 +353,67 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
}
define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
-; CHECK-LABEL: vselect_vfpzero_v6f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; CHECK-NEXT: lbu a1, 0(a1)
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: srli a0, a1, 1
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a1
-; CHECK-NEXT: vslide1down.vx v10, v10, a0
-; CHECK-NEXT: srli a0, a1, 2
-; CHECK-NEXT: vslide1down.vx v10, v10, a0
-; CHECK-NEXT: srli a0, a1, 3
-; CHECK-NEXT: vslide1down.vx v10, v10, a0
-; CHECK-NEXT: srli a0, a1, 4
-; CHECK-NEXT: vslide1down.vx v10, v10, a0
-; CHECK-NEXT: srli a1, a1, 5
-; CHECK-NEXT: vslide1down.vx v10, v10, a1
-; CHECK-NEXT: vslidedown.vi v10, v10, 2
-; CHECK-NEXT: vand.vi v10, v10, 1
-; CHECK-NEXT: vmsne.vi v0, v10, 0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; CHECK-NEXT: vse32.v v8, (a2)
-; CHECK-NEXT: ret
+; RV32-LABEL: vselect_vfpzero_v6f32:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT: lbu a1, 0(a1)
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: slli a0, a1, 30
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: andi a3, a1, 1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a3
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: slli a0, a1, 29
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: slli a0, a1, 28
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: slli a0, a1, 27
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v10, v10, a0
+; RV32-NEXT: srli a1, a1, 5
+; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: vslidedown.vi v10, v10, 2
+; RV32-NEXT: vand.vi v10, v10, 1
+; RV32-NEXT: vmsne.vi v0, v10, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmerge.vim v8, v8, 0, v0
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV32-NEXT: vse32.v v8, (a2)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vselect_vfpzero_v6f32:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT: lbu a1, 0(a1)
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: slli a0, a1, 62
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: andi a3, a1, 1
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a3
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: slli a0, a1, 61
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: slli a0, a1, 60
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: slli a0, a1, 59
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v10, v10, a0
+; RV64-NEXT: srli a1, a1, 5
+; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: vslidedown.vi v10, v10, 2
+; RV64-NEXT: vand.vi v10, v10, 1
+; RV64-NEXT: vmsne.vi v0, v10, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vmerge.vim v8, v8, 0, v0
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; RV64-NEXT: vse32.v v8, (a2)
+; RV64-NEXT: ret
%vb = load <6 x float>, ptr %b
%a = insertelement <6 x float> poison, float 0.0, i32 0
%va = shufflevector <6 x float> %a, <6 x float> poison, <6 x i32> zeroinitializer
@@ -497,6 +711,3 @@ define <64 x i1> @vselect_v64i1(<64 x i1> %a, <64 x i1> %b, <64 x i1> %cc) {
%v = select <64 x i1> %cc, <64 x i1> %a, <64 x i1> %b
ret <64 x i1> %v
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV32: {{.*}}
-; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index 4544cba..c016e8f 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -521,33 +521,35 @@ define void @test_urem_vec(ptr %X) nounwind {
;
; RV32MV-LABEL: test_urem_vec:
; RV32MV: # %bb.0:
-; RV32MV-NEXT: lbu a1, 4(a0)
-; RV32MV-NEXT: lw a2, 0(a0)
-; RV32MV-NEXT: slli a1, a1, 10
-; RV32MV-NEXT: srli a3, a2, 22
-; RV32MV-NEXT: or a1, a3, a1
-; RV32MV-NEXT: srli a3, a2, 11
+; RV32MV-NEXT: lw a1, 0(a0)
+; RV32MV-NEXT: andi a2, a1, 2047
; RV32MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32MV-NEXT: vmv.v.x v8, a2
+; RV32MV-NEXT: lbu a2, 4(a0)
+; RV32MV-NEXT: slli a3, a1, 10
+; RV32MV-NEXT: srli a3, a3, 21
; RV32MV-NEXT: vslide1down.vx v8, v8, a3
+; RV32MV-NEXT: slli a2, a2, 10
+; RV32MV-NEXT: srli a1, a1, 22
+; RV32MV-NEXT: or a1, a1, a2
+; RV32MV-NEXT: andi a1, a1, 2047
; RV32MV-NEXT: vslide1down.vx v8, v8, a1
+; RV32MV-NEXT: lui a1, %hi(.LCPI4_0)
+; RV32MV-NEXT: addi a1, a1, %lo(.LCPI4_0)
+; RV32MV-NEXT: vle16.v v9, (a1)
; RV32MV-NEXT: vslidedown.vi v8, v8, 1
-; RV32MV-NEXT: li a1, 2047
-; RV32MV-NEXT: lui a2, %hi(.LCPI4_0)
-; RV32MV-NEXT: addi a2, a2, %lo(.LCPI4_0)
-; RV32MV-NEXT: vle16.v v9, (a2)
-; RV32MV-NEXT: vand.vx v8, v8, a1
; RV32MV-NEXT: vid.v v10
; RV32MV-NEXT: vsub.vv v8, v8, v10
; RV32MV-NEXT: vmul.vv v8, v8, v9
; RV32MV-NEXT: vadd.vv v9, v8, v8
-; RV32MV-NEXT: lui a2, 41121
-; RV32MV-NEXT: addi a2, a2, -1527
+; RV32MV-NEXT: lui a1, 41121
+; RV32MV-NEXT: addi a1, a1, -1527
; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32MV-NEXT: vmv.s.x v10, a2
+; RV32MV-NEXT: vmv.s.x v10, a1
; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32MV-NEXT: vsext.vf2 v11, v10
; RV32MV-NEXT: vsll.vv v9, v9, v11
+; RV32MV-NEXT: li a1, 2047
; RV32MV-NEXT: vand.vx v8, v8, a1
; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32MV-NEXT: vmv.v.i v10, 1
@@ -585,31 +587,30 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64MV-NEXT: lwu a2, 0(a0)
; RV64MV-NEXT: slli a1, a1, 32
; RV64MV-NEXT: or a1, a2, a1
+; RV64MV-NEXT: slli a2, a1, 42
+; RV64MV-NEXT: srli a2, a2, 53
+; RV64MV-NEXT: andi a3, a1, 2047
; RV64MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64MV-NEXT: vmv.v.x v8, a1
-; RV64MV-NEXT: slli a1, a1, 24
-; RV64MV-NEXT: srli a1, a1, 24
-; RV64MV-NEXT: srli a2, a1, 11
+; RV64MV-NEXT: vmv.v.x v8, a3
; RV64MV-NEXT: vslide1down.vx v8, v8, a2
; RV64MV-NEXT: srli a1, a1, 22
; RV64MV-NEXT: vslide1down.vx v8, v8, a1
+; RV64MV-NEXT: lui a1, %hi(.LCPI4_0)
+; RV64MV-NEXT: addi a1, a1, %lo(.LCPI4_0)
+; RV64MV-NEXT: vle16.v v9, (a1)
; RV64MV-NEXT: vslidedown.vi v8, v8, 1
-; RV64MV-NEXT: li a1, 2047
-; RV64MV-NEXT: lui a2, %hi(.LCPI4_0)
-; RV64MV-NEXT: addi a2, a2, %lo(.LCPI4_0)
-; RV64MV-NEXT: vle16.v v9, (a2)
-; RV64MV-NEXT: vand.vx v8, v8, a1
; RV64MV-NEXT: vid.v v10
; RV64MV-NEXT: vsub.vv v8, v8, v10
; RV64MV-NEXT: vmul.vv v8, v8, v9
; RV64MV-NEXT: vadd.vv v9, v8, v8
-; RV64MV-NEXT: lui a2, 41121
-; RV64MV-NEXT: addi a2, a2, -1527
+; RV64MV-NEXT: lui a1, 41121
+; RV64MV-NEXT: addi a1, a1, -1527
; RV64MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64MV-NEXT: vmv.s.x v10, a2
+; RV64MV-NEXT: vmv.s.x v10, a1
; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64MV-NEXT: vsext.vf2 v11, v10
; RV64MV-NEXT: vsll.vv v9, v9, v11
+; RV64MV-NEXT: li a1, 2047
; RV64MV-NEXT: vand.vx v8, v8, a1
; RV64MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64MV-NEXT: vmv.v.i v10, 1