author    wizardengineer <juliuswoosebert@gmail.com>  2026-03-07 15:38:43 -0500
committer wizardengineer <juliuswoosebert@gmail.com>  2026-03-10 01:33:00 -0400
commit    8616f01e68dca2b9b05765dce0e3cec4d1cf87db (patch)
tree      ffac1c51700c0f447bcb719939360463d2474586
parent    a3c8bb26751264a3083fc663f2a03a0c84ac34f9 (diff)
[LLVM][RISCV] Regenerate ct.select test CHECK lines (users/wizardengineer/ct-select-riscv)
Update CHECK lines to match the new constant-time AND/OR/XOR expansion from the CT_SELECT legalization fix.
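
For reference, the scalar shape the regenerated CHECK lines now match is the
classic branchless mask-select. A minimal C sketch of the pattern (an
illustration inferred from the assembly below, not the legalization source
itself; ct_select_u32 is a hypothetical name):

    #include <stdint.h>

    /* Branchless select: returns a when cond is 1, b when cond is 0.
     * The mask is all-ones or all-zeros, so no branch depends on cond. */
    static inline uint32_t ct_select_u32(uint32_t cond, uint32_t a,
                                         uint32_t b) {
        uint32_t mask = -(cond & 1);   /* 0xFFFFFFFF if cond, else 0 */
        return b ^ ((a ^ b) & mask);   /* xor/and/xor picks a or b   */
    }

On RISC-V the mask is materialized by sign-extending the condition bit with
slli/srai (shift amount 63 on RV64, 31 on RV32), which is why the new CHECK
lines consistently show slli/srai followed by and/xor in place of the old
neg/addi + and/and/or sequence. For the RVV tests the same select instead
folds into a single vmerge.vvm or a masked vector op, as the updated vector
CHECK lines show.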
-rw-r--r--  llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll  | 144
-rw-r--r--  llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll    | 139
-rw-r--r--  llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll  | 476
-rw-r--r--  llvm/test/CodeGen/RISCV/ctselect-side-effects.ll         |  15
4 files changed, 267 insertions(+), 507 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll b/llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll
index af1be0c8f3dd..06e1009485ac 100644
--- a/llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll
+++ b/llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll
@@ -6,18 +6,20 @@
define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
; RV64-LABEL: test_ctselect_i1:
; RV64: # %bb.0:
-; RV64-NEXT: and a1, a0, a1
-; RV64-NEXT: xori a0, a0, 1
-; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: xor a1, a1, a2
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: xor a0, a2, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_i1:
; RV32: # %bb.0:
-; RV32-NEXT: and a1, a0, a1
-; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: xor a0, a2, a0
; RV32-NEXT: ret
%result = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b)
ret i1 %result
@@ -27,21 +29,17 @@ define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
define i32 @test_ctselect_extremal_values(i1 %cond) {
; RV64-LABEL: test_ctselect_extremal_values:
; RV64: # %bb.0:
-; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
; RV64-NEXT: lui a1, 524288
-; RV64-NEXT: subw a0, a1, a0
+; RV64-NEXT: xor a0, a0, a1
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_extremal_values:
; RV32: # %bb.0:
; RV32-NEXT: andi a0, a0, 1
; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: addi a2, a0, -1
-; RV32-NEXT: neg a0, a0
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: srli a0, a0, 1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: sub a0, a1, a0
; RV32-NEXT: ret
%result = call i32 @llvm.ct.select.i32(i1 %cond, i32 2147483647, i32 -2147483648)
ret i32 %result
@@ -53,14 +51,14 @@ define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) {
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 63
; RV64-NEXT: srai a0, a0, 63
-; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_null_ptr:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 31
; RV32-NEXT: srai a0, a0, 31
-; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: ret
%result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %ptr, ptr null)
ret ptr %result
@@ -70,22 +68,20 @@ define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) {
define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) {
; RV64-LABEL: test_ctselect_function_ptr:
; RV64: # %bb.0:
-; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: neg a3, a0
-; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a1, a3, a1
-; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: xor a1, a1, a2
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: xor a0, a2, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_function_ptr:
; RV32: # %bb.0:
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: neg a3, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a1, a3, a1
-; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: xor a0, a2, a0
; RV32-NEXT: ret
%result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %func1, ptr %func2)
ret ptr %result
@@ -97,22 +93,20 @@ define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
; RV64: # %bb.0:
; RV64-NEXT: xor a0, a0, a1
; RV64-NEXT: snez a0, a0
+; RV64-NEXT: xor a2, a2, a3
; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a2, a0, a2
-; RV64-NEXT: not a0, a0
-; RV64-NEXT: and a0, a0, a3
-; RV64-NEXT: or a0, a2, a0
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: xor a0, a3, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_ptr_cmp:
; RV32: # %bb.0:
; RV32-NEXT: xor a0, a0, a1
; RV32-NEXT: snez a0, a0
+; RV32-NEXT: xor a2, a2, a3
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a2, a0, a2
-; RV32-NEXT: not a0, a0
-; RV32-NEXT: and a0, a0, a3
-; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: and a0, a2, a0
+; RV32-NEXT: xor a0, a3, a0
; RV32-NEXT: ret
%cmp = icmp eq ptr %p1, %p2
%result = call ptr @llvm.ct.select.p0(i1 %cmp, ptr %a, ptr %b)
@@ -125,22 +119,20 @@ define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) {
; RV64-LABEL: test_ctselect_struct_ptr:
; RV64: # %bb.0:
-; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: neg a3, a0
-; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a1, a3, a1
-; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: xor a1, a1, a2
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: xor a0, a2, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_struct_ptr:
; RV32: # %bb.0:
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: neg a3, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a1, a3, a1
-; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: xor a0, a2, a0
; RV32-NEXT: ret
%result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
ret ptr %result
@@ -162,44 +154,40 @@ define i32 @test_ctselect_deeply_nested(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i32 %a,
; RV64-NEXT: srai a1, a1, 63
; RV64-NEXT: srai a2, a2, 63
; RV64-NEXT: and a0, a4, a0
-; RV64-NEXT: xor a0, a0, a5
+; RV64-NEXT: xor a0, a5, a0
; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: xor a1, a7, t0
-; RV64-NEXT: xor a0, a0, a6
+; RV64-NEXT: xor a0, a6, a0
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
; RV64-NEXT: srai a3, a3, 63
; RV64-NEXT: and a0, a0, a3
-; RV64-NEXT: xor a0, a0, t0
+; RV64-NEXT: xor a0, t0, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_deeply_nested:
; RV32: # %bb.0:
; RV32-NEXT: lw t0, 0(sp)
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: andi a1, a1, 1
-; RV32-NEXT: andi a2, a2, 1
-; RV32-NEXT: andi a3, a3, 1
-; RV32-NEXT: neg t1, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a4, t1, a4
-; RV32-NEXT: neg t1, a1
-; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: and a0, a0, a5
-; RV32-NEXT: neg a5, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a1, a1, a6
-; RV32-NEXT: neg a6, a3
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a2, a2, a7
-; RV32-NEXT: or a0, a4, a0
-; RV32-NEXT: and a0, t1, a0
-; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: and a0, a5, a0
-; RV32-NEXT: or a0, a0, a2
-; RV32-NEXT: and a0, a6, a0
-; RV32-NEXT: and a1, a3, t0
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a4, a4, a5
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: xor a5, a5, a6
+; RV32-NEXT: slli a1, a1, 31
+; RV32-NEXT: xor a6, a6, a7
+; RV32-NEXT: slli a2, a2, 31
+; RV32-NEXT: slli a3, a3, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: srai a1, a1, 31
+; RV32-NEXT: srai a2, a2, 31
+; RV32-NEXT: and a0, a4, a0
+; RV32-NEXT: xor a0, a5, a0
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: xor a1, a7, t0
+; RV32-NEXT: xor a0, a6, a0
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a1, a0
+; RV32-NEXT: srai a3, a3, 31
+; RV32-NEXT: and a0, a0, a3
+; RV32-NEXT: xor a0, t0, a0
; RV32-NEXT: ret
%sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
%sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
diff --git a/llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll b/llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll
index 1149971fd090..5fbc36d07db8 100644
--- a/llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll
+++ b/llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll
@@ -7,13 +7,13 @@ define i32 @test_ctselect_smin_zero(i32 %x) {
; RV64-LABEL: test_ctselect_smin_zero:
; RV64: # %bb.0:
; RV64-NEXT: sraiw a1, a0, 31
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_smin_zero:
; RV32: # %bb.0:
; RV32-NEXT: srai a1, a0, 31
-; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: ret
%cmp = icmp slt i32 %x, 0
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
@@ -27,14 +27,14 @@ define i32 @test_ctselect_smax_zero(i32 %x) {
; RV64-NEXT: sext.w a1, a0
; RV64-NEXT: sgtz a1, a1
; RV64-NEXT: neg a1, a1
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_smax_zero:
; RV32: # %bb.0:
; RV32-NEXT: sgtz a1, a0
; RV32-NEXT: neg a1, a1
-; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: ret
%cmp = icmp sgt i32 %x, 0
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
@@ -51,17 +51,16 @@ define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) {
; RV64-NEXT: xor a0, a0, a1
; RV64-NEXT: neg a2, a2
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_smin_generic:
; RV32: # %bb.0:
; RV32-NEXT: slt a2, a0, a1
-; RV32-NEXT: neg a3, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a1, a0
; RV32-NEXT: ret
%cmp = icmp slt i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -78,17 +77,16 @@ define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) {
; RV64-NEXT: xor a0, a0, a1
; RV64-NEXT: neg a2, a2
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_smax_generic:
; RV32: # %bb.0:
; RV32-NEXT: slt a2, a1, a0
-; RV32-NEXT: neg a3, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a1, a0
; RV32-NEXT: ret
%cmp = icmp sgt i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -105,17 +103,16 @@ define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) {
; RV64-NEXT: xor a0, a0, a1
; RV64-NEXT: neg a2, a2
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_umin_generic:
; RV32: # %bb.0:
; RV32-NEXT: sltu a2, a0, a1
-; RV32-NEXT: neg a3, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a1, a0
; RV32-NEXT: ret
%cmp = icmp ult i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -132,17 +129,16 @@ define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) {
; RV64-NEXT: xor a0, a0, a1
; RV64-NEXT: neg a2, a2
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_umax_generic:
; RV32: # %bb.0:
; RV32-NEXT: sltu a2, a1, a0
-; RV32-NEXT: neg a3, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a1, a0
; RV32-NEXT: ret
%cmp = icmp ugt i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -157,17 +153,16 @@ define i32 @test_ctselect_abs(i32 %x) {
; RV64-NEXT: xor a1, a1, a0
; RV64-NEXT: sraiw a2, a0, 31
; RV64-NEXT: and a1, a1, a2
-; RV64-NEXT: xor a0, a1, a0
+; RV64-NEXT: xor a0, a0, a1
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_abs:
; RV32: # %bb.0:
; RV32-NEXT: neg a1, a0
+; RV32-NEXT: xor a1, a1, a0
; RV32-NEXT: srai a2, a0, 31
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: not a2, a2
-; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: xor a0, a0, a1
; RV32-NEXT: ret
%neg = sub i32 0, %x
%cmp = icmp slt i32 %x, 0
@@ -183,17 +178,16 @@ define i32 @test_ctselect_nabs(i32 %x) {
; RV64-NEXT: xor a2, a0, a1
; RV64-NEXT: sraiw a0, a0, 31
; RV64-NEXT: and a0, a2, a0
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_nabs:
; RV32: # %bb.0:
; RV32-NEXT: neg a1, a0
-; RV32-NEXT: srai a2, a0, 31
+; RV32-NEXT: xor a2, a0, a1
+; RV32-NEXT: srai a0, a0, 31
; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: not a2, a2
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a0, a1, a0
; RV32-NEXT: ret
%neg = sub i32 0, %x
%cmp = icmp slt i32 %x, 0
@@ -270,12 +264,7 @@ define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) {
;
; RV32-LABEL: test_ctselect_identical_operands:
; RV32: # %bb.0:
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: neg a2, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a2, a2, a1
-; RV32-NEXT: and a0, a0, a1
-; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: mv a0, a1
; RV32-NEXT: ret
%result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %x, i32 %x)
ret i32 %result
@@ -288,22 +277,21 @@ define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) {
; RV64-NEXT: sext.w a1, a1
; RV64-NEXT: sext.w a0, a0
; RV64-NEXT: xor a0, a0, a1
-; RV64-NEXT: seqz a0, a0
-; RV64-NEXT: xor a2, a2, a3
+; RV64-NEXT: snez a0, a0
+; RV64-NEXT: xor a3, a3, a2
; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a0, a2, a0
-; RV64-NEXT: xor a0, a0, a3
+; RV64-NEXT: and a0, a3, a0
+; RV64-NEXT: xor a0, a2, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_inverted_condition:
; RV32: # %bb.0:
; RV32-NEXT: xor a0, a0, a1
-; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: snez a0, a0
+; RV32-NEXT: xor a3, a3, a2
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a2, a0, a2
-; RV32-NEXT: not a0, a0
-; RV32-NEXT: and a0, a0, a3
-; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: and a0, a3, a0
+; RV32-NEXT: xor a0, a2, a0
; RV32-NEXT: ret
%cmp = icmp eq i32 %x, %y
%not_cmp = xor i1 %cmp, true
@@ -324,34 +312,31 @@ define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c,
; RV64-NEXT: srai a0, a0, 63
; RV64-NEXT: srai a1, a1, 63
; RV64-NEXT: and a0, a3, a0
-; RV64-NEXT: xor a0, a0, a4
+; RV64-NEXT: xor a0, a4, a0
; RV64-NEXT: and a0, a0, a1
-; RV64-NEXT: xor a0, a0, a5
+; RV64-NEXT: xor a0, a5, a0
; RV64-NEXT: srai a2, a2, 63
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a6
+; RV64-NEXT: xor a0, a6, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_chain:
; RV32: # %bb.0:
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: andi a1, a1, 1
-; RV32-NEXT: andi a2, a2, 1
-; RV32-NEXT: neg a7, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a3, a7, a3
-; RV32-NEXT: neg a7, a1
-; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: and a0, a0, a4
-; RV32-NEXT: neg a4, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a1, a1, a5
-; RV32-NEXT: or a0, a3, a0
-; RV32-NEXT: and a0, a7, a0
-; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: and a0, a4, a0
-; RV32-NEXT: and a1, a2, a6
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a3, a3, a4
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: xor a4, a4, a5
+; RV32-NEXT: slli a1, a1, 31
+; RV32-NEXT: xor a5, a5, a6
+; RV32-NEXT: slli a2, a2, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: srai a1, a1, 31
+; RV32-NEXT: and a0, a3, a0
+; RV32-NEXT: xor a0, a4, a0
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: xor a0, a5, a0
+; RV32-NEXT: srai a2, a2, 31
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a6, a0
; RV32-NEXT: ret
%sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
%sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
@@ -364,14 +349,14 @@ define i64 @test_ctselect_i64_smin_zero(i64 %x) {
; RV64-LABEL: test_ctselect_i64_smin_zero:
; RV64: # %bb.0:
; RV64-NEXT: srai a1, a0, 63
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_i64_smin_zero:
; RV32: # %bb.0:
; RV32-NEXT: srai a2, a1, 31
-; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: and a1, a1, a2
; RV32-NEXT: ret
%cmp = icmp slt i64 %x, 0
%result = call i64 @llvm.ct.select.i64(i1 %cmp, i64 %x, i64 0)
diff --git a/llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll b/llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll
index a02e1e474944..6e5d3e72e14f 100644
--- a/llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll
+++ b/llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll
@@ -14,12 +14,7 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_basic(i1 %cond, <vscale x 4 x i32> %a,
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vmsne.vi v0, v12, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv4i32_basic:
@@ -29,12 +24,7 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_basic(i1 %cond, <vscale x 4 x i32> %a,
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vmsne.vi v0, v12, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv4i32_basic:
@@ -44,12 +34,7 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_basic(i1 %cond, <vscale x 4 x i32> %a,
; RV32-V128-NEXT: vmv.v.x v12, a0
; RV32-V128-NEXT: vmsne.vi v0, v12, 0
; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv4i32_basic:
@@ -59,12 +44,7 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_basic(i1 %cond, <vscale x 4 x i32> %a,
; RV64-V256-NEXT: vmv.v.x v12, a0
; RV64-V256-NEXT: vmsne.vi v0, v12, 0
; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-V256-NEXT: ret
%r = call <vscale x 4 x i32> @llvm.ct.select.nxv4i32(i1 %cond, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -74,70 +54,46 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_basic(i1 %cond, <vscale x 4 x i32> %a,
define <vscale x 4 x i32> @ctsel_nxv4i32_load(i1 %cond, ptr %p1, ptr %p2) {
; RV64-LABEL: ctsel_nxv4i32_load:
; RV64: # %bb.0:
-; RV64-NEXT: vl2re32.v v8, (a1)
-; RV64-NEXT: vl2re32.v v10, (a2)
+; RV64-NEXT: vl2re32.v v8, (a2)
; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v12, a0
-; RV64-NEXT: vmsne.vi v0, v12, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vmsne.vi v0, v10, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-NEXT: vle32.v v8, (a1), v0.t
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv4i32_load:
; RV32: # %bb.0:
-; RV32-NEXT: vl2re32.v v8, (a1)
-; RV32-NEXT: vl2re32.v v10, (a2)
+; RV32-NEXT: vl2re32.v v8, (a2)
; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v12, a0
-; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a0
+; RV32-NEXT: vmsne.vi v0, v10, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-NEXT: vle32.v v8, (a1), v0.t
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv4i32_load:
; RV32-V128: # %bb.0:
-; RV32-V128-NEXT: vl2re32.v v8, (a1)
-; RV32-V128-NEXT: vl2re32.v v10, (a2)
+; RV32-V128-NEXT: vl2re32.v v8, (a2)
; RV32-V128-NEXT: andi a0, a0, 1
-; RV32-V128-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV32-V128-NEXT: vmv.v.x v12, a0
-; RV32-V128-NEXT: vmsne.vi v0, v12, 0
-; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v10, a0
+; RV32-V128-NEXT: vmsne.vi v0, v10, 0
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-V128-NEXT: vle32.v v8, (a1), v0.t
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv4i32_load:
; RV64-V256: # %bb.0:
-; RV64-V256-NEXT: vl2re32.v v8, (a1)
-; RV64-V256-NEXT: vl2re32.v v10, (a2)
+; RV64-V256-NEXT: vl2re32.v v8, (a2)
; RV64-V256-NEXT: andi a0, a0, 1
-; RV64-V256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV64-V256-NEXT: vmv.v.x v12, a0
-; RV64-V256-NEXT: vmsne.vi v0, v12, 0
-; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v10, a0
+; RV64-V256-NEXT: vmsne.vi v0, v10, 0
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-V256-NEXT: vle32.v v8, (a1), v0.t
; RV64-V256-NEXT: ret
%a = load <vscale x 4 x i32>, ptr %p1, align 16
%b = load <vscale x 4 x i32>, ptr %p2, align 16
@@ -155,16 +111,10 @@ define void @ctsel_nxv4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vmsne.vi v0, v12, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vadd.vv v8, v8, v8
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64-NEXT: vadd.vv v10, v10, v10
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vs2r.v v8, (a3)
+; RV64-NEXT: vadd.vv v10, v8, v8, v0.t
+; RV64-NEXT: vs2r.v v10, (a3)
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv4i32_mixed:
@@ -175,16 +125,10 @@ define void @ctsel_nxv4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vadd.vv v8, v8, v8
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV32-NEXT: vadd.vv v10, v10, v10
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vs2r.v v8, (a3)
+; RV32-NEXT: vadd.vv v10, v8, v8, v0.t
+; RV32-NEXT: vs2r.v v10, (a3)
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv4i32_mixed:
@@ -195,16 +139,10 @@ define void @ctsel_nxv4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
; RV32-V128-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; RV32-V128-NEXT: vmv.v.x v12, a0
; RV32-V128-NEXT: vmsne.vi v0, v12, 0
-; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vadd.vv v8, v8, v8
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV32-V128-NEXT: vadd.vv v10, v10, v10
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
-; RV32-V128-NEXT: vs2r.v v8, (a3)
+; RV32-V128-NEXT: vadd.vv v10, v8, v8, v0.t
+; RV32-V128-NEXT: vs2r.v v10, (a3)
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv4i32_mixed:
@@ -215,16 +153,10 @@ define void @ctsel_nxv4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
; RV64-V256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; RV64-V256-NEXT: vmv.v.x v12, a0
; RV64-V256-NEXT: vmsne.vi v0, v12, 0
-; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vadd.vv v8, v8, v8
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64-V256-NEXT: vadd.vv v10, v10, v10
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
-; RV64-V256-NEXT: vs2r.v v8, (a3)
+; RV64-V256-NEXT: vadd.vv v10, v8, v8, v0.t
+; RV64-V256-NEXT: vs2r.v v10, (a3)
; RV64-V256-NEXT: ret
%a = load <vscale x 4 x i32>, ptr %p1, align 16
%b = load <vscale x 4 x i32>, ptr %p2, align 16
@@ -290,105 +222,65 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_chain(i1 %c1, i1 %c2,
; RV64-LABEL: ctsel_nxv4i32_chain:
; RV64: # %bb.0:
; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v14, 0
+; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v14, a0
+; RV64-NEXT: vmsne.vi v0, v14, 0
; RV64-NEXT: andi a1, a1, 1
-; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v16, a0
-; RV64-NEXT: vmsne.vi v0, v16, 0
-; RV64-NEXT: vmv.v.x v18, a1
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmerge.vim v16, v14, -1, v0
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmsne.vi v0, v18, 0
+; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmerge.vim v14, v14, -1, v0
-; RV64-NEXT: vand.vv v8, v16, v8
-; RV64-NEXT: vnot.v v16, v16
-; RV64-NEXT: vand.vv v10, v16, v10
-; RV64-NEXT: vnot.v v16, v14
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vand.vv v8, v14, v8
-; RV64-NEXT: vand.vv v10, v16, v12
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v12, v8, v0
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv4i32_chain:
; RV32: # %bb.0:
; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v14, 0
+; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v14, a0
+; RV32-NEXT: vmsne.vi v0, v14, 0
; RV32-NEXT: andi a1, a1, 1
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v16, a0
-; RV32-NEXT: vmsne.vi v0, v16, 0
-; RV32-NEXT: vmv.v.x v18, a1
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmerge.vim v16, v14, -1, v0
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmsne.vi v0, v18, 0
+; RV32-NEXT: vmv.v.x v10, a1
+; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmerge.vim v14, v14, -1, v0
-; RV32-NEXT: vand.vv v8, v16, v8
-; RV32-NEXT: vnot.v v16, v16
-; RV32-NEXT: vand.vv v10, v16, v10
-; RV32-NEXT: vnot.v v16, v14
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vand.vv v8, v14, v8
-; RV32-NEXT: vand.vv v10, v16, v12
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vmerge.vvm v8, v12, v8, v0
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv4i32_chain:
; RV32-V128: # %bb.0:
; RV32-V128-NEXT: andi a0, a0, 1
-; RV32-V128-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v14, 0
+; RV32-V128-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v14, a0
+; RV32-V128-NEXT: vmsne.vi v0, v14, 0
; RV32-V128-NEXT: andi a1, a1, 1
-; RV32-V128-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-V128-NEXT: vmv.v.x v16, a0
-; RV32-V128-NEXT: vmsne.vi v0, v16, 0
-; RV32-V128-NEXT: vmv.v.x v18, a1
; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmerge.vim v16, v14, -1, v0
+; RV32-V128-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-V128-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-V128-NEXT: vmsne.vi v0, v18, 0
+; RV32-V128-NEXT: vmv.v.x v10, a1
+; RV32-V128-NEXT: vmsne.vi v0, v10, 0
; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmerge.vim v14, v14, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v16, v8
-; RV32-V128-NEXT: vnot.v v16, v16
-; RV32-V128-NEXT: vand.vv v10, v16, v10
-; RV32-V128-NEXT: vnot.v v16, v14
-; RV32-V128-NEXT: vor.vv v8, v8, v10
-; RV32-V128-NEXT: vand.vv v8, v14, v8
-; RV32-V128-NEXT: vand.vv v10, v16, v12
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vmerge.vvm v8, v12, v8, v0
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv4i32_chain:
; RV64-V256: # %bb.0:
; RV64-V256-NEXT: andi a0, a0, 1
-; RV64-V256-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v14, 0
+; RV64-V256-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v14, a0
+; RV64-V256-NEXT: vmsne.vi v0, v14, 0
; RV64-V256-NEXT: andi a1, a1, 1
-; RV64-V256-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-V256-NEXT: vmv.v.x v16, a0
-; RV64-V256-NEXT: vmsne.vi v0, v16, 0
-; RV64-V256-NEXT: vmv.v.x v18, a1
; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmerge.vim v16, v14, -1, v0
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-V256-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-V256-NEXT: vmsne.vi v0, v18, 0
+; RV64-V256-NEXT: vmv.v.x v10, a1
+; RV64-V256-NEXT: vmsne.vi v0, v10, 0
; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmerge.vim v14, v14, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v16, v8
-; RV64-V256-NEXT: vnot.v v16, v16
-; RV64-V256-NEXT: vand.vv v10, v16, v10
-; RV64-V256-NEXT: vnot.v v16, v14
-; RV64-V256-NEXT: vor.vv v8, v8, v10
-; RV64-V256-NEXT: vand.vv v8, v14, v8
-; RV64-V256-NEXT: vand.vv v10, v16, v12
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v12, v8, v0
; RV64-V256-NEXT: ret
<vscale x 4 x i32> %a,
<vscale x 4 x i32> %b,
@@ -407,12 +299,7 @@ define <vscale x 8 x i16> @ctsel_nxv8i16_basic(i1 %cond, <vscale x 8 x i16> %a,
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vmsne.vi v0, v12, 0
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv8i16_basic:
@@ -422,12 +309,7 @@ define <vscale x 8 x i16> @ctsel_nxv8i16_basic(i1 %cond, <vscale x 8 x i16> %a,
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vmsne.vi v0, v12, 0
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv8i16_basic:
@@ -437,12 +319,7 @@ define <vscale x 8 x i16> @ctsel_nxv8i16_basic(i1 %cond, <vscale x 8 x i16> %a,
; RV32-V128-NEXT: vmv.v.x v12, a0
; RV32-V128-NEXT: vmsne.vi v0, v12, 0
; RV32-V128-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv8i16_basic:
@@ -452,12 +329,7 @@ define <vscale x 8 x i16> @ctsel_nxv8i16_basic(i1 %cond, <vscale x 8 x i16> %a,
; RV64-V256-NEXT: vmv.v.x v12, a0
; RV64-V256-NEXT: vmsne.vi v0, v12, 0
; RV64-V256-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-V256-NEXT: ret
%r = call <vscale x 8 x i16> @llvm.ct.select.nxv8i16(i1 %cond, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
ret <vscale x 8 x i16> %r
@@ -470,12 +342,7 @@ define <vscale x 16 x i8> @ctsel_nxv16i8_basic(i1 %cond, <vscale x 16 x i8> %a,
; RV64-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vmsne.vi v0, v12, 0
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv16i8_basic:
@@ -484,12 +351,7 @@ define <vscale x 16 x i8> @ctsel_nxv16i8_basic(i1 %cond, <vscale x 16 x i8> %a,
; RV32-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv16i8_basic:
@@ -498,12 +360,7 @@ define <vscale x 16 x i8> @ctsel_nxv16i8_basic(i1 %cond, <vscale x 16 x i8> %a,
; RV32-V128-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV32-V128-NEXT: vmv.v.x v12, a0
; RV32-V128-NEXT: vmsne.vi v0, v12, 0
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv16i8_basic:
@@ -512,12 +369,7 @@ define <vscale x 16 x i8> @ctsel_nxv16i8_basic(i1 %cond, <vscale x 16 x i8> %a,
; RV64-V256-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV64-V256-NEXT: vmv.v.x v12, a0
; RV64-V256-NEXT: vmsne.vi v0, v12, 0
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-V256-NEXT: ret
%r = call <vscale x 16 x i8> @llvm.ct.select.nxv16i8(i1 %cond, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
ret <vscale x 16 x i8> %r
@@ -532,42 +384,37 @@ define <vscale x 2 x i64> @ctsel_nxv2i64_basic(i1 %cond, <vscale x 2 x i64> %a,
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vmsne.vi v0, v12, 0
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv2i64_basic:
; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vxor.vv v8, v8, v10
; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vmsne.vi v0, v12, 0
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vand.vv v8, v8, v12
+; RV32-NEXT: vxor.vv v8, v10, v8
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv2i64_basic:
; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-V128-NEXT: vxor.vv v8, v8, v10
; RV32-V128-NEXT: andi a0, a0, 1
-; RV32-V128-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-V128-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32-V128-NEXT: vmv.v.x v12, a0
; RV32-V128-NEXT: vmsne.vi v0, v12, 0
; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-V128-NEXT: vmv.v.i v12, 0
; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vand.vv v8, v8, v12
+; RV32-V128-NEXT: vxor.vv v8, v10, v8
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv2i64_basic:
@@ -577,12 +424,7 @@ define <vscale x 2 x i64> @ctsel_nxv2i64_basic(i1 %cond, <vscale x 2 x i64> %a,
; RV64-V256-NEXT: vmv.v.x v12, a0
; RV64-V256-NEXT: vmsne.vi v0, v12, 0
; RV64-V256-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-V256-NEXT: ret
%r = call <vscale x 2 x i64> @llvm.ct.select.nxv2i64(i1 %cond, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %r
@@ -597,12 +439,7 @@ define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float>
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vmsne.vi v0, v12, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv4f32_basic:
@@ -612,12 +449,7 @@ define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float>
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vmsne.vi v0, v12, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv4f32_basic:
@@ -627,12 +459,7 @@ define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float>
; RV32-V128-NEXT: vmv.v.x v12, a0
; RV32-V128-NEXT: vmsne.vi v0, v12, 0
; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv4f32_basic:
@@ -642,12 +469,7 @@ define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float>
; RV64-V256-NEXT: vmv.v.x v12, a0
; RV64-V256-NEXT: vmsne.vi v0, v12, 0
; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-V256-NEXT: ret
%r = call <vscale x 4 x float> @llvm.ct.select.nxv4f32(i1 %cond, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
ret <vscale x 4 x float> %r
@@ -657,74 +479,50 @@ define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float>
define <vscale x 4 x float> @ctsel_nxv4f32_arith(i1 %cond, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
; RV64-LABEL: ctsel_nxv4f32_arith:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vfadd.vv v12, v8, v10
-; RV64-NEXT: vfsub.vv v8, v8, v10
; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v10, a0
-; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v10, 0
-; RV64-NEXT: vmerge.vim v10, v10, -1, v0
-; RV64-NEXT: vand.vv v12, v10, v12
-; RV64-NEXT: vnot.v v10, v10
-; RV64-NEXT: vand.vv v8, v10, v8
-; RV64-NEXT: vor.vv v8, v12, v8
+; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v12, a0
+; RV64-NEXT: vmsne.vi v0, v12, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-NEXT: vfsub.vv v12, v8, v10
+; RV64-NEXT: vfadd.vv v12, v8, v10, v0.t
+; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv4f32_arith:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vfadd.vv v12, v8, v10
-; RV32-NEXT: vfsub.vv v8, v8, v10
; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a0
-; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: vmerge.vim v10, v10, -1, v0
-; RV32-NEXT: vand.vv v12, v10, v12
-; RV32-NEXT: vnot.v v10, v10
-; RV32-NEXT: vand.vv v8, v10, v8
-; RV32-NEXT: vor.vv v8, v12, v8
+; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a0
+; RV32-NEXT: vmsne.vi v0, v12, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-NEXT: vfsub.vv v12, v8, v10
+; RV32-NEXT: vfadd.vv v12, v8, v10, v0.t
+; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv4f32_arith:
; RV32-V128: # %bb.0:
-; RV32-V128-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vfadd.vv v12, v8, v10
-; RV32-V128-NEXT: vfsub.vv v8, v8, v10
; RV32-V128-NEXT: andi a0, a0, 1
-; RV32-V128-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-V128-NEXT: vmv.v.x v10, a0
-; RV32-V128-NEXT: vmsne.vi v0, v10, 0
-; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v10, 0
-; RV32-V128-NEXT: vmerge.vim v10, v10, -1, v0
-; RV32-V128-NEXT: vand.vv v12, v10, v12
-; RV32-V128-NEXT: vnot.v v10, v10
-; RV32-V128-NEXT: vand.vv v8, v10, v8
-; RV32-V128-NEXT: vor.vv v8, v12, v8
+; RV32-V128-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v12, a0
+; RV32-V128-NEXT: vmsne.vi v0, v12, 0
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-V128-NEXT: vfsub.vv v12, v8, v10
+; RV32-V128-NEXT: vfadd.vv v12, v8, v10, v0.t
+; RV32-V128-NEXT: vmv.v.v v8, v12
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv4f32_arith:
; RV64-V256: # %bb.0:
-; RV64-V256-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vfadd.vv v12, v8, v10
-; RV64-V256-NEXT: vfsub.vv v8, v8, v10
; RV64-V256-NEXT: andi a0, a0, 1
-; RV64-V256-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-V256-NEXT: vmv.v.x v10, a0
-; RV64-V256-NEXT: vmsne.vi v0, v10, 0
-; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v10, 0
-; RV64-V256-NEXT: vmerge.vim v10, v10, -1, v0
-; RV64-V256-NEXT: vand.vv v12, v10, v12
-; RV64-V256-NEXT: vnot.v v10, v10
-; RV64-V256-NEXT: vand.vv v8, v10, v8
-; RV64-V256-NEXT: vor.vv v8, v12, v8
+; RV64-V256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v12, a0
+; RV64-V256-NEXT: vmsne.vi v0, v12, 0
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-V256-NEXT: vfsub.vv v12, v8, v10
+; RV64-V256-NEXT: vfadd.vv v12, v8, v10, v0.t
+; RV64-V256-NEXT: vmv.v.v v8, v12
; RV64-V256-NEXT: ret
%sum = fadd <vscale x 4 x float> %x, %y
%diff = fsub <vscale x 4 x float> %x, %y
@@ -740,42 +538,37 @@ define <vscale x 2 x double> @ctsel_nxv2f64_basic(i1 %cond, <vscale x 2 x double
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vmsne.vi v0, v12, 0
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv2f64_basic:
; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vxor.vv v8, v8, v10
; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vmsne.vi v0, v12, 0
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vand.vv v8, v8, v12
+; RV32-NEXT: vxor.vv v8, v10, v8
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv2f64_basic:
; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-V128-NEXT: vxor.vv v8, v8, v10
; RV32-V128-NEXT: andi a0, a0, 1
-; RV32-V128-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-V128-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32-V128-NEXT: vmv.v.x v12, a0
; RV32-V128-NEXT: vmsne.vi v0, v12, 0
; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-V128-NEXT: vmv.v.i v12, 0
; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vand.vv v8, v8, v12
+; RV32-V128-NEXT: vxor.vv v8, v10, v8
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv2f64_basic:
@@ -785,12 +578,7 @@ define <vscale x 2 x double> @ctsel_nxv2f64_basic(i1 %cond, <vscale x 2 x double
; RV64-V256-NEXT: vmv.v.x v12, a0
; RV64-V256-NEXT: vmsne.vi v0, v12, 0
; RV64-V256-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-V256-NEXT: ret
%r = call <vscale x 2 x double> @llvm.ct.select.nxv2f64(i1 %cond, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
ret <vscale x 2 x double> %r
diff --git a/llvm/test/CodeGen/RISCV/ctselect-side-effects.ll b/llvm/test/CodeGen/RISCV/ctselect-side-effects.ll
index 255d575ca8f9..6020a9cded35 100644
--- a/llvm/test/CodeGen/RISCV/ctselect-side-effects.ll
+++ b/llvm/test/CodeGen/RISCV/ctselect-side-effects.ll
@@ -40,17 +40,16 @@ define i32 @test_protected_no_branch(i1 %cond, i32 %a, i32 %b) {
; RV64-NEXT: slli a0, a0, 63
; RV64-NEXT: srai a0, a0, 63
; RV64-NEXT: and a0, a1, a0
-; RV64-NEXT: xor a0, a0, a2
+; RV64-NEXT: xor a0, a2, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_protected_no_branch:
; RV32: # %bb.0:
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: neg a3, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a1, a3, a1
-; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: xor a0, a2, a0
; RV32-NEXT: ret
%result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
ret i32 %result
@@ -86,7 +85,7 @@ false:
ret i32 %b
}
-; Test 5: Regular select (not ct.select) - whatever wasm wants to do
+; Test 5: Regular select (not ct.select)
define i32 @test_regular_select(i1 %cond, i32 %a, i32 %b) {
; RV64-LABEL: test_regular_select:
; RV64: # %bb.0: