| author | wizardengineer <juliuswoosebert@gmail.com> | 2026-03-07 15:38:43 -0500 |
|---|---|---|
| committer | wizardengineer <juliuswoosebert@gmail.com> | 2026-03-10 01:33:00 -0400 |
| commit | 8616f01e68dca2b9b05765dce0e3cec4d1cf87db | |
| tree | ffac1c51700c0f447bcb719939360463d2474586 | |
| parent | a3c8bb26751264a3083fc663f2a03a0c84ac34f9 | |
[LLVM][RISCV] Regenerate ct.select test CHECK lines
Branch: users/wizardengineer/ct-select-riscv
Update CHECK lines to match the new constant-time AND/OR/XOR expansion
from the CT_SELECT legalization fix.
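For readers skimming the CHECK-line churn: the old fallback selected with a pair of masked AND operands joined by OR, while the fixed legalization uses the xor-mask form and builds the mask by sign-extending the condition bit (the slli/srai pairs in the new lines). A minimal C sketch of the two expansions, assuming a 32-bit select; the helper names are hypothetical and the real lowering operates on SelectionDAG nodes, not C:

```c
#include <stdint.h>

/* Old expansion, as in the removed CHECK lines:
   (a & mask) | (b & ~mask). */
static inline uint32_t ct_select_old(uint32_t cond, uint32_t a, uint32_t b) {
  uint32_t mask = -(cond & 1u); /* all-ones when cond is set, else zero */
  return (a & mask) | (b & ~mask);
}

/* New expansion, matching the regenerated CHECK lines:
   b ^ ((a ^ b) & mask). The backend materializes the mask with the
   slli/srai pair (shift bit 0 to the sign position, then arithmetic
   shift back). */
static inline uint32_t ct_select_new(uint32_t cond, uint32_t a, uint32_t b) {
  uint32_t mask = -(cond & 1u);
  return b ^ ((a ^ b) & mask); /* == a when mask is all-ones, else b */
}
```

Both forms are branch-free; the xor form needs one operation fewer and never materializes ~mask, and when both operands are equal the xor of the operands is zero, which is presumably why test_ctselect_identical_operands folds to a plain mv below.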
| -rw-r--r-- | llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll | 144 |
| -rw-r--r-- | llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll | 139 |
| -rw-r--r-- | llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll | 476 |
| -rw-r--r-- | llvm/test/CodeGen/RISCV/ctselect-side-effects.ll | 15 |
4 files changed, 267 insertions, 507 deletions
diff --git a/llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll b/llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll
index af1be0c8f3dd..06e1009485ac 100644
--- a/llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll
+++ b/llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll
@@ -6,18 +6,20 @@ define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
 ; RV64-LABEL: test_ctselect_i1:
 ; RV64: # %bb.0:
-; RV64-NEXT: and a1, a0, a1
-; RV64-NEXT: xori a0, a0, 1
-; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: xor a1, a1, a2
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: xor a0, a2, a0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_i1:
 ; RV32: # %bb.0:
-; RV32-NEXT: and a1, a0, a1
-; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: xor a0, a2, a0
 ; RV32-NEXT: ret
 %result = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b)
 ret i1 %result
@@ -27,21 +29,17 @@ define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
 define i32 @test_ctselect_extremal_values(i1 %cond) {
 ; RV64-LABEL: test_ctselect_extremal_values:
 ; RV64: # %bb.0:
-; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
 ; RV64-NEXT: lui a1, 524288
-; RV64-NEXT: subw a0, a1, a0
+; RV64-NEXT: xor a0, a0, a1
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_extremal_values:
 ; RV32: # %bb.0:
 ; RV32-NEXT: andi a0, a0, 1
 ; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: addi a2, a0, -1
-; RV32-NEXT: neg a0, a0
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: srli a0, a0, 1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: sub a0, a1, a0
 ; RV32-NEXT: ret
 %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 2147483647, i32 -2147483648)
 ret i32 %result
@@ -53,14 +51,14 @@ define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) {
 ; RV64: # %bb.0:
 ; RV64-NEXT: slli a0, a0, 63
 ; RV64-NEXT: srai a0, a0, 63
-; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: and a0, a1, a0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_null_ptr:
 ; RV32: # %bb.0:
 ; RV32-NEXT: slli a0, a0, 31
 ; RV32-NEXT: srai a0, a0, 31
-; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: and a0, a1, a0
 ; RV32-NEXT: ret
 %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %ptr, ptr null)
 ret ptr %result
@@ -70,22 +68,20 @@ define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) {
 ; RV64-LABEL: test_ctselect_function_ptr:
 ; RV64: # %bb.0:
-; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: neg a3, a0
-; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a1, a3, a1
-; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: xor a1, a1, a2
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: xor a0, a2, a0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_function_ptr:
 ; RV32: # %bb.0:
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: neg a3, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a1, a3, a1
-; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: xor a0, a2, a0
 ; RV32-NEXT: ret
 %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %func1, ptr %func2)
 ret ptr %result
@@ -97,22 +93,20 @@ define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
 ; RV64: # %bb.0:
 ; RV64-NEXT: xor a0, a0, a1
 ; RV64-NEXT: snez a0, a0
+; RV64-NEXT: xor a2, a2, a3
 ; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a2, a0, a2
-; RV64-NEXT: not a0, a0
-; RV64-NEXT: and a0, a0, a3
-; RV64-NEXT: or a0, a2, a0
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: xor a0, a3, a0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_ptr_cmp:
 ; RV32: # %bb.0:
 ; RV32-NEXT: xor a0, a0, a1
 ; RV32-NEXT: snez a0, a0
+; RV32-NEXT: xor a2, a2, a3
 ; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a2, a0, a2
-; RV32-NEXT: not a0, a0
-; RV32-NEXT: and a0, a0, a3
-; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: and a0, a2, a0
+; RV32-NEXT: xor a0, a3, a0
 ; RV32-NEXT: ret
 %cmp = icmp eq ptr %p1, %p2
 %result = call ptr @llvm.ct.select.p0(i1 %cmp, ptr %a, ptr %b)
@@ -125,22 +119,20 @@ define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
 define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) {
 ; RV64-LABEL: test_ctselect_struct_ptr:
 ; RV64: # %bb.0:
-; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: neg a3, a0
-; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a1, a3, a1
-; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: xor a1, a1, a2
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: xor a0, a2, a0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_struct_ptr:
 ; RV32: # %bb.0:
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: neg a3, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a1, a3, a1
-; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: xor a0, a2, a0
 ; RV32-NEXT: ret
 %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
 ret ptr %result
@@ -162,44 +154,40 @@ define i32 @test_ctselect_deeply_nested(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i32 %a,
 ; RV64-NEXT: srai a1, a1, 63
 ; RV64-NEXT: srai a2, a2, 63
 ; RV64-NEXT: and a0, a4, a0
-; RV64-NEXT: xor a0, a0, a5
+; RV64-NEXT: xor a0, a5, a0
 ; RV64-NEXT: and a0, a0, a1
 ; RV64-NEXT: xor a1, a7, t0
-; RV64-NEXT: xor a0, a0, a6
+; RV64-NEXT: xor a0, a6, a0
 ; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
 ; RV64-NEXT: srai a3, a3, 63
 ; RV64-NEXT: and a0, a0, a3
-; RV64-NEXT: xor a0, a0, t0
+; RV64-NEXT: xor a0, t0, a0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_deeply_nested:
 ; RV32: # %bb.0:
 ; RV32-NEXT: lw t0, 0(sp)
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: andi a1, a1, 1
-; RV32-NEXT: andi a2, a2, 1
-; RV32-NEXT: andi a3, a3, 1
-; RV32-NEXT: neg t1, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a4, t1, a4
-; RV32-NEXT: neg t1, a1
-; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: and a0, a0, a5
-; RV32-NEXT: neg a5, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a1, a1, a6
-; RV32-NEXT: neg a6, a3
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a2, a2, a7
-; RV32-NEXT: or a0, a4, a0
-; RV32-NEXT: and a0, t1, a0
-; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: and a0, a5, a0
-; RV32-NEXT: or a0, a0, a2
-; RV32-NEXT: and a0, a6, a0
-; RV32-NEXT: and a1, a3, t0
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a4, a4, a5
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: xor a5, a5, a6
+; RV32-NEXT: slli a1, a1, 31
+; RV32-NEXT: xor a6, a6, a7
+; RV32-NEXT: slli a2, a2, 31
+; RV32-NEXT: slli a3, a3, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: srai a1, a1, 31
+; RV32-NEXT: srai a2, a2, 31
+; RV32-NEXT: and a0, a4, a0
+; RV32-NEXT: xor a0, a5, a0
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: xor a1, a7, t0
+; RV32-NEXT: xor a0, a6, a0
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a1, a0
+; RV32-NEXT: srai a3, a3, 31
+; RV32-NEXT: and a0, a0, a3
+; RV32-NEXT: xor a0, t0, a0
 ; RV32-NEXT: ret
 %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
 %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
diff --git a/llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll b/llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll
index 1149971fd090..5fbc36d07db8 100644
--- a/llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll
+++ b/llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll
@@ -7,13 +7,13 @@ define i32 @test_ctselect_smin_zero(i32 %x) {
 ; RV64-LABEL: test_ctselect_smin_zero:
 ; RV64: # %bb.0:
 ; RV64-NEXT: sraiw a1, a0, 31
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a1
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_smin_zero:
 ; RV32: # %bb.0:
 ; RV32-NEXT: srai a1, a0, 31
-; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: and a0, a0, a1
 ; RV32-NEXT: ret
 %cmp = icmp slt i32 %x, 0
 %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
@@ -27,14 +27,14 @@ define i32 @test_ctselect_smax_zero(i32 %x) {
 ; RV64-NEXT: sext.w a1, a0
 ; RV64-NEXT: sgtz a1, a1
 ; RV64-NEXT: neg a1, a1
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a1
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_smax_zero:
 ; RV32: # %bb.0:
 ; RV32-NEXT: sgtz a1, a0
 ; RV32-NEXT: neg a1, a1
-; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: and a0, a0, a1
 ; RV32-NEXT: ret
 %cmp = icmp sgt i32 %x, 0
 %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
@@ -51,17 +51,16 @@ define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) {
 ; RV64-NEXT: xor a0, a0, a1
 ; RV64-NEXT: neg a2, a2
 ; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_smin_generic:
 ; RV32: # %bb.0:
 ; RV32-NEXT: slt a2, a0, a1
-; RV32-NEXT: neg a3, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a1, a0
 ; RV32-NEXT: ret
 %cmp = icmp slt i32 %x, %y
 %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -78,17 +77,16 @@ define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) {
 ; RV64-NEXT: xor a0, a0, a1
 ; RV64-NEXT: neg a2, a2
 ; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_smax_generic:
 ; RV32: # %bb.0:
 ; RV32-NEXT: slt a2, a1, a0
-; RV32-NEXT: neg a3, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a1, a0
 ; RV32-NEXT: ret
 %cmp = icmp sgt i32 %x, %y
 %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -105,17 +103,16 @@ define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) {
 ; RV64-NEXT: xor a0, a0, a1
 ; RV64-NEXT: neg a2, a2
 ; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_umin_generic:
 ; RV32: # %bb.0:
 ; RV32-NEXT: sltu a2, a0, a1
-; RV32-NEXT: neg a3, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a1, a0
 ; RV32-NEXT: ret
 %cmp = icmp ult i32 %x, %y
 %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -132,17 +129,16 @@ define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) {
 ; RV64-NEXT: xor a0, a0, a1
 ; RV64-NEXT: neg a2, a2
 ; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_umax_generic:
 ; RV32: # %bb.0:
 ; RV32-NEXT: sltu a2, a1, a0
-; RV32-NEXT: neg a3, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a1, a0
 ; RV32-NEXT: ret
 %cmp = icmp ugt i32 %x, %y
 %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -157,17 +153,16 @@ define i32 @test_ctselect_abs(i32 %x) {
 ; RV64-NEXT: xor a1, a1, a0
 ; RV64-NEXT: sraiw a2, a0, 31
 ; RV64-NEXT: and a1, a1, a2
-; RV64-NEXT: xor a0, a1, a0
+; RV64-NEXT: xor a0, a0, a1
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_abs:
 ; RV32: # %bb.0:
 ; RV32-NEXT: neg a1, a0
+; RV32-NEXT: xor a1, a1, a0
 ; RV32-NEXT: srai a2, a0, 31
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: not a2, a2
-; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: xor a0, a0, a1
 ; RV32-NEXT: ret
 %neg = sub i32 0, %x
 %cmp = icmp slt i32 %x, 0
@@ -183,17 +178,16 @@ define i32 @test_ctselect_nabs(i32 %x) {
 ; RV64-NEXT: xor a2, a0, a1
 ; RV64-NEXT: sraiw a0, a0, 31
 ; RV64-NEXT: and a0, a2, a0
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_nabs:
 ; RV32: # %bb.0:
 ; RV32-NEXT: neg a1, a0
-; RV32-NEXT: srai a2, a0, 31
+; RV32-NEXT: xor a2, a0, a1
+; RV32-NEXT: srai a0, a0, 31
 ; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: not a2, a2
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a0, a1, a0
 ; RV32-NEXT: ret
 %neg = sub i32 0, %x
 %cmp = icmp slt i32 %x, 0
@@ -270,12 +264,7 @@ define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) {
 ;
 ; RV32-LABEL: test_ctselect_identical_operands:
 ; RV32: # %bb.0:
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: neg a2, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a2, a2, a1
-; RV32-NEXT: and a0, a0, a1
-; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: mv a0, a1
 ; RV32-NEXT: ret
 %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %x, i32 %x)
 ret i32 %result
@@ -288,22 +277,21 @@ define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; RV64-NEXT: sext.w a1, a1
 ; RV64-NEXT: sext.w a0, a0
 ; RV64-NEXT: xor a0, a0, a1
-; RV64-NEXT: seqz a0, a0
-; RV64-NEXT: xor a2, a2, a3
+; RV64-NEXT: snez a0, a0
+; RV64-NEXT: xor a3, a3, a2
 ; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a0, a2, a0
-; RV64-NEXT: xor a0, a0, a3
+; RV64-NEXT: and a0, a3, a0
+; RV64-NEXT: xor a0, a2, a0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_inverted_condition:
 ; RV32: # %bb.0:
 ; RV32-NEXT: xor a0, a0, a1
-; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: snez a0, a0
+; RV32-NEXT: xor a3, a3, a2
 ; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a2, a0, a2
-; RV32-NEXT: not a0, a0
-; RV32-NEXT: and a0, a0, a3
-; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: and a0, a3, a0
+; RV32-NEXT: xor a0, a2, a0
 ; RV32-NEXT: ret
 %cmp = icmp eq i32 %x, %y
 %not_cmp = xor i1 %cmp, true
@@ -324,34 +312,31 @@ define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c,
 ; RV64-NEXT: srai a0, a0, 63
 ; RV64-NEXT: srai a1, a1, 63
 ; RV64-NEXT: and a0, a3, a0
-; RV64-NEXT: xor a0, a0, a4
+; RV64-NEXT: xor a0, a4, a0
 ; RV64-NEXT: and a0, a0, a1
-; RV64-NEXT: xor a0, a0, a5
+; RV64-NEXT: xor a0, a5, a0
 ; RV64-NEXT: srai a2, a2, 63
 ; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a6
+; RV64-NEXT: xor a0, a6, a0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_chain:
 ; RV32: # %bb.0:
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: andi a1, a1, 1
-; RV32-NEXT: andi a2, a2, 1
-; RV32-NEXT: neg a7, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a3, a7, a3
-; RV32-NEXT: neg a7, a1
-; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: and a0, a0, a4
-; RV32-NEXT: neg a4, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a1, a1, a5
-; RV32-NEXT: or a0, a3, a0
-; RV32-NEXT: and a0, a7, a0
-; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: and a0, a4, a0
-; RV32-NEXT: and a1, a2, a6
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a3, a3, a4
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: xor a4, a4, a5
+; RV32-NEXT: slli a1, a1, 31
+; RV32-NEXT: xor a5, a5, a6
+; RV32-NEXT: slli a2, a2, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: srai a1, a1, 31
+; RV32-NEXT: and a0, a3, a0
+; RV32-NEXT: xor a0, a4, a0
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: xor a0, a5, a0
+; RV32-NEXT: srai a2, a2, 31
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a6, a0
 ; RV32-NEXT: ret
 %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
 %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
@@ -364,14 +349,14 @@ define i64 @test_ctselect_i64_smin_zero(i64 %x) {
 ; RV64-LABEL: test_ctselect_i64_smin_zero:
 ; RV64: # %bb.0:
 ; RV64-NEXT: srai a1, a0, 63
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a1
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_ctselect_i64_smin_zero:
 ; RV32: # %bb.0:
 ; RV32-NEXT: srai a2, a1, 31
-; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: and a1, a1, a2
 ; RV32-NEXT: ret
 %cmp = icmp slt i64 %x, 0
 %result = call i64 @llvm.ct.select.i64(i1 %cmp, i64 %x, i64 0)
diff --git a/llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll b/llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll
index a02e1e474944..6e5d3e72e14f 100644
--- a/llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll
+++ b/llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll
@@ -14,12 +14,7 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_basic(i1 %cond, <vscale x 4 x i32> %a,
 ; RV64-NEXT: vmv.v.x v12, a0
 ; RV64-NEXT: vmsne.vi v0, v12, 0
 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: ctsel_nxv4i32_basic:
@@ -29,12 +24,7 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_basic(i1 %cond, <vscale x 4 x i32> %a,
 ; RV32-NEXT: vmv.v.x v12, a0
 ; RV32-NEXT: vmsne.vi v0, v12, 0
 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV32-NEXT: ret
 ;
 ; RV32-V128-LABEL: ctsel_nxv4i32_basic:
@@ -44,12 +34,7 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_basic(i1 %cond, <vscale x 4 x i32> %a,
 ; RV32-V128-NEXT: vmv.v.x v12, a0
 ; RV32-V128-NEXT: vmsne.vi v0, v12, 0
 ; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV32-V128-NEXT: ret
 ;
 ; RV64-V256-LABEL: ctsel_nxv4i32_basic:
@@ -59,12 +44,7 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_basic(i1 %cond, <vscale x 4 x i32> %a,
 ; RV64-V256-NEXT: vmv.v.x v12, a0
 ; RV64-V256-NEXT: vmsne.vi v0, v12, 0
 ; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV64-V256-NEXT: ret
 %r = call <vscale x 4 x i32> @llvm.ct.select.nxv4i32(i1 %cond, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
 ret <vscale x 4 x i32> %r
@@ -74,70 +54,46 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_load(i1 %cond, ptr %p1, ptr %p2) {
 ; RV64-LABEL: ctsel_nxv4i32_load:
 ; RV64: # %bb.0:
-; RV64-NEXT: vl2re32.v v8, (a1)
-; RV64-NEXT: vl2re32.v v10, (a2)
+; RV64-NEXT: vl2re32.v v8, (a2)
 ; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v12, a0
-; RV64-NEXT: vmsne.vi v0, v12, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vmsne.vi v0, v10, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-NEXT: vle32.v v8, (a1), v0.t
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: ctsel_nxv4i32_load:
 ; RV32: # %bb.0:
-; RV32-NEXT: vl2re32.v v8, (a1)
-; RV32-NEXT: vl2re32.v v10, (a2)
+; RV32-NEXT: vl2re32.v v8, (a2)
 ; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v12, a0
-; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a0
+; RV32-NEXT: vmsne.vi v0, v10, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-NEXT: vle32.v v8, (a1), v0.t
 ; RV32-NEXT: ret
 ;
 ; RV32-V128-LABEL: ctsel_nxv4i32_load:
 ; RV32-V128: # %bb.0:
-; RV32-V128-NEXT: vl2re32.v v8, (a1)
-; RV32-V128-NEXT: vl2re32.v v10, (a2)
+; RV32-V128-NEXT: vl2re32.v v8, (a2)
 ; RV32-V128-NEXT: andi a0, a0, 1
-; RV32-V128-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV32-V128-NEXT: vmv.v.x v12, a0
-; RV32-V128-NEXT: vmsne.vi v0, v12, 0
-; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v10, a0
+; RV32-V128-NEXT: vmsne.vi v0, v10, 0
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-V128-NEXT: vle32.v v8, (a1), v0.t
 ; RV32-V128-NEXT: ret
 ;
 ; RV64-V256-LABEL: ctsel_nxv4i32_load:
 ; RV64-V256: # %bb.0:
-; RV64-V256-NEXT: vl2re32.v v8, (a1)
-; RV64-V256-NEXT: vl2re32.v v10, (a2)
+; RV64-V256-NEXT: vl2re32.v v8, (a2)
 ; RV64-V256-NEXT: andi a0, a0, 1
-; RV64-V256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV64-V256-NEXT: vmv.v.x v12, a0
-; RV64-V256-NEXT: vmsne.vi v0, v12, 0
-; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v10, a0
+; RV64-V256-NEXT: vmsne.vi v0, v10, 0
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-V256-NEXT: vle32.v v8, (a1), v0.t
 ; RV64-V256-NEXT: ret
 %a = load <vscale x 4 x i32>, ptr %p1, align 16
 %b = load <vscale x 4 x i32>, ptr %p2, align 16
@@ -155,16 +111,10 @@ define void @ctsel_nxv4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
 ; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; RV64-NEXT: vmv.v.x v12, a0
 ; RV64-NEXT: vmsne.vi v0, v12, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vadd.vv v8, v8, v8
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
 ; RV64-NEXT: vadd.vv v10, v10, v10
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vs2r.v v8, (a3)
+; RV64-NEXT: vadd.vv v10, v8, v8, v0.t
+; RV64-NEXT: vs2r.v v10, (a3)
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: ctsel_nxv4i32_mixed:
@@ -175,16 +125,10 @@ define void @ctsel_nxv4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
 ; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; RV32-NEXT: vmv.v.x v12, a0
 ; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vadd.vv v8, v8, v8
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
 ; RV32-NEXT: vadd.vv v10, v10, v10
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vs2r.v v8, (a3)
+; RV32-NEXT: vadd.vv v10, v8, v8, v0.t
+; RV32-NEXT: vs2r.v v10, (a3)
 ; RV32-NEXT: ret
 ;
 ; RV32-V128-LABEL: ctsel_nxv4i32_mixed:
@@ -195,16 +139,10 @@ define void @ctsel_nxv4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
 ; RV32-V128-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; RV32-V128-NEXT: vmv.v.x v12, a0
 ; RV32-V128-NEXT: vmsne.vi v0, v12, 0
-; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vadd.vv v8, v8, v8
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, mu
 ; RV32-V128-NEXT: vadd.vv v10, v10, v10
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
-; RV32-V128-NEXT: vs2r.v v8, (a3)
+; RV32-V128-NEXT: vadd.vv v10, v8, v8, v0.t
+; RV32-V128-NEXT: vs2r.v v10, (a3)
 ; RV32-V128-NEXT: ret
 ;
 ; RV64-V256-LABEL: ctsel_nxv4i32_mixed:
@@ -215,16 +153,10 @@ define void @ctsel_nxv4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
 ; RV64-V256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
 ; RV64-V256-NEXT: vmv.v.x v12, a0
 ; RV64-V256-NEXT: vmsne.vi v0, v12, 0
-; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vadd.vv v8, v8, v8
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, mu
 ; RV64-V256-NEXT: vadd.vv v10, v10, v10
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
-; RV64-V256-NEXT: vs2r.v v8, (a3)
+; RV64-V256-NEXT: vadd.vv v10, v8, v8, v0.t
+; RV64-V256-NEXT: vs2r.v v10, (a3)
 ; RV64-V256-NEXT: ret
 %a = load <vscale x 4 x i32>, ptr %p1, align 16
 %b = load <vscale x 4 x i32>, ptr %p2, align 16
@@ -290,105 +222,65 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_chain(i1 %c1, i1 %c2,
 ; RV64-LABEL: ctsel_nxv4i32_chain:
 ; RV64: # %bb.0:
 ; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v14, 0
+; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v14, a0
+; RV64-NEXT: vmsne.vi v0, v14, 0
 ; RV64-NEXT: andi a1, a1, 1
-; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v16, a0
-; RV64-NEXT: vmsne.vi v0, v16, 0
-; RV64-NEXT: vmv.v.x v18, a1
 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmerge.vim v16, v14, -1, v0
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmsne.vi v0, v18, 0
+; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vmsne.vi v0, v10, 0
 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmerge.vim v14, v14, -1, v0
-; RV64-NEXT: vand.vv v8, v16, v8
-; RV64-NEXT: vnot.v v16, v16
-; RV64-NEXT: vand.vv v10, v16, v10
-; RV64-NEXT: vnot.v v16, v14
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vand.vv v8, v14, v8
-; RV64-NEXT: vand.vv v10, v16, v12
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v12, v8, v0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: ctsel_nxv4i32_chain:
 ; RV32: # %bb.0:
 ; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v14, 0
+; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v14, a0
+; RV32-NEXT: vmsne.vi v0, v14, 0
 ; RV32-NEXT: andi a1, a1, 1
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v16, a0
-; RV32-NEXT: vmsne.vi v0, v16, 0
-; RV32-NEXT: vmv.v.x v18, a1
 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmerge.vim v16, v14, -1, v0
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmsne.vi v0, v18, 0
+; RV32-NEXT: vmv.v.x v10, a1
+; RV32-NEXT: vmsne.vi v0, v10, 0
 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmerge.vim v14, v14, -1, v0
-; RV32-NEXT: vand.vv v8, v16, v8
-; RV32-NEXT: vnot.v v16, v16
-; RV32-NEXT: vand.vv v10, v16, v10
-; RV32-NEXT: vnot.v v16, v14
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vand.vv v8, v14, v8
-; RV32-NEXT: vand.vv v10, v16, v12
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vmerge.vvm v8, v12, v8, v0
 ; RV32-NEXT: ret
 ;
 ; RV32-V128-LABEL: ctsel_nxv4i32_chain:
 ; RV32-V128: # %bb.0:
 ; RV32-V128-NEXT: andi a0, a0, 1
-; RV32-V128-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v14, 0
+; RV32-V128-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v14, a0
+; RV32-V128-NEXT: vmsne.vi v0, v14, 0
 ; RV32-V128-NEXT: andi a1, a1, 1
-; RV32-V128-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-V128-NEXT: vmv.v.x v16, a0
-; RV32-V128-NEXT: vmsne.vi v0, v16, 0
-; RV32-V128-NEXT: vmv.v.x v18, a1
 ; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmerge.vim v16, v14, -1, v0
+; RV32-V128-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV32-V128-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-V128-NEXT: vmsne.vi v0, v18, 0
+; RV32-V128-NEXT: vmv.v.x v10, a1
+; RV32-V128-NEXT: vmsne.vi v0, v10, 0
 ; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmerge.vim v14, v14, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v16, v8
-; RV32-V128-NEXT: vnot.v v16, v16
-; RV32-V128-NEXT: vand.vv v10, v16, v10
-; RV32-V128-NEXT: vnot.v v16, v14
-; RV32-V128-NEXT: vor.vv v8, v8, v10
-; RV32-V128-NEXT: vand.vv v8, v14, v8
-; RV32-V128-NEXT: vand.vv v10, v16, v12
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vmerge.vvm v8, v12, v8, v0
 ; RV32-V128-NEXT: ret
 ;
 ; RV64-V256-LABEL: ctsel_nxv4i32_chain:
 ; RV64-V256: # %bb.0:
 ; RV64-V256-NEXT: andi a0, a0, 1
-; RV64-V256-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v14, 0
+; RV64-V256-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v14, a0
+; RV64-V256-NEXT: vmsne.vi v0, v14, 0
 ; RV64-V256-NEXT: andi a1, a1, 1
-; RV64-V256-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-V256-NEXT: vmv.v.x v16, a0
-; RV64-V256-NEXT: vmsne.vi v0, v16, 0
-; RV64-V256-NEXT: vmv.v.x v18, a1
 ; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmerge.vim v16, v14, -1, v0
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV64-V256-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-V256-NEXT: vmsne.vi v0, v18, 0
+; RV64-V256-NEXT: vmv.v.x v10, a1
+; RV64-V256-NEXT: vmsne.vi v0, v10, 0
 ; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmerge.vim v14, v14, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v16, v8
-; RV64-V256-NEXT: vnot.v v16, v16
-; RV64-V256-NEXT: vand.vv v10, v16, v10
-; RV64-V256-NEXT: vnot.v v16, v14
-; RV64-V256-NEXT: vor.vv v8, v8, v10
-; RV64-V256-NEXT: vand.vv v8, v14, v8
-; RV64-V256-NEXT: vand.vv v10, v16, v12
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v12, v8, v0
 ; RV64-V256-NEXT: ret
 <vscale x 4 x i32> %a,
 <vscale x 4 x i32> %b,
@@ -407,12 +299,7 @@ define <vscale x 8 x i16> @ctsel_nxv8i16_basic(i1 %cond, <vscale x 8 x i16> %a,
 ; RV64-NEXT: vmv.v.x v12, a0
 ; RV64-NEXT: vmsne.vi v0, v12, 0
 ; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: ctsel_nxv8i16_basic:
@@ -422,12 +309,7 @@ define <vscale x 8 x i16> @ctsel_nxv8i16_basic(i1 %cond, <vscale x 8 x i16> %a,
 ; RV32-NEXT: vmv.v.x v12, a0
 ; RV32-NEXT: vmsne.vi v0, v12, 0
 ; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV32-NEXT: ret
 ;
 ; RV32-V128-LABEL: ctsel_nxv8i16_basic:
@@ -437,12 +319,7 @@ define <vscale x 8 x i16> @ctsel_nxv8i16_basic(i1 %cond, <vscale x 8 x i16> %a,
 ; RV32-V128-NEXT: vmv.v.x v12, a0
 ; RV32-V128-NEXT: vmsne.vi v0, v12, 0
 ; RV32-V128-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV32-V128-NEXT: ret
 ;
 ; RV64-V256-LABEL: ctsel_nxv8i16_basic:
@@ -452,12 +329,7 @@ define <vscale x 8 x i16> @ctsel_nxv8i16_basic(i1 %cond, <vscale x 8 x i16> %a,
 ; RV64-V256-NEXT: vmv.v.x v12, a0
 ; RV64-V256-NEXT: vmsne.vi v0, v12, 0
 ; RV64-V256-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV64-V256-NEXT: ret
 %r = call <vscale x 8 x i16> @llvm.ct.select.nxv8i16(i1 %cond, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
 ret <vscale x 8 x i16> %r
@@ -470,12 +342,7 @@ define <vscale x 16 x i8> @ctsel_nxv16i8_basic(i1 %cond, <vscale x 16 x i8> %a,
 ; RV64-NEXT: vsetvli a1, zero, e8, m2, ta, ma
 ; RV64-NEXT: vmv.v.x v12, a0
 ; RV64-NEXT: vmsne.vi v0, v12, 0
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: ctsel_nxv16i8_basic:
@@ -484,12 +351,7 @@ define <vscale x 16 x i8> @ctsel_nxv16i8_basic(i1 %cond, <vscale x 16 x i8> %a,
 ; RV32-NEXT: vsetvli a1, zero, e8, m2, ta, ma
 ; RV32-NEXT: vmv.v.x v12, a0
 ; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV32-NEXT: ret
 ;
 ; RV32-V128-LABEL: ctsel_nxv16i8_basic:
@@ -498,12 +360,7 @@ define <vscale x 16 x i8> @ctsel_nxv16i8_basic(i1 %cond, <vscale x 16 x i8> %a,
 ; RV32-V128-NEXT: vsetvli a1, zero, e8, m2, ta, ma
 ; RV32-V128-NEXT: vmv.v.x v12, a0
 ; RV32-V128-NEXT: vmsne.vi v0, v12, 0
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV32-V128-NEXT: ret
 ;
 ; RV64-V256-LABEL: ctsel_nxv16i8_basic:
@@ -512,12 +369,7 @@ define <vscale x 16 x i8> @ctsel_nxv16i8_basic(i1 %cond, <vscale x 16 x i8> %a,
 ; RV64-V256-NEXT: vsetvli a1, zero, e8, m2, ta, ma
 ; RV64-V256-NEXT: vmv.v.x v12, a0
 ; RV64-V256-NEXT: vmsne.vi v0, v12, 0
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV64-V256-NEXT: ret
 %r = call <vscale x 16 x i8> @llvm.ct.select.nxv16i8(i1 %cond, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
 ret <vscale x 16 x i8> %r
@@ -532,42 +384,37 @@ define <vscale x 2 x i64> @ctsel_nxv2i64_basic(i1 %cond, <vscale x 2 x i64> %a,
 ; RV64-NEXT: vmv.v.x v12, a0
 ; RV64-NEXT: vmsne.vi v0, v12, 0
 ; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: ctsel_nxv2i64_basic:
 ; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vxor.vv v8, v8, v10
 ; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
 ; RV32-NEXT: vmv.v.x v12, a0
 ; RV32-NEXT: vmsne.vi v0, v12, 0
 ; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-NEXT: vmv.v.i v12, 0
 ; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vand.vv v8, v8, v12
+; RV32-NEXT: vxor.vv v8, v10, v8
 ; RV32-NEXT: ret
 ;
 ; RV32-V128-LABEL: ctsel_nxv2i64_basic:
 ; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-V128-NEXT: vxor.vv v8, v8, v10
 ; RV32-V128-NEXT: andi a0, a0, 1
-; RV32-V128-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-V128-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
 ; RV32-V128-NEXT: vmv.v.x v12, a0
 ; RV32-V128-NEXT: vmsne.vi v0, v12, 0
 ; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-V128-NEXT: vmv.v.i v12, 0
 ; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vand.vv v8, v8, v12
+; RV32-V128-NEXT: vxor.vv v8, v10, v8
 ; RV32-V128-NEXT: ret
 ;
 ; RV64-V256-LABEL: ctsel_nxv2i64_basic:
@@ -577,12 +424,7 @@ define <vscale x 2 x i64> @ctsel_nxv2i64_basic(i1 %cond, <vscale x 2 x i64> %a,
 ; RV64-V256-NEXT: vmv.v.x v12, a0
 ; RV64-V256-NEXT: vmsne.vi v0, v12, 0
 ; RV64-V256-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV64-V256-NEXT: ret
 %r = call <vscale x 2 x i64> @llvm.ct.select.nxv2i64(i1 %cond, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
 ret <vscale x 2 x i64> %r
@@ -597,12 +439,7 @@ define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float>
 ; RV64-NEXT: vmv.v.x v12, a0
 ; RV64-NEXT: vmsne.vi v0, v12, 0
 ; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: ctsel_nxv4f32_basic:
@@ -612,12 +449,7 @@ define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float>
 ; RV32-NEXT: vmv.v.x v12, a0
 ; RV32-NEXT: vmsne.vi v0, v12, 0
 ; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV32-NEXT: ret
 ;
 ; RV32-V128-LABEL: ctsel_nxv4f32_basic:
@@ -627,12 +459,7 @@ define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float>
 ; RV32-V128-NEXT: vmv.v.x v12, a0
 ; RV32-V128-NEXT: vmsne.vi v0, v12, 0
 ; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV32-V128-NEXT: ret
 ;
 ; RV64-V256-LABEL: ctsel_nxv4f32_basic:
@@ -642,12 +469,7 @@ define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float>
 ; RV64-V256-NEXT: vmv.v.x v12, a0
 ; RV64-V256-NEXT: vmsne.vi v0, v12, 0
 ; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV64-V256-NEXT: ret
 %r = call <vscale x 4 x float> @llvm.ct.select.nxv4f32(i1 %cond, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
 ret <vscale x 4 x float> %r
@@ -657,74 +479,50 @@ define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float>
 define <vscale x 4 x float> @ctsel_nxv4f32_arith(i1 %cond, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
 ; RV64-LABEL: ctsel_nxv4f32_arith:
 ; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vfadd.vv v12, v8, v10
-; RV64-NEXT: vfsub.vv v8, v8, v10
 ; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v10, a0
-; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v10, 0
-; RV64-NEXT: vmerge.vim v10, v10, -1, v0
-; RV64-NEXT: vand.vv v12, v10, v12
-; RV64-NEXT: vnot.v v10, v10
-; RV64-NEXT: vand.vv v8, v10, v8
-; RV64-NEXT: vor.vv v8, v12, v8
+; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v12, a0
+; RV64-NEXT: vmsne.vi v0, v12, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-NEXT: vfsub.vv v12, v8, v10
+; RV64-NEXT: vfadd.vv v12, v8, v10, v0.t
+; RV64-NEXT: vmv.v.v v8, v12
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: ctsel_nxv4f32_arith:
 ; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vfadd.vv v12, v8, v10
-; RV32-NEXT: vfsub.vv v8, v8, v10
 ; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a0
-; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: vmerge.vim v10, v10, -1, v0
-; RV32-NEXT: vand.vv v12, v10, v12
-; RV32-NEXT: vnot.v v10, v10
-; RV32-NEXT: vand.vv v8, v10, v8
-; RV32-NEXT: vor.vv v8, v12, v8
+; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a0
+; RV32-NEXT: vmsne.vi v0, v12, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-NEXT: vfsub.vv v12, v8, v10
+; RV32-NEXT: vfadd.vv v12, v8, v10, v0.t
+; RV32-NEXT: vmv.v.v v8, v12
 ; RV32-NEXT: ret
 ;
 ; RV32-V128-LABEL: ctsel_nxv4f32_arith:
 ; RV32-V128: # %bb.0:
-; RV32-V128-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vfadd.vv v12, v8, v10
-; RV32-V128-NEXT: vfsub.vv v8, v8, v10
 ; RV32-V128-NEXT: andi a0, a0, 1
-; RV32-V128-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-V128-NEXT: vmv.v.x v10, a0
-; RV32-V128-NEXT: vmsne.vi v0, v10, 0
-; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v10, 0
-; RV32-V128-NEXT: vmerge.vim v10, v10, -1, v0
-; RV32-V128-NEXT: vand.vv v12, v10, v12
-; RV32-V128-NEXT: vnot.v v10, v10
-; RV32-V128-NEXT: vand.vv v8, v10, v8
-; RV32-V128-NEXT: vor.vv v8, v12, v8
+; RV32-V128-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v12, a0
+; RV32-V128-NEXT: vmsne.vi v0, v12, 0
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-V128-NEXT: vfsub.vv v12, v8, v10
+; RV32-V128-NEXT: vfadd.vv v12, v8, v10, v0.t
+; RV32-V128-NEXT: vmv.v.v v8, v12
 ; RV32-V128-NEXT: ret
 ;
 ; RV64-V256-LABEL: ctsel_nxv4f32_arith:
 ; RV64-V256: # %bb.0:
-; RV64-V256-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vfadd.vv v12, v8, v10
-; RV64-V256-NEXT: vfsub.vv v8, v8, v10
 ; RV64-V256-NEXT: andi a0, a0, 1
-; RV64-V256-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-V256-NEXT: vmv.v.x v10, a0
-; RV64-V256-NEXT: vmsne.vi v0, v10, 0
-; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v10, 0
-; RV64-V256-NEXT: vmerge.vim v10, v10, -1, v0
-; RV64-V256-NEXT: vand.vv v12, v10, v12
-; RV64-V256-NEXT: vnot.v v10, v10
-; RV64-V256-NEXT: vand.vv v8, v10, v8
-; RV64-V256-NEXT: vor.vv v8, v12, v8
+; RV64-V256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v12, a0
+; RV64-V256-NEXT: vmsne.vi v0, v12, 0
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-V256-NEXT: vfsub.vv v12, v8, v10
+; RV64-V256-NEXT: vfadd.vv v12, v8, v10, v0.t
+; RV64-V256-NEXT: vmv.v.v v8, v12
 ; RV64-V256-NEXT: ret
 %sum = fadd <vscale x 4 x float> %x, %y
 %diff = fsub <vscale x 4 x float> %x, %y
@@ -740,42 +538,37 @@ define <vscale x 2 x double> @ctsel_nxv2f64_basic(i1 %cond, <vscale x 2 x double
 ; RV64-NEXT: vmv.v.x v12, a0
 ; RV64-NEXT: vmsne.vi v0, v12, 0
 ; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: ctsel_nxv2f64_basic:
 ; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vxor.vv v8, v8, v10
 ; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
 ; RV32-NEXT: vmv.v.x v12, a0
 ; RV32-NEXT: vmsne.vi v0, v12, 0
 ; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-NEXT: vmv.v.i v12, 0
 ; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vand.vv v8, v8, v12
+; RV32-NEXT: vxor.vv v8, v10, v8
 ; RV32-NEXT: ret
 ;
 ; RV32-V128-LABEL: ctsel_nxv2f64_basic:
 ; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-V128-NEXT: vxor.vv v8, v8, v10
 ; RV32-V128-NEXT: andi a0, a0, 1
-; RV32-V128-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-V128-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
 ; RV32-V128-NEXT: vmv.v.x v12, a0
 ; RV32-V128-NEXT: vmsne.vi v0, v12, 0
 ; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-V128-NEXT: vmv.v.i v12, 0
 ; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vand.vv v8, v8, v12
+; RV32-V128-NEXT: vxor.vv v8, v10, v8
 ; RV32-V128-NEXT: ret
 ;
 ; RV64-V256-LABEL: ctsel_nxv2f64_basic:
@@ -785,12 +578,7 @@ define <vscale x 2 x double> @ctsel_nxv2f64_basic(i1 %cond, <vscale x 2 x double
 ; RV64-V256-NEXT: vmv.v.x v12, a0
 ; RV64-V256-NEXT: vmsne.vi v0, v12, 0
 ; RV64-V256-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
 ; RV64-V256-NEXT: ret
 %r = call <vscale x 2 x double> @llvm.ct.select.nxv2f64(i1 %cond, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
 ret <vscale x 2 x double> %r
diff --git a/llvm/test/CodeGen/RISCV/ctselect-side-effects.ll b/llvm/test/CodeGen/RISCV/ctselect-side-effects.ll
index 255d575ca8f9..6020a9cded35 100644
--- a/llvm/test/CodeGen/RISCV/ctselect-side-effects.ll
+++ b/llvm/test/CodeGen/RISCV/ctselect-side-effects.ll
@@ -40,17 +40,16 @@ define i32 @test_protected_no_branch(i1 %cond, i32 %a, i32 %b) {
 ; RV64-NEXT: slli a0, a0, 63
 ; RV64-NEXT: srai a0, a0, 63
 ; RV64-NEXT: and a0, a1, a0
-; RV64-NEXT: xor a0, a0, a2
+; RV64-NEXT: xor a0, a2, a0
 ; RV64-NEXT: ret
 ;
 ; RV32-LABEL: test_protected_no_branch:
 ; RV32: # %bb.0:
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: neg a3, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a1, a3, a1
-; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: xor a0, a2, a0
 ; RV32-NEXT: ret
 %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
 ret i32 %result
@@ -86,7 +85,7 @@ false:
 ret i32 %b
 }
-; Test 5: Regular select (not ct.select) - whatever wasm wants to do
+; Test 5: Regular select (not ct.select)
 define i32 @test_regular_select(i1 %cond, i32 %a, i32 %b) {
 ; RV64-LABEL: test_regular_select:
 ; RV64: # %bb.0:
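A footnote on the vector tests: most RVV cases now legalize to a single vmerge.vvm (or fold the select into a masked vle32.v, vadd.vv, or vfadd.vv), replacing the old splat/vand/vnot/vand/vor chain; only the RV32 e64 cases keep an explicit xor/and/xor mask sequence built with vmerge.vim. A rough elementwise model of what `vmerge.vvm vd, vs2, vs1, v0` computes, written as a hypothetical scalar helper that is not part of the patch:

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Elementwise model of `vmerge.vvm vd, vs2, vs1, v0`:
   vd[i] = v0[i] ? vs1[i] : vs2[i], for i in [0, vl).
   In the regenerated tests, vs1 holds the true operand and vs2 the
   false operand, e.g. `vmerge.vvm v8, v10, v8, v0`. */
static void vmerge_vvm(uint32_t *vd, const uint32_t *vs2,
                       const uint32_t *vs1, const bool *v0, size_t vl) {
  for (size_t i = 0; i < vl; ++i)
    vd[i] = v0[i] ? vs1[i] : vs2[i];
}
```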
