diff options
| author | wizardengineer <juliuswoosebert@gmail.com> | 2026-03-07 15:38:15 -0500 |
|---|---|---|
| committer | wizardengineer <juliuswoosebert@gmail.com> | 2026-03-07 16:39:57 -0500 |
| commit | 9d45e799ab7c4dea36c4c8df5ed757a92886c814 (patch) | |
| tree | 1d58dc1b5a08bf10e6732d72b8021280f759ff2a | |
| parent | 89b18c0965ccec867ad8e234c77ac7f83c7edfc5 (diff) | |
| download | llvm-users/wizardengineer/ct-select-mips.tar.gz llvm-users/wizardengineer/ct-select-mips.tar.bz2 llvm-users/wizardengineer/ct-select-mips.zip | |
[LLVM][MIPS] Regenerate ct.select test CHECK lines [users/wizardengineer/ct-select-mips]
Update CHECK lines to match the new constant-time AND/OR/XOR expansion
from the CT_SELECT legalization fix.
| -rw-r--r-- | llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll | 223 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll | 297 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll | 570 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Mips/ctselect-fallback.ll | 258 | ||||
| -rw-r--r-- | llvm/test/CodeGen/Mips/ctselect-side-effects.ll | 24 |
5 files changed, 617 insertions, 755 deletions
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll index f1831a625d4a..401a742c27ea 100644 --- a/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll +++ b/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll @@ -8,22 +8,24 @@ define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) { ; M32-LABEL: test_ctselect_i1: ; M32: # %bb.0: -; M32-NEXT: xori $2, $4, 1 -; M32-NEXT: and $1, $4, $5 -; M32-NEXT: and $2, $2, $6 +; M32-NEXT: andi $2, $4, 1 +; M32-NEXT: xor $1, $5, $6 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $1, $2 +; M32-NEXT: xor $2, $6, $1 ; ; M64-LABEL: test_ctselect_i1: ; M64: # %bb.0: -; M64-NEXT: sll $2, $4, 0 -; M64-NEXT: sll $1, $6, 0 -; M64-NEXT: xori $2, $2, 1 -; M64-NEXT: and $1, $2, $1 -; M64-NEXT: and $2, $4, $5 +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: xor $2, $5, $6 +; M64-NEXT: andi $1, $1, 1 ; M64-NEXT: sll $2, $2, 0 +; M64-NEXT: negu $1, $1 +; M64-NEXT: and $1, $2, $1 +; M64-NEXT: sll $2, $6, 0 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $2, $1 %result = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b) ret i1 %result } @@ -32,30 +34,18 @@ define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) { define i32 @test_ctselect_extremal_values(i1 %cond) { ; M32-LABEL: test_ctselect_extremal_values: ; M32: # %bb.0: -; M32-NEXT: lui $3, 32767 ; M32-NEXT: andi $1, $4, 1 -; M32-NEXT: negu $2, $1 -; M32-NEXT: ori $3, $3, 65535 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $2, $3 -; M32-NEXT: lui $3, 32768 -; M32-NEXT: and $1, $1, $3 +; M32-NEXT: lui $2, 32768 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: subu $2, $2, $1 ; ; M64-LABEL: test_ctselect_extremal_values: ; M64: # %bb.0: ; M64-NEXT: sll $1, $4, 0 -; M64-NEXT: lui $3, 32767 +; M64-NEXT: lui $2, 32768 ; M64-NEXT: andi $1, $1, 1 -; M64-NEXT: ori $3, $3, 65535 -; M64-NEXT: negu $2, $1 -; M64-NEXT: addiu $1, $1, -1 -; M64-NEXT: and $2, $2, $3 -; 
M64-NEXT: lui $3, 32768 -; M64-NEXT: and $1, $1, $3 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: subu $2, $2, $1 %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 2147483647, i32 -2147483648) ret i32 %result } @@ -67,14 +57,14 @@ define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) { ; M32-NEXT: andi $1, $4, 1 ; M32-NEXT: negu $1, $1 ; M32-NEXT: jr $ra -; M32-NEXT: and $2, $1, $5 +; M32-NEXT: and $2, $5, $1 ; ; M64-LABEL: test_ctselect_null_ptr: ; M64: # %bb.0: ; M64-NEXT: andi $1, $4, 1 ; M64-NEXT: dnegu $1, $1 ; M64-NEXT: jr $ra -; M64-NEXT: and $2, $1, $5 +; M64-NEXT: and $2, $5, $1 %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %ptr, ptr null) ret ptr %result } @@ -83,23 +73,21 @@ define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) { define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) { ; M32-LABEL: test_ctselect_function_ptr: ; M32: # %bb.0: -; M32-NEXT: andi $1, $4, 1 -; M32-NEXT: negu $2, $1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $2, $5 -; M32-NEXT: and $1, $1, $6 +; M32-NEXT: andi $2, $4, 1 +; M32-NEXT: xor $1, $5, $6 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $6, $1 ; ; M64-LABEL: test_ctselect_function_ptr: ; M64: # %bb.0: -; M64-NEXT: andi $1, $4, 1 -; M64-NEXT: dnegu $2, $1 -; M64-NEXT: daddiu $1, $1, -1 -; M64-NEXT: and $2, $2, $5 -; M64-NEXT: and $1, $1, $6 +; M64-NEXT: andi $2, $4, 1 +; M64-NEXT: xor $1, $5, $6 +; M64-NEXT: dnegu $2, $2 +; M64-NEXT: and $1, $1, $2 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $6, $1 %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %func1, ptr %func2) ret ptr %result } @@ -108,26 +96,25 @@ define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) { define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) { ; M32-LABEL: test_ctselect_ptr_cmp: ; M32: # %bb.0: -; M32-NEXT: xor $1, $4, $5 -; M32-NEXT: sltu $1, $zero, $1 -; M32-NEXT: addiu 
$1, $1, -1 -; M32-NEXT: and $2, $1, $6 -; M32-NEXT: not $1, $1 -; M32-NEXT: and $1, $1, $7 +; M32-NEXT: xor $2, $4, $5 +; M32-NEXT: xor $1, $6, $7 +; M32-NEXT: sltiu $2, $2, 1 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $7, $1 ; ; M64-LABEL: test_ctselect_ptr_cmp: ; M64: # %bb.0: -; M64-NEXT: xor $1, $4, $5 -; M64-NEXT: daddiu $3, $zero, -1 -; M64-NEXT: daddiu $2, $zero, -1 -; M64-NEXT: movn $3, $zero, $1 -; M64-NEXT: xor $2, $3, $2 -; M64-NEXT: and $1, $3, $6 -; M64-NEXT: and $2, $2, $7 +; M64-NEXT: xor $2, $4, $5 +; M64-NEXT: xor $1, $6, $7 +; M64-NEXT: sltiu $2, $2, 1 +; M64-NEXT: dsll $2, $2, 32 +; M64-NEXT: dsrl $2, $2, 32 +; M64-NEXT: dnegu $2, $2 +; M64-NEXT: and $1, $1, $2 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $1, $2 +; M64-NEXT: xor $2, $7, $1 %cmp = icmp eq ptr %p1, %p2 %result = call ptr @llvm.ct.select.p0(i1 %cmp, ptr %a, ptr %b) ret ptr %result @@ -139,23 +126,21 @@ define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) { define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) { ; M32-LABEL: test_ctselect_struct_ptr: ; M32: # %bb.0: -; M32-NEXT: andi $1, $4, 1 -; M32-NEXT: negu $2, $1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $2, $5 -; M32-NEXT: and $1, $1, $6 +; M32-NEXT: andi $2, $4, 1 +; M32-NEXT: xor $1, $5, $6 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $6, $1 ; ; M64-LABEL: test_ctselect_struct_ptr: ; M64: # %bb.0: -; M64-NEXT: andi $1, $4, 1 -; M64-NEXT: dnegu $2, $1 -; M64-NEXT: daddiu $1, $1, -1 -; M64-NEXT: and $2, $2, $5 -; M64-NEXT: and $1, $1, $6 +; M64-NEXT: andi $2, $4, 1 +; M64-NEXT: xor $1, $5, $6 +; M64-NEXT: dnegu $2, $2 +; M64-NEXT: and $1, $1, $2 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $6, $1 %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b) ret ptr %result } @@ -164,73 +149,65 @@ define ptr 
@test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) { define i32 @test_ctselect_deeply_nested(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { ; M32-LABEL: test_ctselect_deeply_nested: ; M32: # %bb.0: -; M32-NEXT: andi $1, $4, 1 -; M32-NEXT: lw $3, 16($sp) -; M32-NEXT: lw $9, 32($sp) -; M32-NEXT: lw $8, 28($sp) -; M32-NEXT: negu $2, $1 -; M32-NEXT: addiu $1, $1, -1 +; M32-NEXT: lw $1, 20($sp) +; M32-NEXT: lw $2, 16($sp) +; M32-NEXT: andi $3, $4, 1 +; M32-NEXT: andi $4, $6, 1 +; M32-NEXT: lw $6, 28($sp) +; M32-NEXT: negu $3, $3 +; M32-NEXT: xor $2, $2, $1 ; M32-NEXT: and $2, $2, $3 -; M32-NEXT: lw $3, 20($sp) -; M32-NEXT: and $1, $1, $3 ; M32-NEXT: andi $3, $5, 1 -; M32-NEXT: or $1, $2, $1 -; M32-NEXT: andi $2, $6, 1 -; M32-NEXT: andi $6, $7, 1 -; M32-NEXT: negu $4, $3 -; M32-NEXT: addiu $3, $3, -1 -; M32-NEXT: addiu $7, $6, -1 -; M32-NEXT: and $1, $4, $1 -; M32-NEXT: addiu $5, $2, -1 -; M32-NEXT: negu $2, $2 -; M32-NEXT: negu $6, $6 -; M32-NEXT: and $4, $7, $9 -; M32-NEXT: lw $7, 24($sp) -; M32-NEXT: and $5, $5, $8 -; M32-NEXT: and $3, $3, $7 -; M32-NEXT: or $1, $1, $3 -; M32-NEXT: and $1, $2, $1 -; M32-NEXT: or $1, $1, $5 -; M32-NEXT: and $1, $6, $1 +; M32-NEXT: lw $5, 32($sp) +; M32-NEXT: xor $1, $1, $2 +; M32-NEXT: lw $2, 24($sp) +; M32-NEXT: negu $3, $3 +; M32-NEXT: xor $1, $1, $2 +; M32-NEXT: and $1, $1, $3 +; M32-NEXT: andi $3, $7, 1 +; M32-NEXT: xor $1, $2, $1 +; M32-NEXT: negu $2, $4 +; M32-NEXT: negu $3, $3 +; M32-NEXT: xor $1, $1, $6 +; M32-NEXT: and $1, $1, $2 +; M32-NEXT: xor $1, $6, $1 +; M32-NEXT: xor $1, $1, $5 +; M32-NEXT: and $1, $1, $3 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $1, $4 +; M32-NEXT: xor $2, $5, $1 ; ; M64-LABEL: test_ctselect_deeply_nested: ; M64: # %bb.0: ; M64-NEXT: sll $1, $4, 0 -; M64-NEXT: sll $3, $8, 0 -; M64-NEXT: sll $4, $5, 0 -; M64-NEXT: lw $8, 0($sp) +; M64-NEXT: xor $2, $8, $9 +; M64-NEXT: sll $5, $5, 0 +; M64-NEXT: sll $3, $6, 0 +; M64-NEXT: sll $6, $11, 0 +; M64-NEXT: sll $4, $7, 0 +; M64-NEXT: lw 
$7, 0($sp) ; M64-NEXT: andi $1, $1, 1 +; M64-NEXT: sll $2, $2, 0 +; M64-NEXT: andi $5, $5, 1 +; M64-NEXT: andi $3, $3, 1 ; M64-NEXT: andi $4, $4, 1 -; M64-NEXT: negu $2, $1 -; M64-NEXT: addiu $1, $1, -1 -; M64-NEXT: negu $5, $4 -; M64-NEXT: addiu $4, $4, -1 -; M64-NEXT: and $2, $2, $3 -; M64-NEXT: sll $3, $9, 0 -; M64-NEXT: and $1, $1, $3 -; M64-NEXT: sll $3, $11, 0 -; M64-NEXT: or $1, $2, $1 -; M64-NEXT: sll $2, $6, 0 -; M64-NEXT: sll $6, $7, 0 -; M64-NEXT: andi $2, $2, 1 -; M64-NEXT: and $1, $5, $1 -; M64-NEXT: andi $6, $6, 1 -; M64-NEXT: addiu $5, $2, -1 -; M64-NEXT: negu $2, $2 -; M64-NEXT: addiu $7, $6, -1 -; M64-NEXT: negu $6, $6 -; M64-NEXT: and $3, $5, $3 -; M64-NEXT: sll $5, $10, 0 -; M64-NEXT: and $7, $7, $8 -; M64-NEXT: and $4, $4, $5 -; M64-NEXT: or $1, $1, $4 +; M64-NEXT: negu $1, $1 +; M64-NEXT: negu $5, $5 +; M64-NEXT: negu $4, $4 ; M64-NEXT: and $1, $2, $1 -; M64-NEXT: or $1, $1, $3 -; M64-NEXT: and $1, $6, $1 +; M64-NEXT: sll $2, $9, 0 +; M64-NEXT: xor $1, $2, $1 +; M64-NEXT: sll $2, $10, 0 +; M64-NEXT: xor $1, $1, $2 +; M64-NEXT: and $1, $1, $5 +; M64-NEXT: xor $1, $2, $1 +; M64-NEXT: negu $2, $3 +; M64-NEXT: xor $1, $1, $6 +; M64-NEXT: and $1, $1, $2 +; M64-NEXT: xor $1, $6, $1 +; M64-NEXT: xor $1, $1, $7 +; M64-NEXT: and $1, $1, $4 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $1, $7 +; M64-NEXT: xor $2, $7, $1 %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b) %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c) %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d) diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll index 2e65e586ce5f..a1c5d524c693 100644 --- a/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll +++ b/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll @@ -6,16 +6,18 @@ define i32 @test_ctselect_smin_zero(i32 %x) { ; M32-LABEL: test_ctselect_smin_zero: ; M32: # %bb.0: -; M32-NEXT: sra $1, $4, 31 +; M32-NEXT: slti $1, $4, 0 +; 
M32-NEXT: negu $1, $1 ; M32-NEXT: jr $ra -; M32-NEXT: and $2, $1, $4 +; M32-NEXT: and $2, $4, $1 ; ; M64-LABEL: test_ctselect_smin_zero: ; M64: # %bb.0: ; M64-NEXT: sll $1, $4, 0 -; M64-NEXT: sra $2, $1, 31 +; M64-NEXT: slti $2, $1, 0 +; M64-NEXT: negu $2, $2 ; M64-NEXT: jr $ra -; M64-NEXT: and $2, $2, $1 +; M64-NEXT: and $2, $1, $2 %cmp = icmp slt i32 %x, 0 %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0) ret i32 %result @@ -25,17 +27,18 @@ define i32 @test_ctselect_smin_zero(i32 %x) { define i32 @test_ctselect_smax_zero(i32 %x) { ; M32-LABEL: test_ctselect_smax_zero: ; M32: # %bb.0: -; M32-NEXT: slti $1, $4, 1 -; M32-NEXT: movn $4, $zero, $1 +; M32-NEXT: slt $1, $zero, $4 +; M32-NEXT: negu $1, $1 ; M32-NEXT: jr $ra -; M32-NEXT: move $2, $4 +; M32-NEXT: and $2, $4, $1 ; ; M64-LABEL: test_ctselect_smax_zero: ; M64: # %bb.0: -; M64-NEXT: sll $2, $4, 0 -; M64-NEXT: slti $1, $2, 1 +; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: slt $2, $zero, $1 +; M64-NEXT: negu $2, $2 ; M64-NEXT: jr $ra -; M64-NEXT: movn $2, $zero, $1 +; M64-NEXT: and $2, $1, $2 %cmp = icmp sgt i32 %x, 0 %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0) ret i32 %result @@ -45,27 +48,23 @@ define i32 @test_ctselect_smax_zero(i32 %x) { define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) { ; M32-LABEL: test_ctselect_smin_generic: ; M32: # %bb.0: -; M32-NEXT: slt $1, $4, $5 -; M32-NEXT: xori $1, $1, 1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $1, $4 -; M32-NEXT: not $1, $1 -; M32-NEXT: and $1, $1, $5 +; M32-NEXT: slt $2, $4, $5 +; M32-NEXT: xor $1, $4, $5 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $5, $1 ; ; M64-LABEL: test_ctselect_smin_generic: ; M64: # %bb.0: ; M64-NEXT: sll $1, $5, 0 ; M64-NEXT: sll $2, $4, 0 -; M64-NEXT: slt $3, $2, $1 -; M64-NEXT: xori $3, $3, 1 -; M64-NEXT: addiu $3, $3, -1 +; M64-NEXT: xor $3, $2, $1 +; M64-NEXT: slt $2, $2, $1 +; M64-NEXT: negu $2, $2 ; M64-NEXT: and $2, 
$3, $2 -; M64-NEXT: not $3, $3 -; M64-NEXT: and $1, $3, $1 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $1, $2 %cmp = icmp slt i32 %x, %y %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y) ret i32 %result @@ -75,27 +74,23 @@ define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) { define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) { ; M32-LABEL: test_ctselect_smax_generic: ; M32: # %bb.0: -; M32-NEXT: slt $1, $5, $4 -; M32-NEXT: xori $1, $1, 1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $1, $4 -; M32-NEXT: not $1, $1 -; M32-NEXT: and $1, $1, $5 +; M32-NEXT: slt $2, $5, $4 +; M32-NEXT: xor $1, $4, $5 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $5, $1 ; ; M64-LABEL: test_ctselect_smax_generic: ; M64: # %bb.0: -; M64-NEXT: sll $1, $4, 0 -; M64-NEXT: sll $2, $5, 0 -; M64-NEXT: slt $3, $2, $1 -; M64-NEXT: xori $3, $3, 1 -; M64-NEXT: addiu $3, $3, -1 -; M64-NEXT: and $1, $3, $1 -; M64-NEXT: not $3, $3 +; M64-NEXT: sll $1, $5, 0 +; M64-NEXT: sll $2, $4, 0 +; M64-NEXT: xor $3, $2, $1 +; M64-NEXT: slt $2, $1, $2 +; M64-NEXT: negu $2, $2 ; M64-NEXT: and $2, $3, $2 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $1, $2 +; M64-NEXT: xor $2, $1, $2 %cmp = icmp sgt i32 %x, %y %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y) ret i32 %result @@ -105,27 +100,23 @@ define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) { define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) { ; M32-LABEL: test_ctselect_umin_generic: ; M32: # %bb.0: -; M32-NEXT: sltu $1, $4, $5 -; M32-NEXT: xori $1, $1, 1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $1, $4 -; M32-NEXT: not $1, $1 -; M32-NEXT: and $1, $1, $5 +; M32-NEXT: sltu $2, $4, $5 +; M32-NEXT: xor $1, $4, $5 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $5, $1 ; ; M64-LABEL: test_ctselect_umin_generic: ; M64: # %bb.0: ; M64-NEXT: sll $1, 
$5, 0 ; M64-NEXT: sll $2, $4, 0 -; M64-NEXT: sltu $3, $2, $1 -; M64-NEXT: xori $3, $3, 1 -; M64-NEXT: addiu $3, $3, -1 +; M64-NEXT: xor $3, $2, $1 +; M64-NEXT: sltu $2, $2, $1 +; M64-NEXT: negu $2, $2 ; M64-NEXT: and $2, $3, $2 -; M64-NEXT: not $3, $3 -; M64-NEXT: and $1, $3, $1 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $1, $2 %cmp = icmp ult i32 %x, %y %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y) ret i32 %result @@ -135,27 +126,23 @@ define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) { define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) { ; M32-LABEL: test_ctselect_umax_generic: ; M32: # %bb.0: -; M32-NEXT: sltu $1, $5, $4 -; M32-NEXT: xori $1, $1, 1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $1, $4 -; M32-NEXT: not $1, $1 -; M32-NEXT: and $1, $1, $5 +; M32-NEXT: sltu $2, $5, $4 +; M32-NEXT: xor $1, $4, $5 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $5, $1 ; ; M64-LABEL: test_ctselect_umax_generic: ; M64: # %bb.0: -; M64-NEXT: sll $1, $4, 0 -; M64-NEXT: sll $2, $5, 0 -; M64-NEXT: sltu $3, $2, $1 -; M64-NEXT: xori $3, $3, 1 -; M64-NEXT: addiu $3, $3, -1 -; M64-NEXT: and $1, $3, $1 -; M64-NEXT: not $3, $3 +; M64-NEXT: sll $1, $5, 0 +; M64-NEXT: sll $2, $4, 0 +; M64-NEXT: xor $3, $2, $1 +; M64-NEXT: sltu $2, $1, $2 +; M64-NEXT: negu $2, $2 ; M64-NEXT: and $2, $3, $2 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $1, $2 +; M64-NEXT: xor $2, $1, $2 %cmp = icmp ugt i32 %x, %y %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y) ret i32 %result @@ -165,24 +152,24 @@ define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) { define i32 @test_ctselect_abs(i32 %x) { ; M32-LABEL: test_ctselect_abs: ; M32: # %bb.0: -; M32-NEXT: negu $1, $4 -; M32-NEXT: sra $2, $4, 31 +; M32-NEXT: slti $1, $4, 0 +; M32-NEXT: negu $2, $4 +; M32-NEXT: negu $1, $1 +; M32-NEXT: xor $2, $2, $4 ; M32-NEXT: and $1, $2, $1 -; M32-NEXT: not $2, $2 -; M32-NEXT: and $2, 
$2, $4 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $1, $2 +; M32-NEXT: xor $2, $4, $1 ; ; M64-LABEL: test_ctselect_abs: ; M64: # %bb.0: ; M64-NEXT: sll $1, $4, 0 -; M64-NEXT: negu $2, $1 -; M64-NEXT: sra $3, $1, 31 +; M64-NEXT: slti $2, $1, 0 +; M64-NEXT: negu $3, $1 +; M64-NEXT: negu $2, $2 +; M64-NEXT: xor $3, $3, $1 ; M64-NEXT: and $2, $3, $2 -; M64-NEXT: not $3, $3 -; M64-NEXT: and $1, $3, $1 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $1, $2 %neg = sub i32 0, %x %cmp = icmp slt i32 %x, 0 %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %neg, i32 %x) @@ -193,24 +180,24 @@ define i32 @test_ctselect_abs(i32 %x) { define i32 @test_ctselect_nabs(i32 %x) { ; M32-LABEL: test_ctselect_nabs: ; M32: # %bb.0: -; M32-NEXT: sra $1, $4, 31 -; M32-NEXT: negu $3, $4 -; M32-NEXT: and $2, $1, $4 -; M32-NEXT: not $1, $1 -; M32-NEXT: and $1, $1, $3 +; M32-NEXT: slti $1, $4, 0 +; M32-NEXT: negu $2, $4 +; M32-NEXT: negu $1, $1 +; M32-NEXT: xor $3, $4, $2 +; M32-NEXT: and $1, $3, $1 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $2, $1 ; ; M64-LABEL: test_ctselect_nabs: ; M64: # %bb.0: ; M64-NEXT: sll $1, $4, 0 -; M64-NEXT: sra $2, $1, 31 -; M64-NEXT: and $3, $2, $1 -; M64-NEXT: negu $1, $1 -; M64-NEXT: not $2, $2 -; M64-NEXT: and $1, $2, $1 +; M64-NEXT: slti $2, $1, 0 +; M64-NEXT: negu $3, $1 +; M64-NEXT: negu $2, $2 +; M64-NEXT: xor $1, $1, $3 +; M64-NEXT: and $1, $1, $2 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $3, $1 +; M64-NEXT: xor $2, $3, $1 %neg = sub i32 0, %x %cmp = icmp slt i32 %x, 0 %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %neg) @@ -221,14 +208,16 @@ define i32 @test_ctselect_nabs(i32 %x) { define i32 @test_ctselect_sign_extend(i32 %x) { ; M32-LABEL: test_ctselect_sign_extend: ; M32: # %bb.0: +; M32-NEXT: slti $1, $4, 0 ; M32-NEXT: jr $ra -; M32-NEXT: sra $2, $4, 31 +; M32-NEXT: negu $2, $1 ; ; M64-LABEL: test_ctselect_sign_extend: ; M64: # %bb.0: ; M64-NEXT: sll $1, $4, 0 +; M64-NEXT: slti $1, $1, 0 ; M64-NEXT: jr 
$ra -; M64-NEXT: sra $2, $1, 31 +; M64-NEXT: negu $2, $1 %cmp = icmp slt i32 %x, 0 %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 -1, i32 0) ret i32 %result @@ -270,13 +259,12 @@ define i32 @test_ctselect_constant_folding_false(i32 %a, i32 %b) { ; M32-LABEL: test_ctselect_constant_folding_false: ; M32: # %bb.0: ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $zero, $5 +; M32-NEXT: move $2, $5 ; ; M64-LABEL: test_ctselect_constant_folding_false: ; M64: # %bb.0: -; M64-NEXT: sll $1, $5, 0 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $zero, $1 +; M64-NEXT: sll $2, $5, 0 %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b) ret i32 %result } @@ -285,25 +273,13 @@ define i32 @test_ctselect_constant_folding_false(i32 %a, i32 %b) { define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) { ; M32-LABEL: test_ctselect_identical_operands: ; M32: # %bb.0: -; M32-NEXT: andi $1, $4, 1 -; M32-NEXT: negu $2, $1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $2, $5 -; M32-NEXT: and $1, $1, $5 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: move $2, $5 ; ; M64-LABEL: test_ctselect_identical_operands: ; M64: # %bb.0: -; M64-NEXT: sll $1, $4, 0 -; M64-NEXT: sll $3, $5, 0 -; M64-NEXT: andi $1, $1, 1 -; M64-NEXT: negu $2, $1 -; M64-NEXT: addiu $1, $1, -1 -; M64-NEXT: and $2, $2, $3 -; M64-NEXT: and $1, $1, $3 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: sll $2, $5, 0 %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %x, i32 %x) ret i32 %result } @@ -312,29 +288,27 @@ define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) { define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) { ; M32-LABEL: test_ctselect_inverted_condition: ; M32: # %bb.0: -; M32-NEXT: xor $1, $4, $5 -; M32-NEXT: sltiu $1, $1, 1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $1, $6 -; M32-NEXT: not $1, $1 -; M32-NEXT: and $1, $1, $7 +; M32-NEXT: xor $2, $4, $5 +; M32-NEXT: xor $1, $7, $6 +; M32-NEXT: sltiu $2, $2, 1 +; M32-NEXT: negu $2, $2 +; 
M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $6, $1 ; ; M64-LABEL: test_ctselect_inverted_condition: ; M64: # %bb.0: ; M64-NEXT: sll $1, $5, 0 ; M64-NEXT: sll $2, $4, 0 -; M64-NEXT: sll $3, $7, 0 ; M64-NEXT: xor $1, $2, $1 -; M64-NEXT: sll $2, $6, 0 +; M64-NEXT: xor $2, $7, $6 ; M64-NEXT: sltiu $1, $1, 1 -; M64-NEXT: addiu $1, $1, -1 -; M64-NEXT: and $2, $1, $2 -; M64-NEXT: not $1, $1 -; M64-NEXT: and $1, $1, $3 +; M64-NEXT: sll $2, $2, 0 +; M64-NEXT: negu $1, $1 +; M64-NEXT: and $1, $2, $1 +; M64-NEXT: sll $2, $6, 0 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $2, $1 %cmp = icmp eq i32 %x, %y %not_cmp = xor i1 %cmp, true %result = call i32 @llvm.ct.select.i32(i1 %not_cmp, i32 %a, i32 %b) @@ -345,57 +319,51 @@ define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) { define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c, i32 %d) { ; M32-LABEL: test_ctselect_chain: ; M32: # %bb.0: -; M32-NEXT: andi $1, $4, 1 +; M32-NEXT: lw $1, 16($sp) +; M32-NEXT: andi $3, $4, 1 +; M32-NEXT: negu $3, $3 +; M32-NEXT: xor $2, $7, $1 +; M32-NEXT: and $2, $2, $3 ; M32-NEXT: andi $3, $5, 1 -; M32-NEXT: lw $5, 16($sp) -; M32-NEXT: negu $2, $1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: negu $4, $3 -; M32-NEXT: addiu $3, $3, -1 -; M32-NEXT: and $1, $1, $5 -; M32-NEXT: and $2, $2, $7 -; M32-NEXT: lw $5, 24($sp) -; M32-NEXT: or $1, $2, $1 +; M32-NEXT: xor $1, $1, $2 +; M32-NEXT: lw $2, 20($sp) +; M32-NEXT: negu $3, $3 +; M32-NEXT: xor $1, $1, $2 +; M32-NEXT: and $1, $1, $3 +; M32-NEXT: lw $3, 24($sp) +; M32-NEXT: xor $1, $2, $1 ; M32-NEXT: andi $2, $6, 1 -; M32-NEXT: and $1, $4, $1 -; M32-NEXT: addiu $4, $2, -1 +; M32-NEXT: xor $1, $1, $3 ; M32-NEXT: negu $2, $2 -; M32-NEXT: and $4, $4, $5 -; M32-NEXT: lw $5, 20($sp) -; M32-NEXT: and $3, $3, $5 -; M32-NEXT: or $1, $1, $3 -; M32-NEXT: and $1, $2, $1 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $1, $4 +; 
M32-NEXT: xor $2, $3, $1 ; ; M64-LABEL: test_ctselect_chain: ; M64: # %bb.0: ; M64-NEXT: sll $1, $4, 0 -; M64-NEXT: sll $3, $7, 0 -; M64-NEXT: sll $4, $5, 0 +; M64-NEXT: xor $2, $7, $8 +; M64-NEXT: sll $3, $5, 0 ; M64-NEXT: andi $1, $1, 1 -; M64-NEXT: andi $4, $4, 1 -; M64-NEXT: negu $2, $1 -; M64-NEXT: addiu $1, $1, -1 -; M64-NEXT: negu $5, $4 -; M64-NEXT: addiu $4, $4, -1 -; M64-NEXT: and $2, $2, $3 -; M64-NEXT: sll $3, $8, 0 -; M64-NEXT: and $1, $1, $3 -; M64-NEXT: sll $3, $6, 0 -; M64-NEXT: sll $6, $10, 0 -; M64-NEXT: or $1, $2, $1 +; M64-NEXT: sll $2, $2, 0 ; M64-NEXT: andi $3, $3, 1 -; M64-NEXT: and $1, $5, $1 -; M64-NEXT: sll $5, $9, 0 -; M64-NEXT: addiu $2, $3, -1 +; M64-NEXT: negu $1, $1 ; M64-NEXT: negu $3, $3 -; M64-NEXT: and $4, $4, $5 -; M64-NEXT: and $2, $2, $6 -; M64-NEXT: or $1, $1, $4 -; M64-NEXT: and $1, $3, $1 +; M64-NEXT: and $1, $2, $1 +; M64-NEXT: sll $2, $8, 0 +; M64-NEXT: xor $1, $2, $1 +; M64-NEXT: sll $2, $9, 0 +; M64-NEXT: xor $1, $1, $2 +; M64-NEXT: and $1, $1, $3 +; M64-NEXT: sll $3, $6, 0 +; M64-NEXT: xor $1, $2, $1 +; M64-NEXT: andi $2, $3, 1 +; M64-NEXT: sll $3, $10, 0 +; M64-NEXT: xor $1, $1, $3 +; M64-NEXT: negu $2, $2 +; M64-NEXT: and $1, $1, $2 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $1, $2 +; M64-NEXT: xor $2, $3, $1 %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b) %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c) %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d) @@ -406,16 +374,17 @@ define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c, define i64 @test_ctselect_i64_smin_zero(i64 %x) { ; M32-LABEL: test_ctselect_i64_smin_zero: ; M32: # %bb.0: -; M32-NEXT: sra $1, $5, 31 -; M32-NEXT: and $2, $1, $4 +; M32-NEXT: slti $1, $5, 0 +; M32-NEXT: negu $1, $1 +; M32-NEXT: and $2, $4, $1 ; M32-NEXT: jr $ra -; M32-NEXT: and $3, $1, $5 +; M32-NEXT: and $3, $5, $1 ; ; M64-LABEL: test_ctselect_i64_smin_zero: ; M64: # %bb.0: ; M64-NEXT: dsra $1, $4, 63 ; M64-NEXT: jr $ra -; 
M64-NEXT: and $2, $1, $4 +; M64-NEXT: and $2, $4, $1 %cmp = icmp slt i64 %x, 0 %result = call i64 @llvm.ct.select.i64(i1 %cmp, i64 %x, i64 0) ret i64 %result diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll index 6222f6052e12..302e06b0a733 100644 --- a/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll +++ b/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll @@ -6,21 +6,19 @@ define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) { ; MIPS64-MSA-LABEL: test_ctselect_v4i32: ; MIPS64-MSA: # %bb.0: -; MIPS64-MSA-NEXT: insert.d $w2[0], $7 +; MIPS64-MSA-NEXT: insert.d $w0[0], $7 +; MIPS64-MSA-NEXT: insert.d $w1[0], $5 ; MIPS64-MSA-NEXT: sll $1, $4, 0 -; MIPS64-MSA-NEXT: ldi.b $w0, -1 -; MIPS64-MSA-NEXT: fill.w $w1, $1 -; MIPS64-MSA-NEXT: insert.d $w2[1], $8 -; MIPS64-MSA-NEXT: slli.w $w1, $w1, 31 -; MIPS64-MSA-NEXT: srai.w $w1, $w1, 31 -; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 -; MIPS64-MSA-NEXT: xor.v $w0, $w1, $w0 -; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 -; MIPS64-MSA-NEXT: insert.d $w2[0], $5 -; MIPS64-MSA-NEXT: insert.d $w2[1], $6 -; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: fill.w $w2, $1 +; MIPS64-MSA-NEXT: insert.d $w0[1], $8 +; MIPS64-MSA-NEXT: insert.d $w1[1], $6 +; MIPS64-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS64-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: shf.w $w1, $w1, 177 ; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 -; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 ; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] ; MIPS64-MSA-NEXT: jr $ra @@ -30,26 +28,24 @@ define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) { ; MIPS32-MSA: # %bb.0: ; MIPS32-MSA-NEXT: lw $2, 24($sp) ; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: insert.w $w1[0], $6 ; MIPS32-MSA-NEXT: fill.w $w2, $4 -; MIPS32-MSA-NEXT: ldi.b 
$w1, -1 ; MIPS32-MSA-NEXT: insert.w $w0[0], $2 +; MIPS32-MSA-NEXT: insert.w $w1[1], $7 ; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 ; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 ; MIPS32-MSA-NEXT: insert.w $w0[1], $1 ; MIPS32-MSA-NEXT: lw $1, 32($sp) -; MIPS32-MSA-NEXT: xor.v $w1, $w2, $w1 ; MIPS32-MSA-NEXT: insert.w $w0[2], $1 ; MIPS32-MSA-NEXT: lw $1, 36($sp) ; MIPS32-MSA-NEXT: insert.w $w0[3], $1 ; MIPS32-MSA-NEXT: lw $1, 16($sp) -; MIPS32-MSA-NEXT: and.v $w0, $w1, $w0 -; MIPS32-MSA-NEXT: insert.w $w1[0], $6 -; MIPS32-MSA-NEXT: insert.w $w1[1], $7 ; MIPS32-MSA-NEXT: insert.w $w1[2], $1 ; MIPS32-MSA-NEXT: lw $1, 20($sp) ; MIPS32-MSA-NEXT: insert.w $w1[3], $1 -; MIPS32-MSA-NEXT: and.v $w1, $w2, $w1 -; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] ; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] ; MIPS32-MSA-NEXT: copy_s.w $4, $w0[2] @@ -63,21 +59,19 @@ define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) { define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) { ; MIPS64-MSA-LABEL: test_ctselect_v8i16: ; MIPS64-MSA: # %bb.0: -; MIPS64-MSA-NEXT: insert.d $w2[0], $7 +; MIPS64-MSA-NEXT: insert.d $w0[0], $7 +; MIPS64-MSA-NEXT: insert.d $w1[0], $5 ; MIPS64-MSA-NEXT: sll $1, $4, 0 -; MIPS64-MSA-NEXT: ldi.b $w0, -1 -; MIPS64-MSA-NEXT: fill.h $w1, $1 -; MIPS64-MSA-NEXT: insert.d $w2[1], $8 -; MIPS64-MSA-NEXT: slli.h $w1, $w1, 15 -; MIPS64-MSA-NEXT: srai.h $w1, $w1, 15 -; MIPS64-MSA-NEXT: shf.h $w2, $w2, 27 -; MIPS64-MSA-NEXT: xor.v $w0, $w1, $w0 -; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 -; MIPS64-MSA-NEXT: insert.d $w2[0], $5 -; MIPS64-MSA-NEXT: insert.d $w2[1], $6 -; MIPS64-MSA-NEXT: shf.h $w2, $w2, 27 +; MIPS64-MSA-NEXT: fill.h $w2, $1 +; MIPS64-MSA-NEXT: insert.d $w0[1], $8 +; MIPS64-MSA-NEXT: insert.d $w1[1], $6 +; MIPS64-MSA-NEXT: slli.h $w2, $w2, 15 +; MIPS64-MSA-NEXT: xor.v $w1, $w1, $w0 +; 
MIPS64-MSA-NEXT: srai.h $w2, $w2, 15 +; MIPS64-MSA-NEXT: shf.h $w0, $w0, 27 +; MIPS64-MSA-NEXT: shf.h $w1, $w1, 27 ; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 -; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS64-MSA-NEXT: shf.h $w0, $w0, 27 ; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] ; MIPS64-MSA-NEXT: jr $ra @@ -87,28 +81,26 @@ define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) { ; MIPS32-MSA: # %bb.0: ; MIPS32-MSA-NEXT: lw $2, 24($sp) ; MIPS32-MSA-NEXT: lw $1, 28($sp) -; MIPS32-MSA-NEXT: fill.h $w1, $4 -; MIPS32-MSA-NEXT: ldi.b $w0, -1 -; MIPS32-MSA-NEXT: insert.w $w2[0], $2 -; MIPS32-MSA-NEXT: slli.h $w1, $w1, 15 -; MIPS32-MSA-NEXT: srai.h $w1, $w1, 15 -; MIPS32-MSA-NEXT: insert.w $w2[1], $1 +; MIPS32-MSA-NEXT: insert.w $w1[0], $6 +; MIPS32-MSA-NEXT: fill.h $w2, $4 +; MIPS32-MSA-NEXT: insert.w $w0[0], $2 +; MIPS32-MSA-NEXT: insert.w $w1[1], $7 +; MIPS32-MSA-NEXT: slli.h $w2, $w2, 15 +; MIPS32-MSA-NEXT: srai.h $w2, $w2, 15 +; MIPS32-MSA-NEXT: insert.w $w0[1], $1 ; MIPS32-MSA-NEXT: lw $1, 32($sp) -; MIPS32-MSA-NEXT: xor.v $w0, $w1, $w0 -; MIPS32-MSA-NEXT: insert.w $w2[2], $1 +; MIPS32-MSA-NEXT: insert.w $w0[2], $1 ; MIPS32-MSA-NEXT: lw $1, 36($sp) -; MIPS32-MSA-NEXT: insert.w $w2[3], $1 +; MIPS32-MSA-NEXT: insert.w $w0[3], $1 ; MIPS32-MSA-NEXT: lw $1, 16($sp) -; MIPS32-MSA-NEXT: shf.h $w2, $w2, 177 -; MIPS32-MSA-NEXT: and.v $w0, $w0, $w2 -; MIPS32-MSA-NEXT: insert.w $w2[0], $6 -; MIPS32-MSA-NEXT: insert.w $w2[1], $7 -; MIPS32-MSA-NEXT: insert.w $w2[2], $1 +; MIPS32-MSA-NEXT: insert.w $w1[2], $1 ; MIPS32-MSA-NEXT: lw $1, 20($sp) -; MIPS32-MSA-NEXT: insert.w $w2[3], $1 -; MIPS32-MSA-NEXT: shf.h $w2, $w2, 177 +; MIPS32-MSA-NEXT: insert.w $w1[3], $1 +; MIPS32-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS32-MSA-NEXT: shf.h $w0, $w0, 177 +; MIPS32-MSA-NEXT: shf.h $w1, $w1, 177 ; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 -; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS32-MSA-NEXT: shf.h $w0, $w0, 
177 ; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] ; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] @@ -123,22 +115,21 @@ define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) { define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) { ; MIPS64-MSA-LABEL: test_ctselect_v16i8: ; MIPS64-MSA: # %bb.0: -; MIPS64-MSA-NEXT: insert.d $w0[0], $5 -; MIPS64-MSA-NEXT: insert.d $w1[0], $7 +; MIPS64-MSA-NEXT: insert.d $w0[0], $7 +; MIPS64-MSA-NEXT: insert.d $w1[0], $5 ; MIPS64-MSA-NEXT: sll $1, $4, 0 ; MIPS64-MSA-NEXT: fill.b $w2, $1 -; MIPS64-MSA-NEXT: insert.d $w0[1], $6 -; MIPS64-MSA-NEXT: insert.d $w1[1], $8 +; MIPS64-MSA-NEXT: insert.d $w0[1], $8 +; MIPS64-MSA-NEXT: insert.d $w1[1], $6 ; MIPS64-MSA-NEXT: slli.b $w2, $w2, 7 +; MIPS64-MSA-NEXT: xor.v $w1, $w1, $w0 ; MIPS64-MSA-NEXT: shf.b $w0, $w0, 27 -; MIPS64-MSA-NEXT: shf.b $w1, $w1, 27 ; MIPS64-MSA-NEXT: srai.b $w2, $w2, 7 +; MIPS64-MSA-NEXT: shf.b $w1, $w1, 27 ; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 ; MIPS64-MSA-NEXT: shf.w $w1, $w1, 177 -; MIPS64-MSA-NEXT: and.v $w0, $w2, $w0 -; MIPS64-MSA-NEXT: xori.b $w2, $w2, 255 -; MIPS64-MSA-NEXT: and.v $w1, $w2, $w1 -; MIPS64-MSA-NEXT: or.v $w0, $w0, $w1 +; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS64-MSA-NEXT: shf.b $w0, $w0, 27 ; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 ; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] @@ -147,29 +138,28 @@ define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) { ; ; MIPS32-MSA-LABEL: test_ctselect_v16i8: ; MIPS32-MSA: # %bb.0: -; MIPS32-MSA-NEXT: insert.w $w0[0], $6 -; MIPS32-MSA-NEXT: lw $1, 16($sp) ; MIPS32-MSA-NEXT: lw $2, 24($sp) +; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: insert.w $w1[0], $6 ; MIPS32-MSA-NEXT: fill.b $w2, $4 -; MIPS32-MSA-NEXT: insert.w $w0[1], $7 -; MIPS32-MSA-NEXT: insert.w $w1[0], $2 +; MIPS32-MSA-NEXT: insert.w $w0[0], $2 +; MIPS32-MSA-NEXT: insert.w $w1[1], $7 ; MIPS32-MSA-NEXT: slli.b $w2, $w2, 7 ; MIPS32-MSA-NEXT: srai.b $w2, $w2, 7 +; 
MIPS32-MSA-NEXT: insert.w $w0[1], $1 +; MIPS32-MSA-NEXT: lw $1, 32($sp) ; MIPS32-MSA-NEXT: insert.w $w0[2], $1 -; MIPS32-MSA-NEXT: lw $1, 20($sp) +; MIPS32-MSA-NEXT: lw $1, 36($sp) ; MIPS32-MSA-NEXT: insert.w $w0[3], $1 -; MIPS32-MSA-NEXT: lw $1, 28($sp) -; MIPS32-MSA-NEXT: insert.w $w1[1], $1 -; MIPS32-MSA-NEXT: lw $1, 32($sp) -; MIPS32-MSA-NEXT: shf.b $w0, $w0, 27 +; MIPS32-MSA-NEXT: lw $1, 16($sp) ; MIPS32-MSA-NEXT: insert.w $w1[2], $1 -; MIPS32-MSA-NEXT: lw $1, 36($sp) -; MIPS32-MSA-NEXT: and.v $w0, $w2, $w0 -; MIPS32-MSA-NEXT: xori.b $w2, $w2, 255 +; MIPS32-MSA-NEXT: lw $1, 20($sp) ; MIPS32-MSA-NEXT: insert.w $w1[3], $1 +; MIPS32-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS32-MSA-NEXT: shf.b $w0, $w0, 27 ; MIPS32-MSA-NEXT: shf.b $w1, $w1, 27 -; MIPS32-MSA-NEXT: and.v $w1, $w2, $w1 -; MIPS32-MSA-NEXT: or.v $w0, $w0, $w1 +; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS32-MSA-NEXT: shf.b $w0, $w0, 27 ; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] ; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] @@ -184,18 +174,16 @@ define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) { define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) { ; MIPS64-MSA-LABEL: test_ctselect_v2i64: ; MIPS64-MSA: # %bb.0: -; MIPS64-MSA-NEXT: fill.d $w2, $4 ; MIPS64-MSA-NEXT: insert.d $w0[0], $7 -; MIPS64-MSA-NEXT: ldi.b $w1, -1 -; MIPS64-MSA-NEXT: slli.d $w2, $w2, 63 -; MIPS64-MSA-NEXT: insert.d $w0[1], $8 -; MIPS64-MSA-NEXT: srai.d $w2, $w2, 63 -; MIPS64-MSA-NEXT: xor.v $w1, $w2, $w1 -; MIPS64-MSA-NEXT: and.v $w0, $w1, $w0 ; MIPS64-MSA-NEXT: insert.d $w1[0], $5 +; MIPS64-MSA-NEXT: fill.d $w2, $4 +; MIPS64-MSA-NEXT: insert.d $w0[1], $8 ; MIPS64-MSA-NEXT: insert.d $w1[1], $6 -; MIPS64-MSA-NEXT: and.v $w1, $w2, $w1 -; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: slli.d $w2, $w2, 63 +; MIPS64-MSA-NEXT: srai.d $w2, $w2, 63 +; MIPS64-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS64-MSA-NEXT: xor.v $w0, 
$w0, $w1 ; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] ; MIPS64-MSA-NEXT: jr $ra ; MIPS64-MSA-NEXT: copy_s.d $3, $w0[1] @@ -214,31 +202,28 @@ define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) { ; MIPS32-MSA-NEXT: and $sp, $sp, $1 ; MIPS32-MSA-NEXT: lw $2, 56($fp) ; MIPS32-MSA-NEXT: lw $1, 60($fp) +; MIPS32-MSA-NEXT: insert.w $w1[0], $6 ; MIPS32-MSA-NEXT: sw $4, 12($sp) ; MIPS32-MSA-NEXT: sw $4, 4($sp) -; MIPS32-MSA-NEXT: ldi.b $w0, -1 -; MIPS32-MSA-NEXT: ld.d $w1, 0($sp) -; MIPS32-MSA-NEXT: shf.w $w0, $w0, 177 -; MIPS32-MSA-NEXT: insert.w $w2[0], $2 -; MIPS32-MSA-NEXT: slli.d $w1, $w1, 63 -; MIPS32-MSA-NEXT: insert.w $w2[1], $1 +; MIPS32-MSA-NEXT: ld.d $w2, 0($sp) +; MIPS32-MSA-NEXT: insert.w $w0[0], $2 +; MIPS32-MSA-NEXT: insert.w $w1[1], $7 +; MIPS32-MSA-NEXT: slli.d $w2, $w2, 63 +; MIPS32-MSA-NEXT: insert.w $w0[1], $1 ; MIPS32-MSA-NEXT: lw $1, 64($fp) -; MIPS32-MSA-NEXT: srai.d $w1, $w1, 63 -; MIPS32-MSA-NEXT: xor.v $w0, $w1, $w0 -; MIPS32-MSA-NEXT: insert.w $w2[2], $1 +; MIPS32-MSA-NEXT: srai.d $w2, $w2, 63 +; MIPS32-MSA-NEXT: insert.w $w0[2], $1 ; MIPS32-MSA-NEXT: lw $1, 68($fp) -; MIPS32-MSA-NEXT: insert.w $w2[3], $1 +; MIPS32-MSA-NEXT: insert.w $w0[3], $1 ; MIPS32-MSA-NEXT: lw $1, 48($fp) -; MIPS32-MSA-NEXT: shf.w $w2, $w2, 177 -; MIPS32-MSA-NEXT: and.v $w0, $w0, $w2 -; MIPS32-MSA-NEXT: insert.w $w2[0], $6 -; MIPS32-MSA-NEXT: insert.w $w2[1], $7 -; MIPS32-MSA-NEXT: insert.w $w2[2], $1 +; MIPS32-MSA-NEXT: insert.w $w1[2], $1 ; MIPS32-MSA-NEXT: lw $1, 52($fp) -; MIPS32-MSA-NEXT: insert.w $w2[3], $1 -; MIPS32-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS32-MSA-NEXT: insert.w $w1[3], $1 +; MIPS32-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS32-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS32-MSA-NEXT: shf.w $w1, $w1, 177 ; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 -; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS32-MSA-NEXT: shf.w $w0, $w0, 177 ; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] ; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] @@ -257,21 
+242,19 @@ define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) { define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) { ; MIPS64-MSA-LABEL: test_ctselect_v4f32: ; MIPS64-MSA: # %bb.0: -; MIPS64-MSA-NEXT: insert.d $w2[0], $7 +; MIPS64-MSA-NEXT: insert.d $w0[0], $7 +; MIPS64-MSA-NEXT: insert.d $w1[0], $5 ; MIPS64-MSA-NEXT: sll $1, $4, 0 -; MIPS64-MSA-NEXT: ldi.b $w0, -1 -; MIPS64-MSA-NEXT: fill.w $w1, $1 -; MIPS64-MSA-NEXT: insert.d $w2[1], $8 -; MIPS64-MSA-NEXT: slli.w $w1, $w1, 31 -; MIPS64-MSA-NEXT: srai.w $w1, $w1, 31 -; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 -; MIPS64-MSA-NEXT: xor.v $w0, $w1, $w0 -; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 -; MIPS64-MSA-NEXT: insert.d $w2[0], $5 -; MIPS64-MSA-NEXT: insert.d $w2[1], $6 -; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: fill.w $w2, $1 +; MIPS64-MSA-NEXT: insert.d $w0[1], $8 +; MIPS64-MSA-NEXT: insert.d $w1[1], $6 +; MIPS64-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS64-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: shf.w $w1, $w1, 177 ; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 -; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 ; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] ; MIPS64-MSA-NEXT: jr $ra @@ -281,26 +264,24 @@ define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b ; MIPS32-MSA: # %bb.0: ; MIPS32-MSA-NEXT: lw $2, 24($sp) ; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: insert.w $w1[0], $6 ; MIPS32-MSA-NEXT: fill.w $w2, $5 -; MIPS32-MSA-NEXT: ldi.b $w1, -1 ; MIPS32-MSA-NEXT: insert.w $w0[0], $2 +; MIPS32-MSA-NEXT: insert.w $w1[1], $7 ; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 ; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 ; MIPS32-MSA-NEXT: insert.w $w0[1], $1 ; MIPS32-MSA-NEXT: lw $1, 32($sp) -; MIPS32-MSA-NEXT: xor.v $w1, $w2, $w1 ; MIPS32-MSA-NEXT: insert.w $w0[2], $1 ; MIPS32-MSA-NEXT: lw $1, 
36($sp) ; MIPS32-MSA-NEXT: insert.w $w0[3], $1 ; MIPS32-MSA-NEXT: lw $1, 16($sp) -; MIPS32-MSA-NEXT: and.v $w0, $w1, $w0 -; MIPS32-MSA-NEXT: insert.w $w1[0], $6 -; MIPS32-MSA-NEXT: insert.w $w1[1], $7 ; MIPS32-MSA-NEXT: insert.w $w1[2], $1 ; MIPS32-MSA-NEXT: lw $1, 20($sp) ; MIPS32-MSA-NEXT: insert.w $w1[3], $1 -; MIPS32-MSA-NEXT: and.v $w1, $w2, $w1 -; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS32-MSA-NEXT: jr $ra ; MIPS32-MSA-NEXT: st.w $w0, 0($4) %result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) @@ -311,18 +292,16 @@ define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b define <2 x double> @test_ctselect_v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) { ; MIPS64-MSA-LABEL: test_ctselect_v2f64: ; MIPS64-MSA: # %bb.0: -; MIPS64-MSA-NEXT: fill.d $w2, $4 ; MIPS64-MSA-NEXT: insert.d $w0[0], $7 -; MIPS64-MSA-NEXT: ldi.b $w1, -1 -; MIPS64-MSA-NEXT: slli.d $w2, $w2, 63 -; MIPS64-MSA-NEXT: insert.d $w0[1], $8 -; MIPS64-MSA-NEXT: srai.d $w2, $w2, 63 -; MIPS64-MSA-NEXT: xor.v $w1, $w2, $w1 -; MIPS64-MSA-NEXT: and.v $w0, $w1, $w0 ; MIPS64-MSA-NEXT: insert.d $w1[0], $5 +; MIPS64-MSA-NEXT: fill.d $w2, $4 +; MIPS64-MSA-NEXT: insert.d $w0[1], $8 ; MIPS64-MSA-NEXT: insert.d $w1[1], $6 -; MIPS64-MSA-NEXT: and.v $w1, $w2, $w1 -; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: slli.d $w2, $w2, 63 +; MIPS64-MSA-NEXT: srai.d $w2, $w2, 63 +; MIPS64-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] ; MIPS64-MSA-NEXT: jr $ra ; MIPS64-MSA-NEXT: copy_s.d $3, $w0[1] @@ -341,31 +320,28 @@ define <2 x double> @test_ctselect_v2f64(i1 %cond, <2 x double> %a, <2 x double> ; MIPS32-MSA-NEXT: and $sp, $sp, $1 ; MIPS32-MSA-NEXT: lw $2, 56($fp) ; MIPS32-MSA-NEXT: lw $1, 60($fp) +; MIPS32-MSA-NEXT: insert.w 
$w1[0], $6 ; MIPS32-MSA-NEXT: sw $5, 12($sp) ; MIPS32-MSA-NEXT: sw $5, 4($sp) -; MIPS32-MSA-NEXT: ldi.b $w0, -1 -; MIPS32-MSA-NEXT: ld.d $w1, 0($sp) -; MIPS32-MSA-NEXT: shf.w $w0, $w0, 177 -; MIPS32-MSA-NEXT: insert.w $w2[0], $2 -; MIPS32-MSA-NEXT: slli.d $w1, $w1, 63 -; MIPS32-MSA-NEXT: insert.w $w2[1], $1 +; MIPS32-MSA-NEXT: ld.d $w2, 0($sp) +; MIPS32-MSA-NEXT: insert.w $w0[0], $2 +; MIPS32-MSA-NEXT: insert.w $w1[1], $7 +; MIPS32-MSA-NEXT: slli.d $w2, $w2, 63 +; MIPS32-MSA-NEXT: insert.w $w0[1], $1 ; MIPS32-MSA-NEXT: lw $1, 64($fp) -; MIPS32-MSA-NEXT: srai.d $w1, $w1, 63 -; MIPS32-MSA-NEXT: xor.v $w0, $w1, $w0 -; MIPS32-MSA-NEXT: insert.w $w2[2], $1 +; MIPS32-MSA-NEXT: srai.d $w2, $w2, 63 +; MIPS32-MSA-NEXT: insert.w $w0[2], $1 ; MIPS32-MSA-NEXT: lw $1, 68($fp) -; MIPS32-MSA-NEXT: insert.w $w2[3], $1 +; MIPS32-MSA-NEXT: insert.w $w0[3], $1 ; MIPS32-MSA-NEXT: lw $1, 48($fp) -; MIPS32-MSA-NEXT: shf.w $w2, $w2, 177 -; MIPS32-MSA-NEXT: and.v $w0, $w0, $w2 -; MIPS32-MSA-NEXT: insert.w $w2[0], $6 -; MIPS32-MSA-NEXT: insert.w $w2[1], $7 -; MIPS32-MSA-NEXT: insert.w $w2[2], $1 +; MIPS32-MSA-NEXT: insert.w $w1[2], $1 ; MIPS32-MSA-NEXT: lw $1, 52($fp) -; MIPS32-MSA-NEXT: insert.w $w2[3], $1 -; MIPS32-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS32-MSA-NEXT: insert.w $w1[3], $1 +; MIPS32-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS32-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS32-MSA-NEXT: shf.w $w1, $w1, 177 ; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 -; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS32-MSA-NEXT: st.d $w0, 0($4) ; MIPS32-MSA-NEXT: move $sp, $fp ; MIPS32-MSA-NEXT: lw $fp, 24($sp) # 4-byte Folded Reload @@ -381,16 +357,14 @@ define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) { ; MIPS64-MSA-LABEL: test_ctselect_v4i32_aligned_load: ; MIPS64-MSA: # %bb.0: ; MIPS64-MSA-NEXT: sll $1, $4, 0 +; MIPS64-MSA-NEXT: ld.w $w0, 0($6) ; MIPS64-MSA-NEXT: ld.w $w1, 0($5) -; MIPS64-MSA-NEXT: ldi.b $w2, -1 -; MIPS64-MSA-NEXT: fill.w $w0, $1 
-; MIPS64-MSA-NEXT: slli.w $w0, $w0, 31 -; MIPS64-MSA-NEXT: srai.w $w0, $w0, 31 -; MIPS64-MSA-NEXT: and.v $w1, $w0, $w1 -; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w2 -; MIPS64-MSA-NEXT: ld.w $w2, 0($6) -; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 -; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: fill.w $w2, $1 +; MIPS64-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS64-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 ; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] ; MIPS64-MSA-NEXT: jr $ra @@ -398,16 +372,14 @@ define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) { ; ; MIPS32-MSA-LABEL: test_ctselect_v4i32_aligned_load: ; MIPS32-MSA: # %bb.0: -; MIPS32-MSA-NEXT: fill.w $w0, $4 +; MIPS32-MSA-NEXT: fill.w $w2, $4 +; MIPS32-MSA-NEXT: ld.w $w0, 0($6) ; MIPS32-MSA-NEXT: ld.w $w1, 0($5) -; MIPS32-MSA-NEXT: ldi.b $w2, -1 -; MIPS32-MSA-NEXT: slli.w $w0, $w0, 31 -; MIPS32-MSA-NEXT: srai.w $w0, $w0, 31 -; MIPS32-MSA-NEXT: and.v $w1, $w0, $w1 -; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w2 -; MIPS32-MSA-NEXT: ld.w $w2, 0($6) -; MIPS32-MSA-NEXT: and.v $w0, $w0, $w2 -; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] ; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] ; MIPS32-MSA-NEXT: copy_s.w $4, $w0[2] @@ -424,16 +396,14 @@ define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2) ; MIPS64-MSA-LABEL: test_ctselect_v4i32_unaligned_load: ; MIPS64-MSA: # %bb.0: ; MIPS64-MSA-NEXT: sll $1, $4, 0 +; MIPS64-MSA-NEXT: ld.w $w0, 0($6) ; MIPS64-MSA-NEXT: ld.w $w1, 0($5) -; MIPS64-MSA-NEXT: ldi.b $w2, -1 -; MIPS64-MSA-NEXT: fill.w $w0, $1 -; MIPS64-MSA-NEXT: slli.w $w0, $w0, 31 -; MIPS64-MSA-NEXT: srai.w $w0, $w0, 31 
-; MIPS64-MSA-NEXT: and.v $w1, $w0, $w1 -; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w2 -; MIPS64-MSA-NEXT: ld.w $w2, 0($6) -; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 -; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: fill.w $w2, $1 +; MIPS64-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS64-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 ; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] ; MIPS64-MSA-NEXT: jr $ra @@ -441,16 +411,14 @@ define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2) ; ; MIPS32-MSA-LABEL: test_ctselect_v4i32_unaligned_load: ; MIPS32-MSA: # %bb.0: -; MIPS32-MSA-NEXT: fill.w $w0, $4 +; MIPS32-MSA-NEXT: fill.w $w2, $4 +; MIPS32-MSA-NEXT: ld.w $w0, 0($6) ; MIPS32-MSA-NEXT: ld.w $w1, 0($5) -; MIPS32-MSA-NEXT: ldi.b $w2, -1 -; MIPS32-MSA-NEXT: slli.w $w0, $w0, 31 -; MIPS32-MSA-NEXT: srai.w $w0, $w0, 31 -; MIPS32-MSA-NEXT: and.v $w1, $w0, $w1 -; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w2 -; MIPS32-MSA-NEXT: ld.w $w2, 0($6) -; MIPS32-MSA-NEXT: and.v $w0, $w0, $w2 -; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] ; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] ; MIPS32-MSA-NEXT: copy_s.w $4, $w0[2] @@ -466,21 +434,19 @@ define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2) define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr %out) { ; MIPS64-MSA-LABEL: test_ctselect_v4i32_store: ; MIPS64-MSA: # %bb.0: -; MIPS64-MSA-NEXT: insert.d $w2[0], $7 +; MIPS64-MSA-NEXT: insert.d $w0[0], $7 +; MIPS64-MSA-NEXT: insert.d $w1[0], $5 ; MIPS64-MSA-NEXT: sll $1, $4, 0 -; MIPS64-MSA-NEXT: ldi.b $w0, -1 -; MIPS64-MSA-NEXT: fill.w $w1, $1 -; MIPS64-MSA-NEXT: insert.d 
$w2[1], $8 -; MIPS64-MSA-NEXT: slli.w $w1, $w1, 31 -; MIPS64-MSA-NEXT: srai.w $w1, $w1, 31 -; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 -; MIPS64-MSA-NEXT: xor.v $w0, $w1, $w0 -; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 -; MIPS64-MSA-NEXT: insert.d $w2[0], $5 -; MIPS64-MSA-NEXT: insert.d $w2[1], $6 -; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: fill.w $w2, $1 +; MIPS64-MSA-NEXT: insert.d $w0[1], $8 +; MIPS64-MSA-NEXT: insert.d $w1[1], $6 +; MIPS64-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS64-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: shf.w $w1, $w1, 177 ; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 -; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS64-MSA-NEXT: jr $ra ; MIPS64-MSA-NEXT: st.w $w0, 0($9) ; @@ -488,27 +454,25 @@ define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr ; MIPS32-MSA: # %bb.0: ; MIPS32-MSA-NEXT: lw $2, 24($sp) ; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: insert.w $w1[0], $6 ; MIPS32-MSA-NEXT: fill.w $w2, $4 -; MIPS32-MSA-NEXT: ldi.b $w1, -1 ; MIPS32-MSA-NEXT: insert.w $w0[0], $2 +; MIPS32-MSA-NEXT: insert.w $w1[1], $7 ; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 ; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 ; MIPS32-MSA-NEXT: insert.w $w0[1], $1 ; MIPS32-MSA-NEXT: lw $1, 32($sp) -; MIPS32-MSA-NEXT: xor.v $w1, $w2, $w1 ; MIPS32-MSA-NEXT: insert.w $w0[2], $1 ; MIPS32-MSA-NEXT: lw $1, 36($sp) ; MIPS32-MSA-NEXT: insert.w $w0[3], $1 ; MIPS32-MSA-NEXT: lw $1, 16($sp) -; MIPS32-MSA-NEXT: and.v $w0, $w1, $w0 -; MIPS32-MSA-NEXT: insert.w $w1[0], $6 -; MIPS32-MSA-NEXT: insert.w $w1[1], $7 ; MIPS32-MSA-NEXT: insert.w $w1[2], $1 ; MIPS32-MSA-NEXT: lw $1, 20($sp) ; MIPS32-MSA-NEXT: insert.w $w1[3], $1 ; MIPS32-MSA-NEXT: lw $1, 40($sp) -; MIPS32-MSA-NEXT: and.v $w1, $w2, $w1 -; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS32-MSA-NEXT: xor.v 
$w0, $w0, $w1 ; MIPS32-MSA-NEXT: jr $ra ; MIPS32-MSA-NEXT: st.w $w0, 0($1) %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) @@ -521,31 +485,28 @@ define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 %cond2, <4 x i32> %a, ; MIPS64-MSA-LABEL: test_ctselect_v4i32_chain: ; MIPS64-MSA: # %bb.0: ; MIPS64-MSA-NEXT: insert.d $w0[0], $8 +; MIPS64-MSA-NEXT: insert.d $w1[0], $6 ; MIPS64-MSA-NEXT: sll $1, $4, 0 -; MIPS64-MSA-NEXT: ldi.b $w1, -1 ; MIPS64-MSA-NEXT: fill.w $w2, $1 ; MIPS64-MSA-NEXT: sll $1, $5, 0 ; MIPS64-MSA-NEXT: insert.d $w0[1], $9 +; MIPS64-MSA-NEXT: insert.d $w1[1], $7 ; MIPS64-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: xor.v $w1, $w1, $w0 ; MIPS64-MSA-NEXT: srai.w $w2, $w2, 31 ; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 -; MIPS64-MSA-NEXT: xor.v $w3, $w2, $w1 -; MIPS64-MSA-NEXT: and.v $w0, $w3, $w0 -; MIPS64-MSA-NEXT: insert.d $w3[0], $6 -; MIPS64-MSA-NEXT: insert.d $w3[1], $7 -; MIPS64-MSA-NEXT: shf.w $w3, $w3, 177 -; MIPS64-MSA-NEXT: and.v $w2, $w2, $w3 -; MIPS64-MSA-NEXT: or.v $w0, $w2, $w0 +; MIPS64-MSA-NEXT: shf.w $w1, $w1, 177 +; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 ; MIPS64-MSA-NEXT: fill.w $w2, $1 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w1 +; MIPS64-MSA-NEXT: insert.d $w1[0], $10 ; MIPS64-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: insert.d $w1[1], $11 ; MIPS64-MSA-NEXT: srai.w $w2, $w2, 31 -; MIPS64-MSA-NEXT: and.v $w0, $w2, $w0 -; MIPS64-MSA-NEXT: xor.v $w1, $w2, $w1 -; MIPS64-MSA-NEXT: insert.d $w2[0], $10 -; MIPS64-MSA-NEXT: insert.d $w2[1], $11 -; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 -; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 -; MIPS64-MSA-NEXT: or.v $w0, $w0, $w1 +; MIPS64-MSA-NEXT: shf.w $w1, $w1, 177 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w1 +; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 +; MIPS64-MSA-NEXT: xor.v $w0, $w1, $w0 ; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 ; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] ; MIPS64-MSA-NEXT: jr $ra @@ -555,41 +516,38 @@ define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 
%cond2, <4 x i32> %a, ; MIPS32-MSA: # %bb.0: ; MIPS32-MSA-NEXT: lw $2, 24($sp) ; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: insert.w $w1[0], $6 ; MIPS32-MSA-NEXT: fill.w $w2, $4 -; MIPS32-MSA-NEXT: ldi.b $w1, -1 ; MIPS32-MSA-NEXT: insert.w $w0[0], $2 +; MIPS32-MSA-NEXT: insert.w $w1[1], $7 ; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 ; MIPS32-MSA-NEXT: lw $2, 40($sp) ; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 ; MIPS32-MSA-NEXT: insert.w $w0[1], $1 ; MIPS32-MSA-NEXT: lw $1, 32($sp) -; MIPS32-MSA-NEXT: xor.v $w3, $w2, $w1 ; MIPS32-MSA-NEXT: insert.w $w0[2], $1 ; MIPS32-MSA-NEXT: lw $1, 36($sp) ; MIPS32-MSA-NEXT: insert.w $w0[3], $1 ; MIPS32-MSA-NEXT: lw $1, 16($sp) -; MIPS32-MSA-NEXT: and.v $w0, $w3, $w0 -; MIPS32-MSA-NEXT: insert.w $w3[0], $6 -; MIPS32-MSA-NEXT: insert.w $w3[1], $7 -; MIPS32-MSA-NEXT: insert.w $w3[2], $1 +; MIPS32-MSA-NEXT: insert.w $w1[2], $1 ; MIPS32-MSA-NEXT: lw $1, 20($sp) -; MIPS32-MSA-NEXT: insert.w $w3[3], $1 +; MIPS32-MSA-NEXT: insert.w $w1[3], $1 ; MIPS32-MSA-NEXT: lw $1, 44($sp) -; MIPS32-MSA-NEXT: and.v $w2, $w2, $w3 -; MIPS32-MSA-NEXT: or.v $w0, $w2, $w0 +; MIPS32-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 ; MIPS32-MSA-NEXT: fill.w $w2, $5 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w1 +; MIPS32-MSA-NEXT: insert.w $w1[0], $2 ; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 -; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 -; MIPS32-MSA-NEXT: and.v $w0, $w2, $w0 -; MIPS32-MSA-NEXT: xor.v $w1, $w2, $w1 -; MIPS32-MSA-NEXT: insert.w $w2[0], $2 -; MIPS32-MSA-NEXT: insert.w $w2[1], $1 +; MIPS32-MSA-NEXT: insert.w $w1[1], $1 ; MIPS32-MSA-NEXT: lw $1, 48($sp) -; MIPS32-MSA-NEXT: insert.w $w2[2], $1 +; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: insert.w $w1[2], $1 ; MIPS32-MSA-NEXT: lw $1, 52($sp) -; MIPS32-MSA-NEXT: insert.w $w2[3], $1 -; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 -; MIPS32-MSA-NEXT: or.v $w0, $w0, $w1 +; MIPS32-MSA-NEXT: insert.w $w1[3], $1 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w1 +; MIPS32-MSA-NEXT: and.v $w0, $w0, $w2 
+; MIPS32-MSA-NEXT: xor.v $w0, $w1, $w0 ; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] ; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] ; MIPS32-MSA-NEXT: copy_s.w $4, $w0[2] @@ -607,20 +565,18 @@ define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4 ; MIPS64-MSA-NEXT: insert.d $w0[0], $7 ; MIPS64-MSA-NEXT: insert.d $w1[0], $5 ; MIPS64-MSA-NEXT: sll $1, $4, 0 -; MIPS64-MSA-NEXT: fill.w $w3, $1 ; MIPS64-MSA-NEXT: insert.d $w0[1], $8 ; MIPS64-MSA-NEXT: insert.d $w1[1], $6 -; MIPS64-MSA-NEXT: slli.w $w3, $w3, 31 ; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 ; MIPS64-MSA-NEXT: shf.w $w1, $w1, 177 -; MIPS64-MSA-NEXT: srai.w $w3, $w3, 31 ; MIPS64-MSA-NEXT: fadd.w $w2, $w1, $w0 ; MIPS64-MSA-NEXT: fsub.w $w0, $w1, $w0 -; MIPS64-MSA-NEXT: ldi.b $w1, -1 -; MIPS64-MSA-NEXT: xor.v $w1, $w3, $w1 -; MIPS64-MSA-NEXT: and.v $w2, $w3, $w2 -; MIPS64-MSA-NEXT: and.v $w0, $w1, $w0 -; MIPS64-MSA-NEXT: or.v $w0, $w2, $w0 +; MIPS64-MSA-NEXT: xor.v $w1, $w2, $w0 +; MIPS64-MSA-NEXT: fill.w $w2, $1 +; MIPS64-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 ; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] ; MIPS64-MSA-NEXT: jr $ra @@ -631,11 +587,8 @@ define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4 ; MIPS32-MSA-NEXT: lw $2, 24($sp) ; MIPS32-MSA-NEXT: lw $1, 28($sp) ; MIPS32-MSA-NEXT: insert.w $w1[0], $6 -; MIPS32-MSA-NEXT: fill.w $w3, $5 ; MIPS32-MSA-NEXT: insert.w $w0[0], $2 ; MIPS32-MSA-NEXT: insert.w $w1[1], $7 -; MIPS32-MSA-NEXT: slli.w $w3, $w3, 31 -; MIPS32-MSA-NEXT: srai.w $w3, $w3, 31 ; MIPS32-MSA-NEXT: insert.w $w0[1], $1 ; MIPS32-MSA-NEXT: lw $1, 32($sp) ; MIPS32-MSA-NEXT: insert.w $w0[2], $1 @@ -647,11 +600,12 @@ define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4 ; MIPS32-MSA-NEXT: insert.w $w1[3], $1 ; MIPS32-MSA-NEXT: fadd.w $w2, $w1, $w0 ; MIPS32-MSA-NEXT: fsub.w $w0, $w1, $w0 -; 
MIPS32-MSA-NEXT: ldi.b $w1, -1 -; MIPS32-MSA-NEXT: xor.v $w1, $w3, $w1 -; MIPS32-MSA-NEXT: and.v $w2, $w3, $w2 -; MIPS32-MSA-NEXT: and.v $w0, $w1, $w0 -; MIPS32-MSA-NEXT: or.v $w0, $w2, $w0 +; MIPS32-MSA-NEXT: xor.v $w1, $w2, $w0 +; MIPS32-MSA-NEXT: fill.w $w2, $5 +; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS32-MSA-NEXT: jr $ra ; MIPS32-MSA-NEXT: st.w $w0, 0($4) %sum = fadd <4 x float> %x, %y @@ -664,36 +618,32 @@ define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4 define void @test_ctselect_v4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) { ; MIPS64-MSA-LABEL: test_ctselect_v4i32_mixed: ; MIPS64-MSA: # %bb.0: +; MIPS64-MSA-NEXT: ld.w $w0, 0($6) +; MIPS64-MSA-NEXT: ld.w $w1, 0($5) ; MIPS64-MSA-NEXT: sll $1, $4, 0 -; MIPS64-MSA-NEXT: ld.w $w0, 0($5) -; MIPS64-MSA-NEXT: ldi.b $w2, -1 -; MIPS64-MSA-NEXT: fill.w $w1, $1 -; MIPS64-MSA-NEXT: addvi.w $w0, $w0, 1 -; MIPS64-MSA-NEXT: slli.w $w1, $w1, 31 -; MIPS64-MSA-NEXT: srai.w $w1, $w1, 31 -; MIPS64-MSA-NEXT: and.v $w0, $w1, $w0 -; MIPS64-MSA-NEXT: xor.v $w1, $w1, $w2 -; MIPS64-MSA-NEXT: ld.w $w2, 0($6) -; MIPS64-MSA-NEXT: addvi.w $w2, $w2, 2 +; MIPS64-MSA-NEXT: fill.w $w2, $1 +; MIPS64-MSA-NEXT: addvi.w $w0, $w0, 2 +; MIPS64-MSA-NEXT: addvi.w $w1, $w1, 1 +; MIPS64-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS64-MSA-NEXT: srai.w $w2, $w2, 31 ; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 -; MIPS64-MSA-NEXT: or.v $w0, $w0, $w1 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS64-MSA-NEXT: jr $ra ; MIPS64-MSA-NEXT: st.w $w0, 0($7) ; ; MIPS32-MSA-LABEL: test_ctselect_v4i32_mixed: ; MIPS32-MSA: # %bb.0: -; MIPS32-MSA-NEXT: ld.w $w0, 0($5) -; MIPS32-MSA-NEXT: fill.w $w1, $4 -; MIPS32-MSA-NEXT: ldi.b $w2, -1 -; MIPS32-MSA-NEXT: slli.w $w1, $w1, 31 -; MIPS32-MSA-NEXT: addvi.w $w0, $w0, 1 -; MIPS32-MSA-NEXT: srai.w $w1, $w1, 31 -; MIPS32-MSA-NEXT: and.v $w0, 
$w1, $w0 -; MIPS32-MSA-NEXT: xor.v $w1, $w1, $w2 -; MIPS32-MSA-NEXT: ld.w $w2, 0($6) -; MIPS32-MSA-NEXT: addvi.w $w2, $w2, 2 +; MIPS32-MSA-NEXT: ld.w $w0, 0($6) +; MIPS32-MSA-NEXT: ld.w $w1, 0($5) +; MIPS32-MSA-NEXT: fill.w $w2, $4 +; MIPS32-MSA-NEXT: addvi.w $w0, $w0, 2 +; MIPS32-MSA-NEXT: addvi.w $w1, $w1, 1 +; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS32-MSA-NEXT: xor.v $w1, $w1, $w0 ; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 -; MIPS32-MSA-NEXT: or.v $w0, $w0, $w1 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS32-MSA-NEXT: jr $ra ; MIPS32-MSA-NEXT: st.w $w0, 0($7) %a = load <4 x i32>, ptr %p1, align 16 @@ -709,21 +659,19 @@ define void @test_ctselect_v4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) { define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b) nounwind { ; MIPS64-MSA-LABEL: test_ctselect_v4i32_args: ; MIPS64-MSA: # %bb.0: -; MIPS64-MSA-NEXT: insert.d $w2[0], $7 +; MIPS64-MSA-NEXT: insert.d $w0[0], $7 +; MIPS64-MSA-NEXT: insert.d $w1[0], $5 ; MIPS64-MSA-NEXT: sll $1, $4, 0 -; MIPS64-MSA-NEXT: ldi.b $w0, -1 -; MIPS64-MSA-NEXT: fill.w $w1, $1 -; MIPS64-MSA-NEXT: insert.d $w2[1], $8 -; MIPS64-MSA-NEXT: slli.w $w1, $w1, 31 -; MIPS64-MSA-NEXT: srai.w $w1, $w1, 31 -; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 -; MIPS64-MSA-NEXT: xor.v $w0, $w1, $w0 -; MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 -; MIPS64-MSA-NEXT: insert.d $w2[0], $5 -; MIPS64-MSA-NEXT: insert.d $w2[1], $6 -; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: fill.w $w2, $1 +; MIPS64-MSA-NEXT: insert.d $w0[1], $8 +; MIPS64-MSA-NEXT: insert.d $w1[1], $6 +; MIPS64-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS64-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: shf.w $w1, $w1, 177 ; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 -; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 ; MIPS64-MSA-NEXT: copy_s.d $2, 
$w0[0] ; MIPS64-MSA-NEXT: jr $ra @@ -733,26 +681,24 @@ define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b) ; MIPS32-MSA: # %bb.0: ; MIPS32-MSA-NEXT: lw $2, 24($sp) ; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: insert.w $w1[0], $6 ; MIPS32-MSA-NEXT: fill.w $w2, $4 -; MIPS32-MSA-NEXT: ldi.b $w1, -1 ; MIPS32-MSA-NEXT: insert.w $w0[0], $2 +; MIPS32-MSA-NEXT: insert.w $w1[1], $7 ; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 ; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 ; MIPS32-MSA-NEXT: insert.w $w0[1], $1 ; MIPS32-MSA-NEXT: lw $1, 32($sp) -; MIPS32-MSA-NEXT: xor.v $w1, $w2, $w1 ; MIPS32-MSA-NEXT: insert.w $w0[2], $1 ; MIPS32-MSA-NEXT: lw $1, 36($sp) ; MIPS32-MSA-NEXT: insert.w $w0[3], $1 ; MIPS32-MSA-NEXT: lw $1, 16($sp) -; MIPS32-MSA-NEXT: and.v $w0, $w1, $w0 -; MIPS32-MSA-NEXT: insert.w $w1[0], $6 -; MIPS32-MSA-NEXT: insert.w $w1[1], $7 ; MIPS32-MSA-NEXT: insert.w $w1[2], $1 ; MIPS32-MSA-NEXT: lw $1, 20($sp) ; MIPS32-MSA-NEXT: insert.w $w1[3], $1 -; MIPS32-MSA-NEXT: and.v $w1, $w2, $w1 -; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] ; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] ; MIPS32-MSA-NEXT: copy_s.w $4, $w0[2] @@ -766,21 +712,19 @@ define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b) define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x i32> %b) { ; MIPS64-MSA-LABEL: test_ctselect_v4i32_multi_use: ; MIPS64-MSA: # %bb.0: -; MIPS64-MSA-NEXT: insert.d $w2[0], $7 +; MIPS64-MSA-NEXT: insert.d $w0[0], $7 +; MIPS64-MSA-NEXT: insert.d $w1[0], $5 ; MIPS64-MSA-NEXT: sll $1, $4, 0 -; MIPS64-MSA-NEXT: ldi.b $w0, -1 -; MIPS64-MSA-NEXT: fill.w $w1, $1 -; MIPS64-MSA-NEXT: insert.d $w2[1], $8 -; MIPS64-MSA-NEXT: slli.w $w1, $w1, 31 -; MIPS64-MSA-NEXT: srai.w $w1, $w1, 31 -; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 -; MIPS64-MSA-NEXT: xor.v $w0, $w1, $w0 -; 
MIPS64-MSA-NEXT: and.v $w0, $w0, $w2 -; MIPS64-MSA-NEXT: insert.d $w2[0], $5 -; MIPS64-MSA-NEXT: insert.d $w2[1], $6 -; MIPS64-MSA-NEXT: shf.w $w2, $w2, 177 +; MIPS64-MSA-NEXT: fill.w $w2, $1 +; MIPS64-MSA-NEXT: insert.d $w0[1], $8 +; MIPS64-MSA-NEXT: insert.d $w1[1], $6 +; MIPS64-MSA-NEXT: slli.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS64-MSA-NEXT: srai.w $w2, $w2, 31 +; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 +; MIPS64-MSA-NEXT: shf.w $w1, $w1, 177 ; MIPS64-MSA-NEXT: and.v $w1, $w1, $w2 -; MIPS64-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS64-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS64-MSA-NEXT: addv.w $w0, $w0, $w0 ; MIPS64-MSA-NEXT: shf.w $w0, $w0, 177 ; MIPS64-MSA-NEXT: copy_s.d $2, $w0[0] @@ -791,26 +735,24 @@ define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x i32 ; MIPS32-MSA: # %bb.0: ; MIPS32-MSA-NEXT: lw $2, 24($sp) ; MIPS32-MSA-NEXT: lw $1, 28($sp) +; MIPS32-MSA-NEXT: insert.w $w1[0], $6 ; MIPS32-MSA-NEXT: fill.w $w2, $4 -; MIPS32-MSA-NEXT: ldi.b $w1, -1 ; MIPS32-MSA-NEXT: insert.w $w0[0], $2 +; MIPS32-MSA-NEXT: insert.w $w1[1], $7 ; MIPS32-MSA-NEXT: slli.w $w2, $w2, 31 ; MIPS32-MSA-NEXT: srai.w $w2, $w2, 31 ; MIPS32-MSA-NEXT: insert.w $w0[1], $1 ; MIPS32-MSA-NEXT: lw $1, 32($sp) -; MIPS32-MSA-NEXT: xor.v $w1, $w2, $w1 ; MIPS32-MSA-NEXT: insert.w $w0[2], $1 ; MIPS32-MSA-NEXT: lw $1, 36($sp) ; MIPS32-MSA-NEXT: insert.w $w0[3], $1 ; MIPS32-MSA-NEXT: lw $1, 16($sp) -; MIPS32-MSA-NEXT: and.v $w0, $w1, $w0 -; MIPS32-MSA-NEXT: insert.w $w1[0], $6 -; MIPS32-MSA-NEXT: insert.w $w1[1], $7 ; MIPS32-MSA-NEXT: insert.w $w1[2], $1 ; MIPS32-MSA-NEXT: lw $1, 20($sp) ; MIPS32-MSA-NEXT: insert.w $w1[3], $1 -; MIPS32-MSA-NEXT: and.v $w1, $w2, $w1 -; MIPS32-MSA-NEXT: or.v $w0, $w1, $w0 +; MIPS32-MSA-NEXT: xor.v $w1, $w1, $w0 +; MIPS32-MSA-NEXT: and.v $w1, $w1, $w2 +; MIPS32-MSA-NEXT: xor.v $w0, $w0, $w1 ; MIPS32-MSA-NEXT: addv.w $w0, $w0, $w0 ; MIPS32-MSA-NEXT: copy_s.w $2, $w0[0] ; MIPS32-MSA-NEXT: copy_s.w $3, $w0[1] diff --git 
a/llvm/test/CodeGen/Mips/ctselect-fallback.ll b/llvm/test/CodeGen/Mips/ctselect-fallback.ll index d89d7fc69871..6a61412367f7 100644 --- a/llvm/test/CodeGen/Mips/ctselect-fallback.ll +++ b/llvm/test/CodeGen/Mips/ctselect-fallback.ll @@ -11,7 +11,7 @@ define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) { ; M32-NEXT: negu $2, $2 ; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: xor $2, $1, $6 +; M32-NEXT: xor $2, $6, $1 ; ; M64-LABEL: test_ctselect_i8: ; M64: # %bb.0: @@ -23,7 +23,7 @@ define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) { ; M64-NEXT: and $1, $2, $1 ; M64-NEXT: sll $2, $6, 0 ; M64-NEXT: jr $ra -; M64-NEXT: xor $2, $1, $2 +; M64-NEXT: xor $2, $2, $1 %result = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b) ret i8 %result } @@ -36,7 +36,7 @@ define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) { ; M32-NEXT: negu $2, $2 ; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: xor $2, $1, $6 +; M32-NEXT: xor $2, $6, $1 ; ; M64-LABEL: test_ctselect_i16: ; M64: # %bb.0: @@ -48,7 +48,7 @@ define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) { ; M64-NEXT: and $1, $2, $1 ; M64-NEXT: sll $2, $6, 0 ; M64-NEXT: jr $ra -; M64-NEXT: xor $2, $1, $2 +; M64-NEXT: xor $2, $2, $1 %result = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b) ret i16 %result } @@ -56,26 +56,24 @@ define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) { define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) { ; M32-LABEL: test_ctselect_i32: ; M32: # %bb.0: -; M32-NEXT: andi $1, $4, 1 -; M32-NEXT: negu $2, $1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $2, $5 -; M32-NEXT: and $1, $1, $6 +; M32-NEXT: andi $2, $4, 1 +; M32-NEXT: xor $1, $5, $6 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $6, $1 ; ; M64-LABEL: test_ctselect_i32: ; M64: # %bb.0: ; M64-NEXT: sll $1, $4, 0 -; M64-NEXT: sll $3, $5, 0 +; M64-NEXT: xor $2, $5, $6 ; M64-NEXT: andi $1, $1, 1 -; M64-NEXT: negu $2, $1 -; M64-NEXT: 
addiu $1, $1, -1 -; M64-NEXT: and $2, $2, $3 -; M64-NEXT: sll $3, $6, 0 -; M64-NEXT: and $1, $1, $3 +; M64-NEXT: sll $2, $2, 0 +; M64-NEXT: negu $1, $1 +; M64-NEXT: and $1, $2, $1 +; M64-NEXT: sll $2, $6, 0 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $2, $1 %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) ret i32 %result } @@ -88,22 +86,21 @@ define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) { ; M32-NEXT: negu $3, $3 ; M32-NEXT: xor $2, $6, $1 ; M32-NEXT: and $2, $2, $3 -; M32-NEXT: xor $2, $2, $1 +; M32-NEXT: xor $2, $1, $2 ; M32-NEXT: lw $1, 20($sp) ; M32-NEXT: xor $4, $7, $1 ; M32-NEXT: and $3, $4, $3 ; M32-NEXT: jr $ra -; M32-NEXT: xor $3, $3, $1 +; M32-NEXT: xor $3, $1, $3 ; ; M64-LABEL: test_ctselect_i64: ; M64: # %bb.0: -; M64-NEXT: andi $1, $4, 1 -; M64-NEXT: dnegu $2, $1 -; M64-NEXT: daddiu $1, $1, -1 -; M64-NEXT: and $2, $2, $5 -; M64-NEXT: and $1, $1, $6 +; M64-NEXT: andi $2, $4, 1 +; M64-NEXT: xor $1, $5, $6 +; M64-NEXT: dnegu $2, $2 +; M64-NEXT: and $1, $1, $2 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $6, $1 %result = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b) ret i64 %result } @@ -111,23 +108,21 @@ define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) { define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) { ; M32-LABEL: test_ctselect_ptr: ; M32: # %bb.0: -; M32-NEXT: andi $1, $4, 1 -; M32-NEXT: negu $2, $1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $2, $5 -; M32-NEXT: and $1, $1, $6 +; M32-NEXT: andi $2, $4, 1 +; M32-NEXT: xor $1, $5, $6 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $6, $1 ; ; M64-LABEL: test_ctselect_ptr: ; M64: # %bb.0: -; M64-NEXT: andi $1, $4, 1 -; M64-NEXT: dnegu $2, $1 -; M64-NEXT: daddiu $1, $1, -1 -; M64-NEXT: and $2, $2, $5 -; M64-NEXT: and $1, $1, $6 +; M64-NEXT: andi $2, $4, 1 +; M64-NEXT: xor $1, $5, $6 +; M64-NEXT: dnegu $2, $2 +; M64-NEXT: and $1, $1, $2 
; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $6, $1 %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b) ret ptr %result } @@ -151,13 +146,12 @@ define i32 @test_ctselect_const_false(i32 %a, i32 %b) { ; M32-LABEL: test_ctselect_const_false: ; M32: # %bb.0: ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $zero, $5 +; M32-NEXT: move $2, $5 ; ; M64-LABEL: test_ctselect_const_false: ; M64: # %bb.0: -; M64-NEXT: sll $1, $5, 0 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $zero, $1 +; M64-NEXT: sll $2, $5, 0 %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b) ret i32 %result } @@ -166,29 +160,27 @@ define i32 @test_ctselect_const_false(i32 %a, i32 %b) { define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) { ; M32-LABEL: test_ctselect_icmp_eq: ; M32: # %bb.0: -; M32-NEXT: xor $1, $4, $5 -; M32-NEXT: sltu $1, $zero, $1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $1, $6 -; M32-NEXT: not $1, $1 -; M32-NEXT: and $1, $1, $7 +; M32-NEXT: xor $2, $4, $5 +; M32-NEXT: xor $1, $6, $7 +; M32-NEXT: sltiu $2, $2, 1 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $7, $1 ; ; M64-LABEL: test_ctselect_icmp_eq: ; M64: # %bb.0: ; M64-NEXT: sll $1, $5, 0 ; M64-NEXT: sll $2, $4, 0 -; M64-NEXT: sll $3, $7, 0 ; M64-NEXT: xor $1, $2, $1 -; M64-NEXT: sll $2, $6, 0 -; M64-NEXT: sltu $1, $zero, $1 -; M64-NEXT: addiu $1, $1, -1 -; M64-NEXT: and $2, $1, $2 -; M64-NEXT: not $1, $1 -; M64-NEXT: and $1, $1, $3 +; M64-NEXT: xor $2, $6, $7 +; M64-NEXT: sltiu $1, $1, 1 +; M64-NEXT: sll $2, $2, 0 +; M64-NEXT: negu $1, $1 +; M64-NEXT: and $1, $2, $1 +; M64-NEXT: sll $2, $7, 0 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $2, $1 %cond = icmp eq i32 %x, %y %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) ret i32 %result @@ -197,29 +189,27 @@ define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) { define i32 @test_ctselect_icmp_ne(i32 %x, i32 
%y, i32 %a, i32 %b) { ; M32-LABEL: test_ctselect_icmp_ne: ; M32: # %bb.0: -; M32-NEXT: xor $1, $4, $5 -; M32-NEXT: sltiu $1, $1, 1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $1, $6 -; M32-NEXT: not $1, $1 -; M32-NEXT: and $1, $1, $7 +; M32-NEXT: xor $2, $4, $5 +; M32-NEXT: xor $1, $6, $7 +; M32-NEXT: sltu $2, $zero, $2 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $7, $1 ; ; M64-LABEL: test_ctselect_icmp_ne: ; M64: # %bb.0: ; M64-NEXT: sll $1, $5, 0 ; M64-NEXT: sll $2, $4, 0 -; M64-NEXT: sll $3, $7, 0 ; M64-NEXT: xor $1, $2, $1 -; M64-NEXT: sll $2, $6, 0 -; M64-NEXT: sltiu $1, $1, 1 -; M64-NEXT: addiu $1, $1, -1 -; M64-NEXT: and $2, $1, $2 -; M64-NEXT: not $1, $1 -; M64-NEXT: and $1, $1, $3 +; M64-NEXT: xor $2, $6, $7 +; M64-NEXT: sltu $1, $zero, $1 +; M64-NEXT: sll $2, $2, 0 +; M64-NEXT: negu $1, $1 +; M64-NEXT: and $1, $2, $1 +; M64-NEXT: sll $2, $7, 0 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $2, $1 %cond = icmp ne i32 %x, %y %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) ret i32 %result @@ -228,29 +218,25 @@ define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) { define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) { ; M32-LABEL: test_ctselect_icmp_slt: ; M32: # %bb.0: -; M32-NEXT: slt $1, $4, $5 -; M32-NEXT: xori $1, $1, 1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $1, $6 -; M32-NEXT: not $1, $1 -; M32-NEXT: and $1, $1, $7 +; M32-NEXT: slt $2, $4, $5 +; M32-NEXT: xor $1, $6, $7 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $7, $1 ; ; M64-LABEL: test_ctselect_icmp_slt: ; M64: # %bb.0: ; M64-NEXT: sll $1, $5, 0 ; M64-NEXT: sll $2, $4, 0 -; M64-NEXT: sll $3, $7, 0 ; M64-NEXT: slt $1, $2, $1 -; M64-NEXT: sll $2, $6, 0 -; M64-NEXT: xori $1, $1, 1 -; M64-NEXT: addiu $1, $1, -1 -; M64-NEXT: and $2, $1, $2 -; M64-NEXT: not $1, $1 -; 
M64-NEXT: and $1, $1, $3 +; M64-NEXT: xor $2, $6, $7 +; M64-NEXT: negu $1, $1 +; M64-NEXT: sll $2, $2, 0 +; M64-NEXT: and $1, $2, $1 +; M64-NEXT: sll $2, $7, 0 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $2, $1 %cond = icmp slt i32 %x, %y %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) ret i32 %result @@ -259,29 +245,25 @@ define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) { define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) { ; M32-LABEL: test_ctselect_icmp_ult: ; M32: # %bb.0: -; M32-NEXT: sltu $1, $4, $5 -; M32-NEXT: xori $1, $1, 1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $1, $6 -; M32-NEXT: not $1, $1 -; M32-NEXT: and $1, $1, $7 +; M32-NEXT: sltu $2, $4, $5 +; M32-NEXT: xor $1, $6, $7 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $7, $1 ; ; M64-LABEL: test_ctselect_icmp_ult: ; M64: # %bb.0: ; M64-NEXT: sll $1, $5, 0 ; M64-NEXT: sll $2, $4, 0 -; M64-NEXT: sll $3, $7, 0 ; M64-NEXT: sltu $1, $2, $1 -; M64-NEXT: sll $2, $6, 0 -; M64-NEXT: xori $1, $1, 1 -; M64-NEXT: addiu $1, $1, -1 -; M64-NEXT: and $2, $1, $2 -; M64-NEXT: not $1, $1 -; M64-NEXT: and $1, $1, $3 +; M64-NEXT: xor $2, $6, $7 +; M64-NEXT: negu $1, $1 +; M64-NEXT: sll $2, $2, 0 +; M64-NEXT: and $1, $2, $1 +; M64-NEXT: sll $2, $7, 0 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $2, $1 %cond = icmp ult i32 %x, %y %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) ret i32 %result @@ -291,28 +273,26 @@ define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) { define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) { ; M32-LABEL: test_ctselect_load: ; M32: # %bb.0: -; M32-NEXT: andi $1, $4, 1 +; M32-NEXT: lw $2, 0($6) ; M32-NEXT: lw $3, 0($5) -; M32-NEXT: negu $2, $1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $2, $3 -; M32-NEXT: lw $3, 0($6) -; M32-NEXT: and $1, $1, $3 +; M32-NEXT: andi $1, $4, 1 +; 
M32-NEXT: negu $1, $1 +; M32-NEXT: xor $3, $3, $2 +; M32-NEXT: and $1, $3, $1 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $2, $1 ; ; M64-LABEL: test_ctselect_load: ; M64: # %bb.0: -; M64-NEXT: sll $1, $4, 0 -; M64-NEXT: lw $3, 0($5) -; M64-NEXT: andi $1, $1, 1 -; M64-NEXT: negu $2, $1 -; M64-NEXT: addiu $1, $1, -1 +; M64-NEXT: sll $3, $4, 0 +; M64-NEXT: lw $1, 0($6) +; M64-NEXT: lw $2, 0($5) +; M64-NEXT: andi $3, $3, 1 +; M64-NEXT: xor $2, $2, $1 +; M64-NEXT: negu $3, $3 ; M64-NEXT: and $2, $2, $3 -; M64-NEXT: lw $3, 0($6) -; M64-NEXT: and $1, $1, $3 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $1, $2 %a = load i32, ptr %p1 %b = load i32, ptr %p2 %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) @@ -323,41 +303,37 @@ define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) { define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) { ; M32-LABEL: test_ctselect_nested: ; M32: # %bb.0: -; M32-NEXT: andi $1, $5, 1 +; M32-NEXT: andi $2, $5, 1 +; M32-NEXT: xor $1, $6, $7 ; M32-NEXT: andi $3, $4, 1 -; M32-NEXT: negu $2, $1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: negu $4, $3 -; M32-NEXT: and $2, $2, $6 -; M32-NEXT: and $1, $1, $7 -; M32-NEXT: or $1, $2, $1 -; M32-NEXT: addiu $2, $3, -1 -; M32-NEXT: lw $3, 16($sp) -; M32-NEXT: and $1, $4, $1 -; M32-NEXT: and $2, $2, $3 +; M32-NEXT: negu $2, $2 +; M32-NEXT: negu $3, $3 +; M32-NEXT: and $1, $1, $2 +; M32-NEXT: lw $2, 16($sp) +; M32-NEXT: xor $1, $7, $1 +; M32-NEXT: xor $1, $1, $2 +; M32-NEXT: and $1, $1, $3 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $1, $2 +; M32-NEXT: xor $2, $2, $1 ; ; M64-LABEL: test_ctselect_nested: ; M64: # %bb.0: ; M64-NEXT: sll $1, $5, 0 -; M64-NEXT: sll $3, $6, 0 -; M64-NEXT: sll $4, $4, 0 +; M64-NEXT: xor $2, $6, $7 +; M64-NEXT: sll $3, $4, 0 ; M64-NEXT: andi $1, $1, 1 -; M64-NEXT: andi $4, $4, 1 -; M64-NEXT: negu $2, $1 -; M64-NEXT: addiu $1, $1, -1 -; M64-NEXT: negu $5, $4 -; M64-NEXT: and $2, $2, $3 -; M64-NEXT: sll 
$3, $7, 0 -; M64-NEXT: and $1, $1, $3 -; M64-NEXT: addiu $3, $4, -1 -; M64-NEXT: or $1, $2, $1 +; M64-NEXT: sll $2, $2, 0 +; M64-NEXT: andi $3, $3, 1 +; M64-NEXT: negu $1, $1 +; M64-NEXT: negu $3, $3 +; M64-NEXT: and $1, $2, $1 +; M64-NEXT: sll $2, $7, 0 +; M64-NEXT: xor $1, $2, $1 ; M64-NEXT: sll $2, $8, 0 -; M64-NEXT: and $1, $5, $1 -; M64-NEXT: and $2, $3, $2 +; M64-NEXT: xor $1, $1, $2 +; M64-NEXT: and $1, $1, $3 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $1, $2 +; M64-NEXT: xor $2, $2, $1 %inner = call i32 @llvm.ct.select.i32(i1 %cond2, i32 %a, i32 %b) %result = call i32 @llvm.ct.select.i32(i1 %cond1, i32 %inner, i32 %c) ret i32 %result diff --git a/llvm/test/CodeGen/Mips/ctselect-side-effects.ll b/llvm/test/CodeGen/Mips/ctselect-side-effects.ll index 6cfa07afdd51..069100e2d2a7 100644 --- a/llvm/test/CodeGen/Mips/ctselect-side-effects.ll +++ b/llvm/test/CodeGen/Mips/ctselect-side-effects.ll @@ -38,26 +38,24 @@ define i32 @test_constant_fold() { define i32 @test_protected_no_branch(i1 %cond, i32 %a, i32 %b) { ; M32-LABEL: test_protected_no_branch: ; M32: # %bb.0: -; M32-NEXT: andi $1, $4, 1 -; M32-NEXT: negu $2, $1 -; M32-NEXT: addiu $1, $1, -1 -; M32-NEXT: and $2, $2, $5 -; M32-NEXT: and $1, $1, $6 +; M32-NEXT: andi $2, $4, 1 +; M32-NEXT: xor $1, $5, $6 +; M32-NEXT: negu $2, $2 +; M32-NEXT: and $1, $1, $2 ; M32-NEXT: jr $ra -; M32-NEXT: or $2, $2, $1 +; M32-NEXT: xor $2, $6, $1 ; ; M64-LABEL: test_protected_no_branch: ; M64: # %bb.0: ; M64-NEXT: sll $1, $4, 0 -; M64-NEXT: sll $3, $5, 0 +; M64-NEXT: xor $2, $5, $6 ; M64-NEXT: andi $1, $1, 1 -; M64-NEXT: negu $2, $1 -; M64-NEXT: addiu $1, $1, -1 -; M64-NEXT: and $2, $2, $3 -; M64-NEXT: sll $3, $6, 0 -; M64-NEXT: and $1, $1, $3 +; M64-NEXT: sll $2, $2, 0 +; M64-NEXT: negu $1, $1 +; M64-NEXT: and $1, $2, $1 +; M64-NEXT: sll $2, $6, 0 ; M64-NEXT: jr $ra -; M64-NEXT: or $2, $2, $1 +; M64-NEXT: xor $2, $2, $1 %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b) ret i32 %result } |
