diff options
Diffstat (limited to 'llvm/test')
460 files changed, 24119 insertions, 14777 deletions
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-cmp.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-cmp.ll index 27d24fa..caa6d6f 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-cmp.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-cmp.ll @@ -9,38 +9,38 @@ define void @icmp_eq() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = icmp eq <4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = icmp eq <8 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = icmp eq <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8 = icmp eq <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = icmp eq <32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8 = icmp eq <vscale x 1 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = icmp eq <vscale x 2 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = icmp eq <vscale x 4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = icmp eq <vscale x 8 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = icmp eq <vscale x 16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8 = icmp eq <vscale x 32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8 = icmp eq <vscale x 16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i8 = icmp eq <vscale x 32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = icmp eq <2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %v4i16 = icmp eq <4 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = icmp eq <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16 = icmp eq <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = icmp eq <16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16 = icmp eq <vscale x 1 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = icmp eq <vscale x 2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = icmp eq <vscale x 4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = icmp eq <vscale x 8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16 = icmp eq <vscale x 16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16 = icmp eq <vscale x 8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i16 = icmp eq <vscale x 16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = icmp eq <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = icmp eq <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32 = icmp eq <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32 = icmp eq <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = icmp eq <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = icmp eq <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %nxv1i32 = icmp eq <vscale x 1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = icmp eq <vscale x 2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = icmp eq <vscale x 4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32 = icmp eq <vscale x 8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32 = icmp eq <vscale x 16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32 = icmp eq <vscale x 4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i32 = icmp eq <vscale x 8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i32 = icmp eq <vscale x 16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = icmp eq <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = icmp eq <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64 = icmp eq <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = icmp eq <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = icmp eq <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = icmp eq <vscale x 1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = icmp eq <vscale x 2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64 = icmp eq <vscale x 4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64 = icmp eq <vscale x 8 x i64> undef, undef 
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64 = icmp eq <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64 = icmp eq <vscale x 4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i64 = icmp eq <vscale x 8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2i8 = icmp eq <2 x i8> undef, undef @@ -96,38 +96,38 @@ define void @icmp_ne() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = icmp ne <4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = icmp ne <8 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = icmp ne <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8 = icmp ne <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = icmp ne <32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8 = icmp ne <vscale x 1 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = icmp ne <vscale x 2 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = icmp ne <vscale x 4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = icmp ne <vscale x 8 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = icmp ne <vscale x 16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8 = icmp ne <vscale x 32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8 = icmp ne <vscale x 16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated 
cost of 4 for instruction: %nxv32i8 = icmp ne <vscale x 32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = icmp ne <2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = icmp ne <4 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = icmp ne <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16 = icmp ne <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = icmp ne <16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16 = icmp ne <vscale x 1 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = icmp ne <vscale x 2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = icmp ne <vscale x 4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = icmp ne <vscale x 8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16 = icmp ne <vscale x 16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16 = icmp ne <vscale x 8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i16 = icmp ne <vscale x 16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = icmp ne <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = icmp ne <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32 = icmp ne <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32 = icmp ne <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found 
an estimated cost of 2 for instruction: %v8i32 = icmp ne <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = icmp ne <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32 = icmp ne <vscale x 1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = icmp ne <vscale x 2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = icmp ne <vscale x 4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32 = icmp ne <vscale x 8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32 = icmp ne <vscale x 16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32 = icmp ne <vscale x 4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i32 = icmp ne <vscale x 8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i32 = icmp ne <vscale x 16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = icmp ne <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = icmp ne <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64 = icmp ne <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = icmp ne <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = icmp ne <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = icmp ne <vscale x 1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = icmp ne <vscale x 2 x i64> undef, undef 
-; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64 = icmp ne <vscale x 4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64 = icmp ne <vscale x 8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64 = icmp ne <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64 = icmp ne <vscale x 4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i64 = icmp ne <vscale x 8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2i8 = icmp ne <2 x i8> undef, undef @@ -183,38 +183,38 @@ define void @icmp_ugt() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = icmp ugt <4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = icmp ugt <8 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = icmp ugt <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8 = icmp ugt <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = icmp ugt <32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8 = icmp ugt <vscale x 1 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = icmp ugt <vscale x 2 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = icmp ugt <vscale x 4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = icmp ugt <vscale x 8 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = icmp ugt <vscale x 16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %nxv32i8 = icmp ugt <vscale x 32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8 = icmp ugt <vscale x 16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i8 = icmp ugt <vscale x 32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = icmp ugt <2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = icmp ugt <4 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = icmp ugt <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16 = icmp ugt <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = icmp ugt <16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16 = icmp ugt <vscale x 1 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = icmp ugt <vscale x 2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = icmp ugt <vscale x 4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = icmp ugt <vscale x 8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16 = icmp ugt <vscale x 16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16 = icmp ugt <vscale x 8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i16 = icmp ugt <vscale x 16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = icmp ugt <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = icmp ugt <4 x 
i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32 = icmp ugt <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32 = icmp ugt <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = icmp ugt <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = icmp ugt <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32 = icmp ugt <vscale x 1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = icmp ugt <vscale x 2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = icmp ugt <vscale x 4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32 = icmp ugt <vscale x 8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32 = icmp ugt <vscale x 16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32 = icmp ugt <vscale x 4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i32 = icmp ugt <vscale x 8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i32 = icmp ugt <vscale x 16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = icmp ugt <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = icmp ugt <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64 = icmp ugt <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = icmp ugt <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %v8i64 = icmp ugt <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = icmp ugt <vscale x 1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = icmp ugt <vscale x 2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64 = icmp ugt <vscale x 4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64 = icmp ugt <vscale x 8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64 = icmp ugt <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64 = icmp ugt <vscale x 4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i64 = icmp ugt <vscale x 8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2i8 = icmp ugt <2 x i8> undef, undef @@ -270,38 +270,38 @@ define void @icmp_uge() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = icmp uge <4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = icmp uge <8 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = icmp uge <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8 = icmp uge <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = icmp uge <32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8 = icmp uge <vscale x 1 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = icmp uge <vscale x 2 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = icmp uge 
<vscale x 4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = icmp uge <vscale x 8 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = icmp uge <vscale x 16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8 = icmp uge <vscale x 32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8 = icmp uge <vscale x 16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i8 = icmp uge <vscale x 32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = icmp uge <2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = icmp uge <4 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = icmp uge <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16 = icmp uge <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = icmp uge <16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16 = icmp uge <vscale x 1 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = icmp uge <vscale x 2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = icmp uge <vscale x 4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = icmp uge <vscale x 8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16 = icmp uge <vscale x 16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16 = icmp uge <vscale x 8 x i16> undef, undef +; CHECK-NEXT: Cost Model: 
Found an estimated cost of 4 for instruction: %nxv16i16 = icmp uge <vscale x 16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = icmp uge <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = icmp uge <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32 = icmp uge <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32 = icmp uge <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = icmp uge <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = icmp uge <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32 = icmp uge <vscale x 1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = icmp uge <vscale x 2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = icmp uge <vscale x 4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32 = icmp uge <vscale x 8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32 = icmp uge <vscale x 16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32 = icmp uge <vscale x 4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i32 = icmp uge <vscale x 8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i32 = icmp uge <vscale x 16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = icmp uge <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = icmp uge <4 x 
i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64 = icmp uge <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = icmp uge <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = icmp uge <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = icmp uge <vscale x 1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = icmp uge <vscale x 2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64 = icmp uge <vscale x 4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64 = icmp uge <vscale x 8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64 = icmp uge <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64 = icmp uge <vscale x 4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i64 = icmp uge <vscale x 8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2i8 = icmp uge <2 x i8> undef, undef @@ -357,38 +357,38 @@ define void @icmp_ult() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = icmp ult <4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = icmp ult <8 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = icmp ult <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8 = icmp ult <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = icmp ult <32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %nxv1i8 = icmp ult <vscale x 1 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = icmp ult <vscale x 2 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = icmp ult <vscale x 4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = icmp ult <vscale x 8 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = icmp ult <vscale x 16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8 = icmp ult <vscale x 32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8 = icmp ult <vscale x 16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i8 = icmp ult <vscale x 32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = icmp ult <2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = icmp ult <4 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = icmp ult <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16 = icmp ult <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = icmp ult <16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16 = icmp ult <vscale x 1 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = icmp ult <vscale x 2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = icmp ult <vscale x 4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = icmp ult <vscale 
x 8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16 = icmp ult <vscale x 16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16 = icmp ult <vscale x 8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i16 = icmp ult <vscale x 16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = icmp ult <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = icmp ult <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32 = icmp ult <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32 = icmp ult <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = icmp ult <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = icmp ult <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32 = icmp ult <vscale x 1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = icmp ult <vscale x 2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = icmp ult <vscale x 4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32 = icmp ult <vscale x 8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32 = icmp ult <vscale x 16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32 = icmp ult <vscale x 4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i32 = icmp ult <vscale x 8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 8 for instruction: %nxv16i32 = icmp ult <vscale x 16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = icmp ult <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = icmp ult <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64 = icmp ult <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = icmp ult <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = icmp ult <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = icmp ult <vscale x 1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = icmp ult <vscale x 2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64 = icmp ult <vscale x 4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64 = icmp ult <vscale x 8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64 = icmp ult <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64 = icmp ult <vscale x 4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i64 = icmp ult <vscale x 8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2i8 = icmp ult <2 x i8> undef, undef @@ -444,38 +444,38 @@ define void @icmp_ule() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = icmp ule <4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = icmp ule <8 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v16i8 = icmp ule <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8 = icmp ule <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = icmp ule <32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8 = icmp ule <vscale x 1 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = icmp ule <vscale x 2 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = icmp ule <vscale x 4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = icmp ule <vscale x 8 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = icmp ule <vscale x 16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8 = icmp ule <vscale x 32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8 = icmp ule <vscale x 16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i8 = icmp ule <vscale x 32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = icmp ule <2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = icmp ule <4 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = icmp ule <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16 = icmp ule <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = icmp ule <16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16 = icmp ule <vscale x 1 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %nxv2i16 = icmp ule <vscale x 2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = icmp ule <vscale x 4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = icmp ule <vscale x 8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16 = icmp ule <vscale x 16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16 = icmp ule <vscale x 8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i16 = icmp ule <vscale x 16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = icmp ule <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = icmp ule <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32 = icmp ule <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32 = icmp ule <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = icmp ule <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = icmp ule <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32 = icmp ule <vscale x 1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = icmp ule <vscale x 2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = icmp ule <vscale x 4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32 = icmp ule <vscale x 8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32 = icmp ule <vscale x 16 x 
i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32 = icmp ule <vscale x 4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i32 = icmp ule <vscale x 8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i32 = icmp ule <vscale x 16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = icmp ule <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = icmp ule <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64 = icmp ule <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = icmp ule <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = icmp ule <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = icmp ule <vscale x 1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = icmp ule <vscale x 2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64 = icmp ule <vscale x 4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64 = icmp ule <vscale x 8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64 = icmp ule <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64 = icmp ule <vscale x 4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i64 = icmp ule <vscale x 8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2i8 = icmp ule <2 x i8> undef, undef @@ -531,38 +531,38 @@ define void 
@icmp_sgt() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = icmp sgt <4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = icmp sgt <8 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = icmp sgt <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8 = icmp sgt <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = icmp sgt <32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8 = icmp sgt <vscale x 1 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = icmp sgt <vscale x 2 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = icmp sgt <vscale x 4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = icmp sgt <vscale x 8 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = icmp sgt <vscale x 16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8 = icmp sgt <vscale x 32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8 = icmp sgt <vscale x 16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i8 = icmp sgt <vscale x 32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = icmp sgt <2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = icmp sgt <4 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = icmp sgt <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16 = icmp sgt <16 
x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = icmp sgt <16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16 = icmp sgt <vscale x 1 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = icmp sgt <vscale x 2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = icmp sgt <vscale x 4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = icmp sgt <vscale x 8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16 = icmp sgt <vscale x 16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16 = icmp sgt <vscale x 8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i16 = icmp sgt <vscale x 16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = icmp sgt <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = icmp sgt <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32 = icmp sgt <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32 = icmp sgt <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = icmp sgt <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = icmp sgt <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32 = icmp sgt <vscale x 1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = icmp sgt <vscale x 2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %nxv4i32 = icmp sgt <vscale x 4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32 = icmp sgt <vscale x 8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32 = icmp sgt <vscale x 16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32 = icmp sgt <vscale x 4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i32 = icmp sgt <vscale x 8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i32 = icmp sgt <vscale x 16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = icmp sgt <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = icmp sgt <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64 = icmp sgt <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = icmp sgt <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = icmp sgt <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = icmp sgt <vscale x 1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = icmp sgt <vscale x 2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64 = icmp sgt <vscale x 4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64 = icmp sgt <vscale x 8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64 = icmp sgt <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64 = icmp sgt <vscale x 4 x 
i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i64 = icmp sgt <vscale x 8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2i8 = icmp sgt <2 x i8> undef, undef @@ -618,38 +618,38 @@ define void @icmp_sge() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = icmp sge <4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = icmp sge <8 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = icmp sge <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8 = icmp sge <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = icmp sge <32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8 = icmp sge <vscale x 1 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = icmp sge <vscale x 2 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = icmp sge <vscale x 4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = icmp sge <vscale x 8 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = icmp sge <vscale x 16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8 = icmp sge <vscale x 32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8 = icmp sge <vscale x 16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i8 = icmp sge <vscale x 32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = icmp sge <2 x i16> undef, undef ; CHECK-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %v4i16 = icmp sge <4 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = icmp sge <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16 = icmp sge <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = icmp sge <16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16 = icmp sge <vscale x 1 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = icmp sge <vscale x 2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = icmp sge <vscale x 4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = icmp sge <vscale x 8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16 = icmp sge <vscale x 16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16 = icmp sge <vscale x 8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i16 = icmp sge <vscale x 16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = icmp sge <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = icmp sge <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32 = icmp sge <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32 = icmp sge <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = icmp sge <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = icmp sge <16 x i32> undef, 
undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32 = icmp sge <vscale x 1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = icmp sge <vscale x 2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = icmp sge <vscale x 4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32 = icmp sge <vscale x 8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32 = icmp sge <vscale x 16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32 = icmp sge <vscale x 4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i32 = icmp sge <vscale x 8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i32 = icmp sge <vscale x 16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = icmp sge <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = icmp sge <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64 = icmp sge <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = icmp sge <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = icmp sge <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = icmp sge <vscale x 1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = icmp sge <vscale x 2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64 = icmp sge <vscale x 4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %nxv8i64 = icmp sge <vscale x 8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64 = icmp sge <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64 = icmp sge <vscale x 4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i64 = icmp sge <vscale x 8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2i8 = icmp sge <2 x i8> undef, undef @@ -705,38 +705,38 @@ define void @icmp_slt() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = icmp slt <4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = icmp slt <8 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = icmp slt <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8 = icmp slt <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = icmp slt <32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8 = icmp slt <vscale x 1 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = icmp slt <vscale x 2 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = icmp slt <vscale x 4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = icmp slt <vscale x 8 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i8 = icmp slt <vscale x 16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8 = icmp slt <vscale x 32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%nxv16i8 = icmp slt <vscale x 16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i8 = icmp slt <vscale x 32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = icmp slt <2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = icmp slt <4 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = icmp slt <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16 = icmp slt <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = icmp slt <16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16 = icmp slt <vscale x 1 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = icmp slt <vscale x 2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = icmp slt <vscale x 4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = icmp slt <vscale x 8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16 = icmp slt <vscale x 16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16 = icmp slt <vscale x 8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i16 = icmp slt <vscale x 16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i32 = icmp slt <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = icmp slt <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32 = icmp slt <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %v16i32 = icmp slt <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = icmp slt <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = icmp slt <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32 = icmp slt <vscale x 1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = icmp slt <vscale x 2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = icmp slt <vscale x 4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32 = icmp slt <vscale x 8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32 = icmp slt <vscale x 16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32 = icmp slt <vscale x 4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i32 = icmp slt <vscale x 8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i32 = icmp slt <vscale x 16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = icmp slt <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = icmp slt <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64 = icmp slt <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64 = icmp slt <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = icmp slt <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = icmp slt <vscale x 1 x i64> undef, 
undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = icmp slt <vscale x 2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64 = icmp slt <vscale x 4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64 = icmp slt <vscale x 8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64 = icmp slt <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64 = icmp slt <vscale x 4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i64 = icmp slt <vscale x 8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2i8 = icmp slt <2 x i8> undef, undef @@ -792,38 +792,38 @@ define void @icmp_sle() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8 = icmp sle <4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8 = icmp sle <8 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i8 = icmp sle <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i8 = icmp sle <32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8 = icmp sle <32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i8 = icmp sle <vscale x 1 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i8 = icmp sle <vscale x 2 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i8 = icmp sle <vscale x 4 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i8 = icmp sle <vscale x 8 x i8> undef, undef -; CHECK-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %nxv16i8 = icmp sle <vscale x 16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i8 = icmp sle <vscale x 32 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8 = icmp sle <vscale x 16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i8 = icmp sle <vscale x 32 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i16 = icmp sle <2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i16 = icmp sle <4 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16 = icmp sle <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16 = icmp sle <16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i16 = icmp sle <16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16 = icmp sle <vscale x 1 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i16 = icmp sle <vscale x 2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16 = icmp sle <vscale x 4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16 = icmp sle <vscale x 8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16 = icmp sle <vscale x 16 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16 = icmp sle <vscale x 8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i16 = icmp sle <vscale x 16 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
%v2i32 = icmp sle <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i32 = icmp sle <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32 = icmp sle <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i32 = icmp sle <16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32 = icmp sle <8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16i32 = icmp sle <16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i32 = icmp sle <vscale x 1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i32 = icmp sle <vscale x 2 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32 = icmp sle <vscale x 4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32 = icmp sle <vscale x 8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32 = icmp sle <vscale x 16 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32 = icmp sle <vscale x 4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i32 = icmp sle <vscale x 8 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i32 = icmp sle <vscale x 16 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i64 = icmp sle <2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i64 = icmp sle <4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64 = icmp sle <8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %v4i64 = icmp sle <4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64 = icmp sle <8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i64 = icmp sle <vscale x 1 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2i64 = icmp sle <vscale x 2 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64 = icmp sle <vscale x 4 x i64> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64 = icmp sle <vscale x 8 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64 = icmp sle <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64 = icmp sle <vscale x 4 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i64 = icmp sle <vscale x 8 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2i8 = icmp sle <2 x i8> undef, undef diff --git a/llvm/test/Bindings/llvm-c/atomics.ll b/llvm/test/Bindings/llvm-c/atomics.ll index e64a299..162368c 100644 --- a/llvm/test/Bindings/llvm-c/atomics.ll +++ b/llvm/test/Bindings/llvm-c/atomics.ll @@ -36,6 +36,31 @@ define void @atomic_load_store(ptr %word) { ret void } +define void @atomic_rmw_ops(ptr %p, i32 %i, float %f) { + ; Test all atomicrmw operations + %a.xchg = atomicrmw xchg ptr %p, i32 %i acq_rel, align 8 + %a.add = atomicrmw add ptr %p, i32 %i acq_rel, align 8 + %a.sub = atomicrmw sub ptr %p, i32 %i acq_rel, align 8 + %a.and = atomicrmw and ptr %p, i32 %i acq_rel, align 8 + %a.nand = atomicrmw nand ptr %p, i32 %i acq_rel, align 8 + %a.or = atomicrmw or ptr %p, i32 %i acq_rel, align 8 + %a.xor = atomicrmw xor ptr %p, i32 %i acq_rel, align 8 + %a.max = atomicrmw max ptr %p, i32 %i 
acq_rel, align 8 + %a.min = atomicrmw min ptr %p, i32 %i acq_rel, align 8 + %a.umax = atomicrmw umax ptr %p, i32 %i acq_rel, align 8 + %a.umin = atomicrmw umin ptr %p, i32 %i acq_rel, align 8 + + %a.fadd = atomicrmw fadd ptr %p, float %f acq_rel, align 8 + %a.fsub = atomicrmw fsub ptr %p, float %f acq_rel, align 8 + %a.fmax = atomicrmw fmax ptr %p, float %f acq_rel, align 8 + %a.fmin = atomicrmw fmin ptr %p, float %f acq_rel, align 8 + + %a.uinc_wrap = atomicrmw uinc_wrap ptr %p, i32 %i acq_rel, align 8 + %a.udec_wrap = atomicrmw udec_wrap ptr %p, i32 %i acq_rel, align 8 + + ret void +} + define i32 @main() { %1 = alloca i32, align 4 %2 = cmpxchg ptr %1, i32 2, i32 3 seq_cst acquire diff --git a/llvm/test/Bitcode/dbg-label-record-bc.ll b/llvm/test/Bitcode/dbg-label-record-bc.ll new file mode 100644 index 0000000..e151f7f --- /dev/null +++ b/llvm/test/Bitcode/dbg-label-record-bc.ll @@ -0,0 +1,50 @@ +;; Tests that we can parse and print a function containing a debug label record +;; and no other debug record kinds. 
+ +; RUN: llvm-as --write-experimental-debuginfo-iterators-to-bitcode=true %s -o - \ +; RUN: | opt -S | FileCheck %s --check-prefixes=CHECK,INTRINSIC + +; RUN: llvm-as --write-experimental-debuginfo-iterators-to-bitcode=true %s -o - \ +; RUN: | opt -S --preserve-input-debuginfo-format=true \ +; RUN: | FileCheck %s --check-prefixes=CHECK,RECORD + +source_filename = "bbi-94196.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: void @foo() +; CHECK: bar: +; INTRINSIC-NEXT: call void @llvm.dbg.label(metadata ![[LABEL:[0-9]+]]), !dbg ![[LOC:[0-9]+]] +; RECORD-NEXT: #dbg_label(![[LABEL:[0-9]+]], ![[LOC:[0-9]+]]) + +; CHECK-DAG: ![[LABEL]] = !DILabel({{.*}}name: "bar" +; CHECK-DAG: ![[LOC]] = !DILocation(line: 5, column: 1 + +define dso_local void @foo() !dbg !5 { +entry: + br label %bar, !dbg !9 + +bar: ; preds = %entry + tail call void @llvm.dbg.label(metadata !10), !dbg !11 + ret void, !dbg !12 +} + +declare void @llvm.dbg.label(metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3} +!llvm.ident = !{!4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0git", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "<stdin>", directory: "/home/gbtozers/dev/llvm-project-ddd-textual-ir") +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = !{i32 1, !"wchar_size", i32 4} +!4 = !{!"clang version 19.0.0git"} +!5 = distinct !DISubprogram(name: "foo", scope: !6, file: !6, line: 1, type: !7, scopeLine: 2, spFlags: DISPFlagDefinition, unit: !0) +!6 = !DIFile(filename: "bbi-94196.c", directory: "/home/gbtozers/dev/llvm-project-ddd-textual-ir") +!7 = !DISubroutineType(types: !8) +!8 = !{null} +!9 = !DILocation(line: 3, column: 3, scope: !5) +!10 = !DILabel(scope: !5, name: "bar", file: !6, line: 5) +!11 = !DILocation(line: 5, column: 
1, scope: !5) +!12 = !DILocation(line: 6, column: 3, scope: !5) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir index 76d8288..d791660 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-fp-const-lhs.mir @@ -116,3 +116,129 @@ body: | $q0 = COPY %mul RET_ReallyLR ... +--- +name: fminnum +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: fminnum + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %min:_(s32) = G_FMINNUM [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %min(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + %min:_(s32) = G_FMINNUM %cst, %0 + $s0 = COPY %min + RET_ReallyLR +... +--- +name: fmaxnum +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: fmaxnum + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %max:_(s32) = G_FMAXNUM [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %max(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + %max:_(s32) = G_FMAXNUM %cst, %0 + $s0 = COPY %max + RET_ReallyLR +... 
+--- +name: fminnum_ieee +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: fminnum_ieee + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %min:_(s32) = G_FMINNUM_IEEE [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %min(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + %min:_(s32) = G_FMINNUM_IEEE %cst, %0 + $s0 = COPY %min + RET_ReallyLR +... +--- +name: fmaxnum_ieee +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: fmaxnum_ieee + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %max:_(s32) = G_FMAXNUM_IEEE [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %max(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + %max:_(s32) = G_FMAXNUM_IEEE %cst, %0 + $s0 = COPY %max + RET_ReallyLR +... +--- +name: fminimum +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: fminimum + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %min:_(s32) = G_FMINIMUM [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %min(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + %min:_(s32) = G_FMINIMUM %cst, %0 + $s0 = COPY %min + RET_ReallyLR +... 
+--- +name: fmaximum +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: fmaximum + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + ; CHECK-NEXT: %max:_(s32) = G_FMAXIMUM [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %max(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_FCONSTANT float 2.000000e+00 + %max:_(s32) = G_FMAXIMUM %cst, %0 + $s0 = COPY %max + RET_ReallyLR +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir new file mode 100644 index 0000000..1636549 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir @@ -0,0 +1,456 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +--- +name: add +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: add + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %add:_(s32) = G_ADD [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %add(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 1 + %add:_(s32) = G_ADD %cst, %0 + $s0 = COPY %add + RET_ReallyLR + +... 
+--- +name: mul +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: mul + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32) = G_MUL [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32) = G_MUL %cst, %0 + $s0 = COPY %mul + RET_ReallyLR +... +--- +name: and +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: and + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: %and:_(s32) = G_AND [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %and(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 5 + %and:_(s32) = G_AND %cst, %0 + $s0 = COPY %and + RET_ReallyLR +... +--- +name: or +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: or + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: %or:_(s32) = G_OR [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %or(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 5 + %or:_(s32) = G_OR %cst, %0 + $s0 = COPY %or + RET_ReallyLR +... +--- +name: xor +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: xor + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: %xor:_(s32) = G_XOR [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %xor(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 5 + %xor:_(s32) = G_XOR %cst, %0 + $s0 = COPY %xor + RET_ReallyLR +... 
+--- +name: smin +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: smin + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %min:_(s32) = G_SMIN [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %min(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 10 + %min:_(s32) = G_SMIN %cst, %0 + $s0 = COPY %min + RET_ReallyLR +... +--- +name: smax +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: smax + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %max:_(s32) = G_SMAX [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %max(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 10 + %max:_(s32) = G_SMAX %cst, %0 + $s0 = COPY %max + RET_ReallyLR +... +--- +name: umin +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: umin + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %min:_(s32) = G_UMIN [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %min(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 10 + %min:_(s32) = G_UMIN %cst, %0 + $s0 = COPY %min + RET_ReallyLR +... +--- +name: umax +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: umax + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 10 + ; CHECK-NEXT: %max:_(s32) = G_UMAX [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %max(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 10 + %max:_(s32) = G_UMAX %cst, %0 + $s0 = COPY %max + RET_ReallyLR +... 
+--- +name: uaddo +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: uaddo + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %add:_(s32), %overflow:_(s1) = G_UADDO [[COPY]], %cst + ; CHECK-NEXT: %ret:_(s32) = G_ANYEXT %overflow(s1) + ; CHECK-NEXT: $s0 = COPY %ret(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 1 + %add:_(s32), %overflow:_(s1) = G_UADDO %cst, %0 + %ret:_(s32) = G_ANYEXT %overflow + $s0 = COPY %ret + RET_ReallyLR + +... +--- +name: saddo +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: saddo + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %add:_(s32), %overflow:_(s1) = G_SADDO [[COPY]], %cst + ; CHECK-NEXT: %ret:_(s32) = G_ANYEXT %overflow(s1) + ; CHECK-NEXT: $s0 = COPY %ret(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 1 + %add:_(s32), %overflow:_(s1) = G_SADDO %cst, %0 + %ret:_(s32) = G_ANYEXT %overflow + $s0 = COPY %ret + RET_ReallyLR + +... +--- +name: umulo +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: umulo + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32), %overflow:_(s1) = G_UMULO [[COPY]], %cst + ; CHECK-NEXT: %ret:_(s32) = G_ANYEXT %overflow(s1) + ; CHECK-NEXT: $s0 = COPY %ret(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32), %overflow:_(s1) = G_UMULO %cst, %0 + %ret:_(s32) = G_ANYEXT %overflow + $s0 = COPY %ret + RET_ReallyLR +... 
+--- +name: smulo +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: smulo + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32), %overflow:_(s1) = G_SMULO [[COPY]], %cst + ; CHECK-NEXT: %ret:_(s32) = G_ANYEXT %overflow(s1) + ; CHECK-NEXT: $s0 = COPY %ret(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32), %overflow:_(s1) = G_SMULO %cst, %0 + %ret:_(s32) = G_ANYEXT %overflow + $s0 = COPY %ret + RET_ReallyLR +... +--- +name: umulh +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: umulh + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32) = G_UMULH [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32) = G_UMULH %cst, %0 + $s0 = COPY %mul + RET_ReallyLR +... +--- +name: smulh +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: smulh + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32) = G_SMULH [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32) = G_SMULH %cst, %0 + $s0 = COPY %mul + RET_ReallyLR +... 
+--- +name: uaddsat +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: uaddsat + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %add:_(s32) = G_UADDSAT [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %add(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 1 + %add:_(s32) = G_UADDSAT %cst, %0 + $s0 = COPY %add + RET_ReallyLR + +... +--- +name: saddsat +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: saddsat + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %add:_(s32) = G_SADDSAT [[COPY]], %cst + ; CHECK-NEXT: $s0 = COPY %add(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 1 + %add:_(s32) = G_SADDSAT %cst, %0 + $s0 = COPY %add + RET_ReallyLR + +... +--- +name: smulfix +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: smulfix + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32) = G_SMULFIX [[COPY]], %cst, 7 + ; CHECK-NEXT: $s0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32) = G_SMULFIX %cst, %0, 7 + $s0 = COPY %mul + RET_ReallyLR +... 
+--- +name: umulfix +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: umulfix + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32) = G_UMULFIX [[COPY]], %cst, 7 + ; CHECK-NEXT: $s0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32) = G_UMULFIX %cst, %0, 7 + $s0 = COPY %mul + RET_ReallyLR +... +--- +name: smulfixsat +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: smulfixsat + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32) = G_SMULFIXSAT [[COPY]], %cst, 7 + ; CHECK-NEXT: $s0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32) = G_SMULFIXSAT %cst, %0, 7 + $s0 = COPY %mul + RET_ReallyLR +... +--- +name: umulfixsat +tracksRegLiveness: true +body: | + bb.1: + liveins: $s0 + + ; CHECK-LABEL: name: umulfixsat + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: %mul:_(s32) = G_UMULFIXSAT [[COPY]], %cst, 7 + ; CHECK-NEXT: $s0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $s0 + %cst:_(s32) = G_CONSTANT i32 3 + %mul:_(s32) = G_UMULFIXSAT %cst, %0, 7 + $s0 = COPY %mul + RET_ReallyLR +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-const-fold-barrier-rhs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-const-fold-barrier-rhs.mir index 01e0dce..c967e4f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-const-fold-barrier-rhs.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-const-fold-barrier-rhs.mir @@ -78,3 +78,163 @@ body: | RET_ReallyLR ... 
+--- +name: cfb_lhs_smulo +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: cfb_lhs_smulo + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + ; CHECK-NEXT: %mul:_(s32), %overflow:_(s1) = G_SMULO [[COPY]], %cfb + ; CHECK-NEXT: %ret:_(s32) = G_ANYEXT %overflow(s1) + ; CHECK-NEXT: $w0 = COPY %ret(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $w0 + %cst:_(s32) = G_CONSTANT i32 1 + %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + %mul:_(s32), %overflow:_(s1) = G_SMULO %cfb, %0 + %ret:_(s32) = G_ANYEXT %overflow + $w0 = COPY %ret + RET_ReallyLR + +... +--- +name: cfb_lhs_cfb_already_rhs_smulo +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: cfb_lhs_cfb_already_rhs_smulo + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + ; CHECK-NEXT: %cst2:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: %cfb2:_(s32) = G_CONSTANT_FOLD_BARRIER %cst2 + ; CHECK-NEXT: %mul:_(s32), %overflow:_(s1) = G_SMULO %cfb, %cfb2 + ; CHECK-NEXT: %ret:_(s32) = G_ANYEXT %overflow(s1) + ; CHECK-NEXT: $w0 = COPY %ret(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $w0 + %cst:_(s32) = G_CONSTANT i32 1 + %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + %cst2:_(s32) = G_CONSTANT i32 6 + %cfb2:_(s32) = G_CONSTANT_FOLD_BARRIER %cst2 + %mul:_(s32), %overflow:_(s1) = G_SMULO %cfb, %cfb2 + %ret:_(s32) = G_ANYEXT %overflow + $w0 = COPY %ret + RET_ReallyLR + +... 
+--- +name: cfb_lhs_cst_on_rhs_smulo +alignment: 4 +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: cfb_lhs_cst_on_rhs_smulo + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + ; CHECK-NEXT: %cst2:_(s32) = G_CONSTANT i32 6 + ; CHECK-NEXT: %mul:_(s32), %overflow:_(s1) = G_SMULO %cfb, %cst2 + ; CHECK-NEXT: %ret:_(s32) = G_ANYEXT %overflow(s1) + ; CHECK-NEXT: $w0 = COPY %ret(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $w0 + %cst:_(s32) = G_CONSTANT i32 1 + %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + %cst2:_(s32) = G_CONSTANT i32 6 + %mul:_(s32), %overflow:_(s1) = G_SMULO %cfb, %cst2 + %ret:_(s32) = G_ANYEXT %overflow + $w0 = COPY %ret + RET_ReallyLR + +... +--- +name: cfb_lhs_umulfixsat +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: cfb_lhs_umulfixsat + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + ; CHECK-NEXT: %mul:_(s32) = G_UMULFIXSAT [[COPY]], %cfb, 7 + ; CHECK-NEXT: $w0 = COPY %mul(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $w0 + %cst:_(s32) = G_CONSTANT i32 1 + %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + %mul:_(s32) = G_UMULFIXSAT %cfb, %0, 7 + $w0 = COPY %mul + RET_ReallyLR + +... 
+--- +name: cfb_lhs_cfb_already_rhs_umulfixsat +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: cfb_lhs_cfb_already_rhs_umulfixsat + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + ; CHECK-NEXT: %cst2:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: %cfb2:_(s32) = G_CONSTANT_FOLD_BARRIER %cst2 + ; CHECK-NEXT: %add:_(s32) = G_UMULFIXSAT %cfb, %cfb2, 7 + ; CHECK-NEXT: $w0 = COPY %add(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $w0 + %cst:_(s32) = G_CONSTANT i32 1 + %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + %cst2:_(s32) = G_CONSTANT i32 2 + %cfb2:_(s32) = G_CONSTANT_FOLD_BARRIER %cst2 + %add:_(s32) = G_UMULFIXSAT %cfb, %cfb2, 7 + $w0 = COPY %add + RET_ReallyLR + +... +--- +name: cfb_lhs_cst_on_rhs_umulfixsat +alignment: 4 +tracksRegLiveness: true +body: | + bb.1: + liveins: $w0 + + ; CHECK-LABEL: name: cfb_lhs_cst_on_rhs_umulfixsat + ; CHECK: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %cst:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + ; CHECK-NEXT: %cst2:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: %add:_(s32) = G_UMULFIXSAT %cfb, %cst2, 7 + ; CHECK-NEXT: $w0 = COPY %add(s32) + ; CHECK-NEXT: RET_ReallyLR + %0:_(s32) = COPY $w0 + %cst:_(s32) = G_CONSTANT i32 1 + %cfb:_(s32) = G_CONSTANT_FOLD_BARRIER %cst + %cst2:_(s32) = G_CONSTANT i32 2 + %add:_(s32) = G_UMULFIXSAT %cfb, %cst2, 7 + $w0 = COPY %add + RET_ReallyLR + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index 200e9d1..0793f39 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -154,8 +154,8 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. 
imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_INTRINSIC_LRINT (opcode {{[0-9]+}}): 2 type indices, 0 imm indices -# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected -# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: .. the first uncovered type index: 2, OK +# DEBUG-NEXT: .. the first uncovered imm index: 0, OK # DEBUG-NEXT: G_INTRINSIC_ROUNDEVEN (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir index 8c4300d..03e507f 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-select-to-fminmax.mir @@ -11,7 +11,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 - ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s16) = G_FMAXIMUM [[C]], [[COPY]] + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s16) = G_FMAXIMUM [[COPY]], [[C]] ; CHECK-NEXT: $h0 = COPY [[FMAXIMUM]](s16) ; CHECK-NEXT: RET_ReallyLR implicit $h0 %0:_(s16) = COPY $h0 @@ -33,7 +33,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 - ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s32) = G_FMAXIMUM [[C]], [[COPY]] + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s32) = G_FMAXIMUM [[COPY]], [[C]] ; CHECK-NEXT: $s0 = COPY [[FMAXIMUM]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $s0 %0:_(s32) = COPY $s0 @@ -55,7 +55,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = 
G_FCONSTANT double 0.000000e+00 - ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s64) = G_FMAXIMUM [[C]], [[COPY]] + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(s64) = G_FMAXIMUM [[COPY]], [[C]] ; CHECK-NEXT: $d0 = COPY [[FMAXIMUM]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:_(s64) = COPY $d0 @@ -77,7 +77,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(s64) = G_FMINIMUM [[C]], [[COPY]] + ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(s64) = G_FMINIMUM [[COPY]], [[C]] ; CHECK-NEXT: $d0 = COPY [[FMINIMUM]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $d0 %0:_(s64) = COPY $d0 @@ -100,7 +100,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16) - ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<8 x s16>) = G_FMAXIMUM [[BUILD_VECTOR]], [[COPY]] + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<8 x s16>) = G_FMAXIMUM [[COPY]], [[BUILD_VECTOR]] ; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<8 x s16>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<8 x s16>) = COPY $q0 @@ -125,7 +125,7 @@ body: | ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[COPY]](<2 x s64>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMAXIMUM [[BUILD_VECTOR]], [[BITCAST]] + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<4 x s32>) = G_FMAXIMUM [[BITCAST]], [[BUILD_VECTOR]] ; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<4 x s32>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %1:_(<2 x s64>) = COPY $q0 @@ -150,7 +150,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 ; CHECK-NEXT: 
[[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) - ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMAXIMUM [[BUILD_VECTOR]], [[COPY]] + ; CHECK-NEXT: [[FMAXIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMAXIMUM [[COPY]], [[BUILD_VECTOR]] ; CHECK-NEXT: $q0 = COPY [[FMAXIMUM]](<2 x s64>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<2 x s64>) = COPY $q0 @@ -174,7 +174,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $q0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64) - ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMINIMUM [[BUILD_VECTOR]], [[COPY]] + ; CHECK-NEXT: [[FMINIMUM:%[0-9]+]]:_(<2 x s64>) = G_FMINIMUM [[COPY]], [[BUILD_VECTOR]] ; CHECK-NEXT: $q0 = COPY [[FMINIMUM]](<2 x s64>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 %0:_(<2 x s64>) = COPY $q0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll b/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll index 7badf47..ae0a9b1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-to-fmin-fmax.ll @@ -4,7 +4,7 @@ define half @test_s16(half %a) #0 { ; CHECK-LABEL: test_s16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fmax h0, h1, h0 +; CHECK-NEXT: fmax h0, h0, h1 ; CHECK-NEXT: ret entry: %fcmp = fcmp olt half %a, 0.0 @@ -16,7 +16,7 @@ define float @test_s32(float %a) #0 { ; CHECK-LABEL: test_s32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fmax s0, s1, s0 +; CHECK-NEXT: fmax s0, s0, s1 ; CHECK-NEXT: ret entry: %fcmp = fcmp olt float %a, 0.0 @@ -28,7 +28,7 @@ define double @test_s64(double %a) #0 { ; CHECK-LABEL: test_s64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fmax d0, d1, d0 +; 
CHECK-NEXT: fmax d0, d0, d1 ; CHECK-NEXT: ret entry: %fcmp = fcmp olt double %a, 0.0 @@ -40,7 +40,7 @@ define <4 x half> @test_v4s16(<4 x half> %a) #0 { ; CHECK-LABEL: test_v4s16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: fmax v0.4h, v1.4h, v0.4h +; CHECK-NEXT: fmax v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret entry: %fcmp = fcmp olt <4 x half> %a, zeroinitializer @@ -52,7 +52,7 @@ define <8 x half> @test_v8s16(<8 x half> %a) #0 { ; CHECK-LABEL: test_v8s16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: fmax v0.8h, v1.8h, v0.8h +; CHECK-NEXT: fmax v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret entry: %fcmp = fcmp olt <8 x half> %a, zeroinitializer @@ -64,7 +64,7 @@ define <2 x float> @test_v2s32(<2 x float> %a) #0 { ; CHECK-LABEL: test_v2s32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: fmax v0.2s, v1.2s, v0.2s +; CHECK-NEXT: fmax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret entry: %fcmp = fcmp olt <2 x float> %a, zeroinitializer @@ -76,7 +76,7 @@ define <4 x float> @test_v4s32(<4 x float> %a) #0 { ; CHECK-LABEL: test_v4s32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: fmax v0.4s, v1.4s, v0.4s +; CHECK-NEXT: fmax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret entry: %fcmp = fcmp olt <4 x float> %a, zeroinitializer @@ -88,7 +88,7 @@ define <2 x double> @test_v2s64(<2 x double> %a) #0 { ; CHECK-LABEL: test_v2s64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: fmax v0.2d, v1.2d, v0.2d +; CHECK-NEXT: fmax v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret entry: %fcmp = fcmp olt <2 x double> %a, zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/vastart.ll b/llvm/test/CodeGen/AArch64/GlobalISel/vastart.ll index bd576d0..8c6e01d 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/vastart.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/vastart.ll @@ -3,7 +3,7 @@ declare void @llvm.va_start(ptr) 
-define void @test_va_start(ptr %list) { +define void @test_va_start(ptr %list, ...) { ; CHECK-LABEL: name: test_va_start ; CHECK: [[LIST:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-IOS: G_VASTART [[LIST]](p0) :: (store (s64) into %ir.list, align 1) diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index eee9a27..d3c8e3b 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -167,6 +167,7 @@ ; CHECK-NEXT: Register Coalescer ; CHECK-NEXT: Rename Disconnected Subregister Components ; CHECK-NEXT: Machine Instruction Scheduler +; CHECK-NEXT: AArch64 Post Coalescer pass ; CHECK-NEXT: Machine Block Frequency Analysis ; CHECK-NEXT: Debug Variable Analysis ; CHECK-NEXT: Live Stack Slot Analysis diff --git a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll index 089e171..c9fd2d3 100644 --- a/llvm/test/CodeGen/AArch64/arm64-abi_align.ll +++ b/llvm/test/CodeGen/AArch64/arm64-abi_align.ll @@ -518,4 +518,6 @@ attributes #5 = { nobuiltin } !1 = !{!"omnipotent char", !2} !2 = !{!"Simple C/C++ TBAA"} !3 = !{!"short", !1} -!4 = !{i64 0, i64 4, !0, i64 4, i64 2, !3, i64 8, i64 4, !0, i64 12, i64 2, !3, i64 16, i64 4, !0, i64 20, i64 2, !3} +!4 = !{i64 0, i64 4, !5, i64 4, i64 2, !6, i64 8, i64 4, !5, i64 12, i64 2, !6, i64 16, i64 4, !5, i64 20, i64 2, !6} +!5 = !{!0, !0, i64 0} +!6 = !{!3, !3, i64 0} diff --git a/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll b/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll index 7557cea..6771a86 100644 --- a/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll +++ b/llvm/test/CodeGen/AArch64/lrint-conv-fp16.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-NOFP16 ; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-FP16 +; RUN: llc < %s -mtriple=aarch64 
-global-isel | FileCheck %s --check-prefixes=CHECK-NOFP16 +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK-FP16 define i16 @testmhhs(half %x) { ; CHECK-NOFP16-LABEL: testmhhs: diff --git a/llvm/test/CodeGen/AArch64/lrint-conv.ll b/llvm/test/CodeGen/AArch64/lrint-conv.ll index b61d6f0..60393b4 100644 --- a/llvm/test/CodeGen/AArch64/lrint-conv.ll +++ b/llvm/test/CodeGen/AArch64/lrint-conv.ll @@ -1,9 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s -; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI - -; CHECK-GI: warning: Instruction selection used fallback path for testmswl -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for testmsll +; RUN: llc < %s -mtriple=aarch64 -global-isel | FileCheck %s define i32 @testmsws(float %x) { ; CHECK-LABEL: testmsws: diff --git a/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll b/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll index cd5046a..8e3866f 100644 --- a/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll +++ b/llvm/test/CodeGen/AArch64/sme-avoid-coalescing-locally-streaming.ll @@ -25,8 +25,7 @@ define void @dont_coalesce_args(<2 x i64> %a) "aarch64_pstate_sm_body" nounwind ; CHECK-REGALLOC: bb.0 (%ir-block.0): ; CHECK-REGALLOC-NEXT: liveins: $q0 ; CHECK-REGALLOC-NEXT: {{ $}} - ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0 - ; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0) + ; CHECK-REGALLOC-NEXT: STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0) ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg ; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: 
(load (s128) from %stack.0) ; CHECK-REGALLOC-NEXT: renamable $q0 = KILL killed renamable $q0, implicit-def $z0 @@ -61,7 +60,6 @@ define <2 x i64> @dont_coalesce_res() "aarch64_pstate_sm_body" nounwind { ; CHECK-REGALLOC-NEXT: BL @scalable_res, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $z0 ; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp ; CHECK-REGALLOC-NEXT: renamable $q0 = KILL renamable $q0, implicit killed $z0 - ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0 ; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0) ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg ; CHECK-REGALLOC-NEXT: $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0) @@ -94,17 +92,13 @@ define <2 x i64> @dont_coalesce_arg_that_is_also_res(<2 x i64> %a) "aarch64_psta ; CHECK-REGALLOC: bb.0 (%ir-block.0): ; CHECK-REGALLOC-NEXT: liveins: $q0 ; CHECK-REGALLOC-NEXT: {{ $}} - ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0 - ; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0) + ; CHECK-REGALLOC-NEXT: STRQui $q0, %stack.0, 0 :: (store (s128) into %stack.0) ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg ; CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0) ; CHECK-REGALLOC-NEXT: renamable $q0 = KILL killed renamable $q0, implicit-def $z0 ; CHECK-REGALLOC-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp ; CHECK-REGALLOC-NEXT: BL @scalable_args, csr_aarch64_sve_aapcs, implicit-def dead $lr, implicit $sp, implicit $z0, implicit-def $sp ; CHECK-REGALLOC-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; 
CHECK-REGALLOC-NEXT: renamable $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0) - ; CHECK-REGALLOC-NEXT: renamable $q0 = COALESCER_BARRIER_FPR128 killed renamable $q0 - ; CHECK-REGALLOC-NEXT: STRQui killed renamable $q0, %stack.0, 0 :: (store (s128) into %stack.0) ; CHECK-REGALLOC-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def dead $q0, implicit $vg, implicit-def $vg ; CHECK-REGALLOC-NEXT: $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0) ; CHECK-REGALLOC-NEXT: RET_ReallyLR implicit $q0 diff --git a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll index dba3227..0737719 100644 --- a/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll +++ b/llvm/test/CodeGen/AArch64/sme-call-streaming-compatible-to-normal-fn-wihout-sme-attr.ll @@ -52,9 +52,7 @@ define void @streaming_compatible_arg(float %f) #0 { ; CHECK-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state -; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload ; CHECK-NEXT: and x19, x0, #0x1 -; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-NEXT: tbz w19, #0, .LBB1_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstop sm diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll index cd348be..254e37e 100644 --- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll +++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll @@ -331,9 +331,9 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: 
stp q1, q0, [sp] // 32-byte Folded Spill +; CHECK-COMMON-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-COMMON-NEXT: smstop sm -; CHECK-COMMON-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload +; CHECK-COMMON-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-COMMON-NEXT: bl __addtf3 ; CHECK-COMMON-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: smstart sm @@ -392,9 +392,9 @@ define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounw ; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: stp s1, s0, [sp, #8] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: smstop sm -; CHECK-COMMON-NEXT: ldp s1, s0, [sp, #8] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: bl fmodf ; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-COMMON-NEXT: smstart sm @@ -422,9 +422,7 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati ; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: bl __arm_sme_state -; CHECK-COMMON-NEXT: ldp s2, s0, [sp, #8] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: and x19, x0, #0x1 -; CHECK-COMMON-NEXT: stp s2, s0, [sp, #8] // 8-byte Folded Spill ; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2 ; CHECK-COMMON-NEXT: // %bb.1: ; CHECK-COMMON-NEXT: smstop sm diff --git a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll index d5bea72..0c674c5 100644 --- a/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll +++ 
b/llvm/test/CodeGen/AArch64/sme-pstate-sm-changing-call-disable-coalescing.ll @@ -1085,7 +1085,6 @@ define void @dont_coalesce_res_f32(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload -; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload @@ -1116,7 +1115,6 @@ define void @dont_coalesce_res_f64(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload @@ -1151,7 +1149,6 @@ define void @dont_coalesce_res_v1i8(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload @@ -1183,7 +1180,6 @@ define void @dont_coalesce_res_v1i16(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload @@ -1215,7 +1211,6 @@ define void @dont_coalesce_res_v1i32(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ldp d9, d8, 
[sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload @@ -1247,7 +1242,6 @@ define void @dont_coalesce_res_v1i64(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload @@ -1311,7 +1305,6 @@ define void @dont_coalesce_res_v1f32(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload @@ -1343,7 +1336,6 @@ define void @dont_coalesce_res_v1f64(ptr %ptr) #0 { ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll index 6e262cc..d675733 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll @@ -8,31 +8,27 @@ declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"; define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind { ; 
CHECK-LABEL: sm_body_sm_compatible_simple: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #96 -; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: and x8, x0, #0x1 ; CHECK-NEXT: tbnz w8, #0, .LBB0_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: fmov s0, wzr -; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-NEXT: tbnz w8, #0, .LBB0_4 ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload -; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: fmov s0, wzr +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret ret float zeroinitializer } diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll index 08dec22..cd6d45f5 100644 
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll @@ -247,15 +247,11 @@ define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_psta ; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: bl cos ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll index 6d2abf7..47b2429 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll @@ -469,11 +469,7 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr ; CHECK-NEXT: mov x9, x0 ; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl __arm_sme_state -; CHECK-NEXT: ldp s4, s0, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: and x19, x0, #0x1 -; CHECK-NEXT: stp s4, s0, [sp, #8] // 8-byte Folded Spill -; CHECK-NEXT: ldp d4, d0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: stp d4, d0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: tbz w19, #0, .LBB10_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: smstop sm diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll index de676ac..465fb466 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll 
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll @@ -405,11 +405,11 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr ; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill -; CHECK-NEXT: stp s1, s0, [sp, #24] // 8-byte Folded Spill -; CHECK-NEXT: stp d3, d2, [sp, #8] // 16-byte Folded Spill +; CHECK-NEXT: stp d2, d3, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldp s1, s0, [sp, #24] // 8-byte Folded Reload -; CHECK-NEXT: ldp d3, d2, [sp, #8] // 16-byte Folded Reload +; CHECK-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldp d2, d3, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: bl bar ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll index cf171f8..45ca784 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changing-call-disable-stackslot-scavenging.ll @@ -22,9 +22,9 @@ define void @test_no_stackslot_scavenging(float %f) #0 { ; CHECK-NEXT: stp x30, x24, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP -; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload ; CHECK-NEXT: bl use_f diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll index c70006d..3e453a6 100644 --- 
a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll @@ -1059,6 +1059,19 @@ define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) { ret <vscale x 16 x i8> %out } +; Immediate instruction form only supports positive values. +define <vscale x 16 x i8> @sqadd_b_negimm(<vscale x 16 x i8> %a) { +; CHECK-LABEL: sqadd_b_negimm: +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.b, z0.b, #128 // =0x80 +; CHECK-NEXT: ret + %elt = insertelement <vscale x 16 x i8> undef, i8 -128, i32 0 + %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.x.nxv16i8(<vscale x 16 x i8> %a, + <vscale x 16 x i8> %splat) + ret <vscale x 16 x i8> %out +} + define <vscale x 8 x i16> @sqadd_h_lowimm(<vscale x 8 x i16> %a) { ; CHECK-LABEL: sqadd_h_lowimm: ; CHECK: // %bb.0: @@ -1083,6 +1096,19 @@ define <vscale x 8 x i16> @sqadd_h_highimm(<vscale x 8 x i16> %a) { ret <vscale x 8 x i16> %out } +; Immediate instruction form only supports positive values. +define <vscale x 8 x i16> @sqadd_h_negimm(<vscale x 8 x i16> %a) { +; CHECK-LABEL: sqadd_h_negimm: +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.h, z0.h, #1 // =0x1 +; CHECK-NEXT: ret + %elt = insertelement <vscale x 8 x i16> undef, i16 -1, i32 0 + %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.x.nxv8i16(<vscale x 8 x i16> %a, + <vscale x 8 x i16> %splat) + ret <vscale x 8 x i16> %out +} + define <vscale x 4 x i32> @sqadd_s_lowimm(<vscale x 4 x i32> %a) { ; CHECK-LABEL: sqadd_s_lowimm: ; CHECK: // %bb.0: @@ -1107,6 +1133,19 @@ define <vscale x 4 x i32> @sqadd_s_highimm(<vscale x 4 x i32> %a) { ret <vscale x 4 x i32> %out } +; Immediate instruction form only supports positive values. 
+define <vscale x 4 x i32> @sqadd_s_negimm(<vscale x 4 x i32> %a) { +; CHECK-LABEL: sqadd_s_negimm: +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.s, z0.s, #65280 // =0xff00 +; CHECK-NEXT: ret + %elt = insertelement <vscale x 4 x i32> undef, i32 -65280, i32 0 + %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.x.nxv4i32(<vscale x 4 x i32> %a, + <vscale x 4 x i32> %splat) + ret <vscale x 4 x i32> %out +} + define <vscale x 2 x i64> @sqadd_d_lowimm(<vscale x 2 x i64> %a) { ; CHECK-LABEL: sqadd_d_lowimm: ; CHECK: // %bb.0: @@ -1131,6 +1170,19 @@ define <vscale x 2 x i64> @sqadd_d_highimm(<vscale x 2 x i64> %a) { ret <vscale x 2 x i64> %out } +; Immediate instruction form only supports positive values. +define <vscale x 2 x i64> @sqadd_d_negimm(<vscale x 2 x i64> %a) { +; CHECK-LABEL: sqadd_d_negimm: +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.d, z0.d, #3840 // =0xf00 +; CHECK-NEXT: ret + %elt = insertelement <vscale x 2 x i64> undef, i64 -3840, i32 0 + %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.x.nxv2i64(<vscale x 2 x i64> %a, + <vscale x 2 x i64> %splat) + ret <vscale x 2 x i64> %out +} + ; SQSUB define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) { @@ -1145,6 +1197,19 @@ define <vscale x 16 x i8> @sqsub_b_lowimm(<vscale x 16 x i8> %a) { ret <vscale x 16 x i8> %out } +; Immediate instruction form only supports positive values. 
+define <vscale x 16 x i8> @sqsub_b_negimm(<vscale x 16 x i8> %a) { +; CHECK-LABEL: sqsub_b_negimm: +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.b, z0.b, #1 // =0x1 +; CHECK-NEXT: ret + %elt = insertelement <vscale x 16 x i8> undef, i8 -1, i32 0 + %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.x.nxv16i8(<vscale x 16 x i8> %a, + <vscale x 16 x i8> %splat) + ret <vscale x 16 x i8> %out +} + define <vscale x 8 x i16> @sqsub_h_lowimm(<vscale x 8 x i16> %a) { ; CHECK-LABEL: sqsub_h_lowimm: ; CHECK: // %bb.0: @@ -1169,6 +1234,19 @@ define <vscale x 8 x i16> @sqsub_h_highimm(<vscale x 8 x i16> %a) { ret <vscale x 8 x i16> %out } +; Immediate instruction form only supports positive values. +define <vscale x 8 x i16> @sqsub_h_negimm(<vscale x 8 x i16> %a) { +; CHECK-LABEL: sqsub_h_negimm: +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.h, z0.h, #128 // =0x80 +; CHECK-NEXT: ret + %elt = insertelement <vscale x 8 x i16> undef, i16 -128, i32 0 + %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.x.nxv8i16(<vscale x 8 x i16> %a, + <vscale x 8 x i16> %splat) + ret <vscale x 8 x i16> %out +} + define <vscale x 4 x i32> @sqsub_s_lowimm(<vscale x 4 x i32> %a) { ; CHECK-LABEL: sqsub_s_lowimm: ; CHECK: // %bb.0: @@ -1193,6 +1271,19 @@ define <vscale x 4 x i32> @sqsub_s_highimm(<vscale x 4 x i32> %a) { ret <vscale x 4 x i32> %out } +; Immediate instruction form only supports positive values. 
+define <vscale x 4 x i32> @sqsub_s_negimm(<vscale x 4 x i32> %a) { +; CHECK-LABEL: sqsub_s_negimm: +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.s, z0.s, #32768 // =0x8000 +; CHECK-NEXT: ret + %elt = insertelement <vscale x 4 x i32> undef, i32 -32768, i32 0 + %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.x.nxv4i32(<vscale x 4 x i32> %a, + <vscale x 4 x i32> %splat) + ret <vscale x 4 x i32> %out +} + define <vscale x 2 x i64> @sqsub_d_lowimm(<vscale x 2 x i64> %a) { ; CHECK-LABEL: sqsub_d_lowimm: ; CHECK: // %bb.0: @@ -1217,6 +1308,19 @@ define <vscale x 2 x i64> @sqsub_d_highimm(<vscale x 2 x i64> %a) { ret <vscale x 2 x i64> %out } +; Immediate instruction form only supports positive values. +define <vscale x 2 x i64> @sqsub_d_negimm(<vscale x 2 x i64> %a) { +; CHECK-LABEL: sqsub_d_negimm: +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.d, z0.d, #57344 // =0xe000 +; CHECK-NEXT: ret + %elt = insertelement <vscale x 2 x i64> undef, i64 -57344, i32 0 + %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.x.nxv2i64(<vscale x 2 x i64> %a, + <vscale x 2 x i64> %splat) + ret <vscale x 2 x i64> %out +} + ; UQADD define <vscale x 16 x i8> @uqadd_b_lowimm(<vscale x 16 x i8> %a) { diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll index 9b82d79..2052ebf 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-while.ll @@ -78,68 +78,16 @@ define <vscale x 2 x i1> @whilege_d_xx(i64 %a, i64 %b) { ret <vscale x 2 x i1> %out } -define <vscale x 2 x i1> @whilege_d_ii_dont_fold_to_ptrue_larger_than_minvec() { -; CHECK-LABEL: whilege_d_ii_dont_fold_to_ptrue_larger_than_minvec: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: 
whilege p0.d, x8, xzr -; CHECK-NEXT: ret - %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilege.nxv2i1.i64(i64 3, i64 0) - ret <vscale x 2 x i1> %out -} - +; Ensure we don't convert constant decrementing while instructions to ptrue. define <vscale x 16 x i1> @whilege_b_ii() { ; CHECK-LABEL: whilege_b_ii: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.b, vl6 -; CHECK-NEXT: ret -entry: - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 3, i32 -2) - ret <vscale x 16 x i1> %out -} - -define <vscale x 16 x i1> @whilege_b_ii_dont_fold_to_ptrue_nonexistent_vl9() { -; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_nonexistent_vl9: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #9 -; CHECK-NEXT: whilege p0.b, x8, xzr -; CHECK-NEXT: ret -entry: - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 9, i64 0) - ret <vscale x 16 x i1> %out -} - -define <vscale x 16 x i1> @whilege_b_ii_vl_maximum() vscale_range(16, 16) { -; CHECK-LABEL: whilege_b_ii_vl_maximum: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b, vl256 -; CHECK-NEXT: ret - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64 255, i64 0) - ret <vscale x 16 x i1> %out -} - -define <vscale x 16 x i1> @whilege_b_ii_dont_fold_to_ptrue_overflow() { -; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_overflow: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #2 -; CHECK-NEXT: mov w9, #2147483647 -; CHECK-NEXT: movk w8, #32768, lsl #16 +; CHECK-NEXT: mov w8, #-2 // =0xfffffffe +; CHECK-NEXT: mov w9, #3 // =0x3 ; CHECK-NEXT: whilege p0.b, w9, w8 ; CHECK-NEXT: ret entry: - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 2147483647, i32 -2147483646) - ret <vscale x 16 x i1> %out -} - -define <vscale x 16 x i1> @whilege_b_ii_dont_fold_to_ptrue_increment_overflow() { -; CHECK-LABEL: whilege_b_ii_dont_fold_to_ptrue_increment_overflow: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #2147483647 -; CHECK-NEXT: mov w9, 
#-2147483641 -; CHECK-NEXT: whilege p0.b, w9, w8 -; CHECK-NEXT: ret -entry: - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 -2147483641, i32 2147483647) + %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32 3, i32 -2) ret <vscale x 16 x i1> %out } @@ -219,69 +167,19 @@ define <vscale x 2 x i1> @whilehs_d_xx(i64 %a, i64 %b) { ret <vscale x 2 x i1> %out } -define <vscale x 2 x i1> @whilehs_d_ii_dont_fold_to_ptrue_larger_than_minvec() { -; CHECK-LABEL: whilehs_d_ii_dont_fold_to_ptrue_larger_than_minvec: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: whilehs p0.d, x8, xzr -; CHECK-NEXT: ret - %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehs.nxv2i1.i64(i64 3, i64 0) - ret <vscale x 2 x i1> %out -} - +; Ensure we don't convert constant decrementing while instructions to ptrue. define <vscale x 16 x i1> @whilehs_b_ii() { ; CHECK-LABEL: whilehs_b_ii: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.b, vl7 +; CHECK-NEXT: mov w8, #2 // =0x2 +; CHECK-NEXT: mov w9, #8 // =0x8 +; CHECK-NEXT: whilehs p0.b, x9, x8 ; CHECK-NEXT: ret entry: %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 8, i64 2) ret <vscale x 16 x i1> %out } -define <vscale x 16 x i1> @whilehs_b_ii_dont_fold_to_ptrue_nonexistent_vl9() { -; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_nonexistent_vl9: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #9 -; CHECK-NEXT: whilehs p0.b, x8, xzr -; CHECK-NEXT: ret -entry: - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 9, i64 0) - ret <vscale x 16 x i1> %out -} - -define <vscale x 16 x i1> @whilehs_b_ii_vl_maximum() vscale_range(16, 16) { -; CHECK-LABEL: whilehs_b_ii_vl_maximum: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b, vl256 -; CHECK-NEXT: ret - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i64(i64 255, i64 0) - ret <vscale x 16 x i1> %out -} - -define <vscale x 16 x i1> @whilehs_b_ii_dont_fold_to_ptrue_overflow() { -; 
CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_overflow: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: mov w9, #6 -; CHECK-NEXT: whilehs p0.b, w9, w8 -; CHECK-NEXT: ret -entry: - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 6, i32 4294967295) - ret <vscale x 16 x i1> %out -} - -define <vscale x 16 x i1> @whilehs_b_ii_dont_fold_to_ptrue_increment_overflow() { -; CHECK-LABEL: whilehs_b_ii_dont_fold_to_ptrue_increment_overflow: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: whilehs p0.b, w8, wzr -; CHECK-NEXT: ret -entry: - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehs.nxv16i1.i32(i32 4294967295, i32 0) - ret <vscale x 16 x i1> %out -} - ; ; WHILEGT ; @@ -358,55 +256,16 @@ define <vscale x 2 x i1> @whilegt_d_xx(i64 %a, i64 %b) { ret <vscale x 2 x i1> %out } -define <vscale x 2 x i1> @whilegt_d_ii_dont_fold_to_ptrue_larger_than_minvec() { -; CHECK-LABEL: whilegt_d_ii_dont_fold_to_ptrue_larger_than_minvec: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: whilegt p0.d, x8, xzr -; CHECK-NEXT: ret - %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilegt.nxv2i1.i64(i64 3, i64 0) - ret <vscale x 2 x i1> %out -} - +; Ensure we don't convert constant decrementing while instructions to ptrue. 
define <vscale x 16 x i1> @whilegt_b_ii() { ; CHECK-LABEL: whilegt_b_ii: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.b, vl5 -; CHECK-NEXT: ret -entry: - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 3, i32 -2) - ret <vscale x 16 x i1> %out -} - -define <vscale x 16 x i1> @whilegt_b_ii_fold_to_ptrue_nonexistent_vl9() { -; CHECK-LABEL: whilegt_b_ii_fold_to_ptrue_nonexistent_vl9: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #9 -; CHECK-NEXT: whilegt p0.b, x8, xzr -; CHECK-NEXT: ret -entry: - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 9, i64 0) - ret <vscale x 16 x i1> %out -} - -define <vscale x 16 x i1> @whilegt_b_ii_vl_maximum() vscale_range(16, 16) { -; CHECK-LABEL: whilegt_b_ii_vl_maximum: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b, vl256 -; CHECK-NEXT: ret - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i64(i64 256, i64 0) - ret <vscale x 16 x i1> %out -} - -define <vscale x 16 x i1> @whilegt_b_ii_dont_fold_to_ptrue_overflow() { -; CHECK-LABEL: whilegt_b_ii_dont_fold_to_ptrue_overflow: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #2147483647 -; CHECK-NEXT: mov w9, #-2147483641 +; CHECK-NEXT: mov w8, #-2 // =0xfffffffe +; CHECK-NEXT: mov w9, #3 // =0x3 ; CHECK-NEXT: whilegt p0.b, w9, w8 ; CHECK-NEXT: ret entry: - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 -2147483641, i32 2147483647) + %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilegt.nxv16i1.i32(i32 3, i32 -2) ret <vscale x 16 x i1> %out } @@ -486,58 +345,19 @@ define <vscale x 2 x i1> @whilehi_d_xx(i64 %a, i64 %b) { ret <vscale x 2 x i1> %out } -define <vscale x 2 x i1> @whilehi_d_ii_dont_fold_to_ptrue_larger_than_minvec() { -; CHECK-LABEL: whilehi_d_ii_dont_fold_to_ptrue_larger_than_minvec: -; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: whilehi p0.d, x8, xzr -; CHECK-NEXT: ret - %out = call <vscale x 2 x i1> @llvm.aarch64.sve.whilehi.nxv2i1.i64(i64 3, i64 0) 
- ret <vscale x 2 x i1> %out -} - +; Ensure we don't convert constant decrementing while instructions to ptrue. define <vscale x 16 x i1> @whilehi_b_ii() { ; CHECK-LABEL: whilehi_b_ii: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue p0.b, vl6 +; CHECK-NEXT: mov w8, #2 // =0x2 +; CHECK-NEXT: mov w9, #8 // =0x8 +; CHECK-NEXT: whilehi p0.b, x9, x8 ; CHECK-NEXT: ret entry: %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 8, i64 2) ret <vscale x 16 x i1> %out } -define <vscale x 16 x i1> @whilehi_b_ii_dont_fold_to_ptrue_nonexistent_vl9() { -; CHECK-LABEL: whilehi_b_ii_dont_fold_to_ptrue_nonexistent_vl9: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #9 -; CHECK-NEXT: whilehi p0.b, x8, xzr -; CHECK-NEXT: ret -entry: - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 9, i64 0) - ret <vscale x 16 x i1> %out -} - -define <vscale x 16 x i1> @whilehi_b_ii_vl_maximum() vscale_range(16, 16) { -; CHECK-LABEL: whilehi_b_ii_vl_maximum: -; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b, vl256 -; CHECK-NEXT: ret - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i64(i64 256, i64 0) - ret <vscale x 16 x i1> %out -} - -define <vscale x 16 x i1> @whilelhi_b_ii_dont_fold_to_ptrue_overflow() { -; CHECK-LABEL: whilelhi_b_ii_dont_fold_to_ptrue_overflow: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: mov w9, #7 -; CHECK-NEXT: whilehi p0.b, w9, w8 -; CHECK-NEXT: ret -entry: - %out = call <vscale x 16 x i1> @llvm.aarch64.sve.whilehi.nxv16i1.i32(i32 7, i32 4294967295) - ret <vscale x 16 x i1> %out -} - declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i32(i32, i32) declare <vscale x 16 x i1> @llvm.aarch64.sve.whilege.nxv16i1.i64(i64, i64) declare <vscale x 8 x i1> @llvm.aarch64.sve.whilege.nxv8i1.i32(i32, i32) diff --git a/llvm/test/CodeGen/AArch64/vector-lrint.ll b/llvm/test/CodeGen/AArch64/vector-lrint.ll index b7fcd11..db85b23 100644 --- a/llvm/test/CodeGen/AArch64/vector-lrint.ll +++ 
b/llvm/test/CodeGen/AArch64/vector-lrint.ll @@ -2,8 +2,7 @@ ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64 -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for lrint_v1f16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v2f16 +; CHECK-GI: warning: Instruction selection used fallback path for lrint_v2f16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v4f16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v8f16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lrint_v16i64_v16f16 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir index ee0e83c..0207613 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fcanonicalize.mir @@ -254,8 +254,8 @@ body: | ; CHECK-NEXT: %one_s32:_(s32) = G_ANYEXT %one(s16) ; CHECK-NEXT: %one_undef:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC %one_s32(s32), %undef(s32) ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[COPY]], %two_splat - ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE %zero_undef, [[FMUL]] - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE %one_undef, [[FMAXNUM_IEEE]] + ; CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FMUL]], %zero_undef + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], %one_undef ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %two:_(s16) = G_FCONSTANT half 0xH4000 @@ -306,7 +306,7 @@ body: | ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(<2 x s16>) = G_FMUL [[COPY]], %two_splat ; CHECK-NEXT: %snan_undef_fcan:_(<2 x s16>) = G_FCANONICALIZE %snan_undef ; 
CHECK-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE %snan_undef_fcan, [[FMUL]] - ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE %qnan_undef, [[FMAXNUM_IEEE]] + ; CHECK-NEXT: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FMAXNUM_IEEE]], %qnan_undef ; CHECK-NEXT: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %two:_(s16) = G_FCONSTANT half 0xH4000 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir index d6321da..67e6de1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-and.mir @@ -318,7 +318,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: %val:_(s32) = COPY $vgpr4 ; CHECK-NEXT: %k255:_(s32) = G_CONSTANT i32 255 - ; CHECK-NEXT: %umin0:_(s32) = G_UMIN %k255, %val + ; CHECK-NEXT: %umin0:_(s32) = G_UMIN %val, %k255 ; CHECK-NEXT: $vgpr0 = COPY %umin0(s32) %ptr0:_(p1) = COPY $vgpr0_vgpr1 %ptr1:_(p1) = COPY $vgpr2_vgpr3 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll index dc13dee..1d94d76 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll @@ -145,10 +145,10 @@ define <2 x i16> @test_max_K0min_K1Val__v2i16(<2 x i16> %a) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, 17 ; GFX8-NEXT: v_min_i16_e32 v1, 17, v0 -; GFX8-NEXT: v_min_i16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_min_i16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_mov_b32_e32 v2, -12 ; GFX8-NEXT: v_max_i16_e32 v1, -12, v1 -; GFX8-NEXT: v_max_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_max_i16_sdwa v0, v0, v2 dst_sel:WORD_1 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll index 7e38762..a8233054 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll @@ -145,10 +145,10 @@ define <2 x i16> @test_max_K0min_K1Val__v2u16(<2 x i16> %a) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, 17 ; GFX8-NEXT: v_min_u16_e32 v1, 17, v0 -; GFX8-NEXT: v_min_u16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_min_u16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_mov_b32_e32 v2, 12 ; GFX8-NEXT: v_max_u16_e32 v1, 12, v1 -; GFX8-NEXT: v_max_u16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_max_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll index 07480a0..cc0f7e2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll @@ -983,7 +983,7 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3 ; CHECK-NEXT: v_mul_lo_u32 v6, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v7, v3, v5 -; CHECK-NEXT: v_mul_hi_u32 v8, v5, v3 +; CHECK-NEXT: v_mul_hi_u32 v8, v3, v5 ; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v6, v3 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8 ; CHECK-NEXT: v_mul_lo_u32 v8, v4, v7 @@ -1010,7 +1010,7 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v5 -; CHECK-NEXT: 
v_mul_hi_u32 v7, v5, v3 +; CHECK-NEXT: v_mul_hi_u32 v7, v3, v5 ; CHECK-NEXT: v_mul_lo_u32 v5, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v8, v4, v6 ; CHECK-NEXT: v_mul_hi_u32 v9, v3, v6 @@ -1058,7 +1058,7 @@ define i64 @v_urem_i64_oddk_denom(i64 %num) { ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, v2 -; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3 +; CHECK-NEXT: v_mul_hi_u32 v3, v3, v2 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5 ; CHECK-NEXT: v_mul_lo_u32 v4, v4, v2 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3 @@ -1265,10 +1265,10 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4 -; GISEL-NEXT: v_mul_hi_u32 v9, v4, v9 +; GISEL-NEXT: v_mul_hi_u32 v9, v9, v4 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8 ; GISEL-NEXT: v_mul_lo_u32 v12, v5, v4 -; GISEL-NEXT: v_mul_hi_u32 v5, v4, v5 +; GISEL-NEXT: v_mul_hi_u32 v5, v5, v4 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v4 @@ -1339,7 +1339,7 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5 ; CGP-NEXT: v_mul_lo_u32 v8, v6, v7 ; CGP-NEXT: v_mul_lo_u32 v9, v5, v7 -; CGP-NEXT: v_mul_hi_u32 v10, v7, v5 +; CGP-NEXT: v_mul_hi_u32 v10, v5, v7 ; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v5 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10 ; CGP-NEXT: v_mul_lo_u32 v10, v6, v9 @@ -1366,7 +1366,7 @@ define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9 ; CGP-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc ; CGP-NEXT: v_mul_lo_u32 v8, v5, v7 -; CGP-NEXT: v_mul_hi_u32 v9, v7, v5 +; CGP-NEXT: v_mul_hi_u32 v9, v5, v7 ; CGP-NEXT: v_mul_lo_u32 v7, v6, v7 ; CGP-NEXT: v_mul_lo_u32 v10, v6, v8 ; CGP-NEXT: v_mul_hi_u32 v11, v5, v8 @@ -1433,10 +1433,10 @@ define <2 x i64> 
@v_urem_v2i64_oddk_denom(<2 x i64> %num) { ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc ; CGP-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; CGP-NEXT: v_mul_lo_u32 v9, v7, v4 -; CGP-NEXT: v_mul_hi_u32 v7, v4, v7 +; CGP-NEXT: v_mul_hi_u32 v7, v7, v4 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10 ; CGP-NEXT: v_mul_lo_u32 v11, v5, v4 -; CGP-NEXT: v_mul_hi_u32 v5, v4, v5 +; CGP-NEXT: v_mul_hi_u32 v5, v5, v4 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v15, v8 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10 ; CGP-NEXT: v_mul_lo_u32 v8, v8, v4 diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll new file mode 100644 index 0000000..6b835bb --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-libcall-sincos-pass-ordering.ll @@ -0,0 +1,77 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -O1 -amdgpu-prelink %s | FileCheck %s + +; Make sure that sin+cos -> sincos simplification happens after +; initial IR simplifications, otherwise we can't identify the common +; argument value. 
+ +@.str = private unnamed_addr addrspace(4) constant [21 x i8] c"x: %f, y: %f, z: %f\0A\00", align 1 + +; Should have call to sincos declarations, not calls to the asm pseudo-libcalls +define protected amdgpu_kernel void @swdev456865(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(1) %out2, float noundef %x) #0 { +; CHECK-LABEL: define protected amdgpu_kernel void @swdev456865( +; CHECK-SAME: ptr addrspace(1) nocapture writeonly [[OUT0:%.*]], ptr addrspace(1) nocapture writeonly [[OUT1:%.*]], ptr addrspace(1) nocapture writeonly [[OUT2:%.*]], float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[I_I:%.*]] = call float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: [[I_I2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = fadd float [[I_I]], [[I_I2]] +; CHECK-NEXT: [[CONV:%.*]] = fpext float [[X]] to double +; CHECK-NEXT: [[CONV5:%.*]] = fpext float [[ADD]] to double +; CHECK-NEXT: store double [[CONV]], ptr addrspace(1) [[OUT0]], align 8 +; CHECK-NEXT: store double [[CONV5]], ptr addrspace(1) [[OUT1]], align 8 +; CHECK-NEXT: store double [[CONV5]], ptr addrspace(1) [[OUT2]], align 8 +; CHECK-NEXT: ret void +; +entry: + %x.addr = alloca float, align 4, addrspace(5) + %y = alloca float, align 4, addrspace(5) + %z = alloca float, align 4, addrspace(5) + store float %x, ptr addrspace(5) %x.addr, align 4 + call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %y) + %i = load float, ptr addrspace(5) %x.addr, align 4 + %call = call float @_Z3sinf(float noundef %i) #3 + %i1 = load float, ptr addrspace(5) %x.addr, align 4 + %call1 = call float @_Z3cosf(float noundef %i1) #3 + %add = fadd float %call, %call1 + store float %add, ptr addrspace(5) %y, align 4 + call void @llvm.lifetime.start.p5(i64 4, ptr addrspace(5) %z) + %i2 = load float, ptr 
addrspace(5) %x.addr, align 4 + %call2 = call float @_Z3cosf(float noundef %i2) #3 + %i3 = load float, ptr addrspace(5) %x.addr, align 4 + %call3 = call float @_Z3sinf(float noundef %i3) #3 + %add4 = fadd float %call2, %call3 + store float %add4, ptr addrspace(5) %z, align 4 + %i4 = load float, ptr addrspace(5) %x.addr, align 4 + %conv = fpext float %i4 to double + %i5 = load float, ptr addrspace(5) %y, align 4 + %conv5 = fpext float %i5 to double + %i6 = load float, ptr addrspace(5) %z, align 4 + %conv6 = fpext float %i6 to double + store double %conv, ptr addrspace(1) %out0, align 8 + store double %conv5, ptr addrspace(1) %out1, align 8 + store double %conv6, ptr addrspace(1) %out2, align 8 + call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %z) + call void @llvm.lifetime.end.p5(i64 4, ptr addrspace(5) %y) + ret void +} + +declare void @llvm.lifetime.start.p5(i64 immarg, ptr addrspace(5) nocapture) #1 +declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture) #1 + +define internal float @_Z3cosf(float noundef %arg) #2 { +bb: + %i = tail call float asm "pseudo-libcall-cos %0, %1", "=v,v"(float noundef %arg) #2 + ret float %i +} + +define internal float @_Z3sinf(float noundef %arg) #2 { +bb: + %i = tail call float asm "pseudo-libcall-sin %0, %1", "=v,v"(float noundef %arg) #2 + ret float %i +} + +attributes #0 = { norecurse nounwind } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { mustprogress nofree norecurse nounwind willreturn memory(none) } +attributes #3 = { nounwind willreturn memory(none) } diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll index 5c56276..9646d19 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll @@ -884,10 +884,9 @@ entry: define float @sincos_f32_unused_result_cos(float %x) { ; 
CHECK-LABEL: define float @sincos_f32_unused_result_cos -; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP0:%.*]] = tail call contract float @_Z3sinf(float [[X]]) ; CHECK-NEXT: ret float [[TMP0]] ; entry: @@ -900,11 +899,9 @@ entry: define float @sincos_f32_unused_result_sin(float %x) { ; CHECK-LABEL: define float @sincos_f32_unused_result_sin -; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr #[[ATTR4]] { +; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) -; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = tail call contract float @_Z3cosf(float [[X]]) ; CHECK-NEXT: ret float [[TMP1]] ; entry: @@ -917,13 +914,11 @@ entry: define void @sincos_f32_repeated_uses(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) { ; CHECK-LABEL: define void @sincos_f32_repeated_uses -; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) [[SIN_OUT:%.*]], ptr addrspace(1) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { +; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) [[SIN_OUT:%.*]], ptr addrspace(1) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[__SINCOS_3:%.*]] = alloca float, align 4, addrspace(5) ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_3]]) -; CHECK-NEXT: [[TMP1:%.*]] = call contract float 
@_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_3]], align 4 ; CHECK-NEXT: store volatile float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: store volatile float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: store volatile float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index bf4302c..4c9c34d 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -38342,12 +38342,11 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX10-NEXT: v_and_b32_e32 v2, 1, v2 ; GFX10-NEXT: v_and_b32_e32 v4, 1, v4 ; GFX10-NEXT: v_and_b32_e32 v6, 1, v6 -; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_and_b32_e32 v8, 1, v8 ; GFX10-NEXT: v_and_b32_e32 v10, 1, v10 +; GFX10-NEXT: v_writelane_b32 v40, s31, 1 ; GFX10-NEXT: v_and_b32_e32 v1, 1, v1 ; GFX10-NEXT: v_and_b32_e32 v3, 1, v3 -; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_and_b32_e32 v5, 1, v5 ; GFX10-NEXT: v_and_b32_e32 v7, 1, v7 ; GFX10-NEXT: v_and_b32_e32 v9, 1, v9 @@ -38366,7 +38365,7 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX10-NEXT: v_cmp_eq_u32_e64 s17, 1, v4 ; GFX10-NEXT: v_cmp_eq_u32_e64 s18, 1, v2 ; GFX10-NEXT: v_cmp_eq_u32_e64 s19, 1, v0 -; GFX10-NEXT: v_writelane_b32 v40, s35, 3 +; GFX10-NEXT: v_writelane_b32 v40, s34, 2 ; GFX10-NEXT: v_cmp_eq_u32_e64 s20, 1, v27 ; GFX10-NEXT: v_cmp_eq_u32_e64 s21, 1, v25 ; GFX10-NEXT: v_cmp_eq_u32_e64 s22, 1, v23 @@ -38377,10 +38376,10 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX10-NEXT: v_cmp_eq_u32_e64 s27, 1, v13 ; GFX10-NEXT: v_cmp_eq_u32_e64 s28, 1, v11 ; GFX10-NEXT: v_cmp_eq_u32_e64 s29, 1, v7 -; GFX10-NEXT: v_cmp_eq_u32_e64 
s30, 1, v3 -; GFX10-NEXT: v_cmp_eq_u32_e64 s31, 1, v1 -; GFX10-NEXT: v_cmp_eq_u32_e64 s34, 1, v5 -; GFX10-NEXT: v_cmp_eq_u32_e64 s35, 1, v9 +; GFX10-NEXT: v_cmp_eq_u32_e64 vcc_hi, 1, v3 +; GFX10-NEXT: v_cmp_eq_u32_e64 s30, 1, v1 +; GFX10-NEXT: v_cmp_eq_u32_e64 s31, 1, v5 +; GFX10-NEXT: v_cmp_eq_u32_e64 s34, 1, v9 ; GFX10-NEXT: s_waitcnt vmcnt(32) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v31 ; GFX10-NEXT: s_waitcnt vmcnt(31) @@ -38460,10 +38459,10 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX10-NEXT: v_cndmask_b32_e64 v6, v29, v39, s27 ; GFX10-NEXT: v_cndmask_b32_e64 v5, v28, v26, s28 ; GFX10-NEXT: v_cndmask_b32_e64 v20, v51, v20, s29 -; GFX10-NEXT: v_cndmask_b32_e64 v0, v14, v12, s31 -; GFX10-NEXT: v_cndmask_b32_e64 v1, v55, v16, s30 -; GFX10-NEXT: v_cndmask_b32_e64 v2, v53, v18, s34 -; GFX10-NEXT: v_cndmask_b32_e64 v12, v24, v22, s35 +; GFX10-NEXT: v_cndmask_b32_e64 v0, v14, v12, s30 +; GFX10-NEXT: v_cndmask_b32_e64 v1, v55, v16, vcc_hi +; GFX10-NEXT: v_cndmask_b32_e64 v2, v53, v18, s31 +; GFX10-NEXT: v_cndmask_b32_e64 v12, v24, v22, s34 ; GFX10-NEXT: v_cndmask_b32_e64 v16, v4, v3, s4 ; GFX10-NEXT: v_perm_b32 v0, v0, v64, 0x5040100 ; GFX10-NEXT: v_perm_b32 v1, v1, v54, 0x5040100 @@ -38481,7 +38480,6 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x ; GFX10-NEXT: v_perm_b32 v13, v66, v13, 0x5040100 ; GFX10-NEXT: v_perm_b32 v14, v65, v17, 0x5040100 ; GFX10-NEXT: v_perm_b32 v15, v16, v15, 0x5040100 -; GFX10-NEXT: v_readlane_b32 s35, v40, 3 ; GFX10-NEXT: v_readlane_b32 s34, v40, 2 ; GFX10-NEXT: v_readlane_b32 s31, v40, 1 ; GFX10-NEXT: v_readlane_b32 s30, v40, 0 diff --git a/llvm/test/CodeGen/AMDGPU/build_vector.ll b/llvm/test/CodeGen/AMDGPU/build_vector.ll index 37412ac..9975513 100644 --- a/llvm/test/CodeGen/AMDGPU/build_vector.ll +++ b/llvm/test/CodeGen/AMDGPU/build_vector.ll @@ -3,6 +3,7 @@ ; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | 
FileCheck %s --check-prefixes=GFX8,GFX678,ALL ; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX10,GFX1011,ALL ; RUN: llc < %s -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11,GFX1011,ALL +; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 | FileCheck %s --check-prefixes=GFX940,ALL ; ALL-LABEL: {{^}}build_vector2: ; R600: MOV @@ -96,3 +97,99 @@ define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32 store <2 x i16> %ins.1, ptr addrspace(1) %out ret void } + +; R600-LABEL: build_v2i32_from_v4i16_shuffle: +; R600: ; %bb.0: ; %entry +; R600-NEXT: ALU 0, @10, KC0[], KC1[] +; R600-NEXT: TEX 1 @6 +; R600-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: Fetch clause starting at 6: +; R600-NEXT: VTX_READ_16 T1.X, T0.X, 48, #3 +; R600-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3 +; R600-NEXT: ALU clause starting at 10: +; R600-NEXT: MOV * T0.X, 0.0, +; R600-NEXT: ALU clause starting at 11: +; R600-NEXT: LSHL * T0.Y, T1.X, literal.x, +; R600-NEXT: 16(2.242078e-44), 0(0.000000e+00) +; R600-NEXT: LSHL T0.X, T0.X, literal.x, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, +; R600-NEXT: 16(2.242078e-44), 2(2.802597e-45) +; +; GFX6-LABEL: build_v2i32_from_v4i16_shuffle: +; GFX6: ; %bb.0: ; %entry +; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 +; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_lshl_b32 s3, s3, 16 +; GFX6-NEXT: s_lshl_b32 s2, s2, 16 +; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: s_mov_b32 s4, s0 +; GFX6-NEXT: s_mov_b32 s5, s1 +; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_mov_b32_e32 v1, s3 +; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GFX6-NEXT: s_endpgm +; +; GFX8-LABEL: build_v2i32_from_v4i16_shuffle: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_load_dwordx4 s[0:3], 
s[0:1], 0x24 +; GFX8-NEXT: s_mov_b32 s7, 0xf000 +; GFX8-NEXT: s_mov_b32 s6, -1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-NEXT: s_mov_b32 s4, s0 +; GFX8-NEXT: s_mov_b32 s5, s1 +; GFX8-NEXT: s_lshl_b32 s0, s3, 16 +; GFX8-NEXT: s_lshl_b32 s1, s2, 16 +; GFX8-NEXT: v_mov_b32_e32 v0, s1 +; GFX8-NEXT: v_mov_b32_e32 v1, s0 +; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GFX8-NEXT: s_endpgm +; +; GFX10-LABEL: build_v2i32_from_v4i16_shuffle: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_lshl_b32 s2, s2, 16 +; GFX10-NEXT: s_lshl_b32 s3, s3, 16 +; GFX10-NEXT: v_mov_b32_e32 v0, s2 +; GFX10-NEXT: v_mov_b32_e32 v1, s3 +; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: build_v2i32_from_v4i16_shuffle: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_lshl_b32 s2, s2, 16 +; GFX11-NEXT: s_lshl_b32 s3, s3, 16 +; GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-NEXT: v_mov_b32_e32 v1, s3 +; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; GFX940-LABEL: build_v2i32_from_v4i16_shuffle: +; GFX940: ; %bb.0: ; %entry +; GFX940-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GFX940-NEXT: v_mov_b32_e32 v2, 0 +; GFX940-NEXT: s_waitcnt lgkmcnt(0) +; GFX940-NEXT: s_lshl_b32 s3, s3, 16 +; GFX940-NEXT: s_lshl_b32 s2, s2, 16 +; GFX940-NEXT: v_mov_b32_e32 v0, s2 +; GFX940-NEXT: v_mov_b32_e32 v1, s3 +; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1 +; GFX940-NEXT: s_endpgm +define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out, <4 x i16> %in) { +entry: + %shuf = shufflevector <4 x i16> %in, <4 x i16> zeroinitializer, <2 x i32> <i32 0, i32 2> + %zextended = zext <2 x i16> %shuf to 
<2 x i32> + %shifted = shl <2 x i32> %zextended, <i32 16, i32 16> + store <2 x i32> %shifted, ptr addrspace(1) %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/fold-fabs.ll b/llvm/test/CodeGen/AMDGPU/fold-fabs.ll index bb2bad9..a04bf44 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-fabs.ll +++ b/llvm/test/CodeGen/AMDGPU/fold-fabs.ll @@ -99,12 +99,6 @@ define float @fold_abs_in_branch_poison(float %arg1, float %arg2) { ; GFX10-LABEL: fold_abs_in_branch_poison: ; GFX10: ; %bb.0: ; %entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_add_f32_e64 v0, |s4|, |s4| -; GFX10-NEXT: v_cmp_lt_f32_e32 vcc_lo, 1.0, v0 -; GFX10-NEXT: s_cbranch_vccnz .LBB3_2 -; GFX10-NEXT: ; %bb.1: ; %if -; GFX10-NEXT: v_mul_f32_e64 v0, 0x3e4ccccd, |s4| -; GFX10-NEXT: .LBB3_2: ; %exit ; GFX10-NEXT: s_setpc_b64 s[30:31] entry: %0 = fadd reassoc nnan nsz arcp contract afn float %arg1, %arg2 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll index ec3c08e..da64c37 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll @@ -1259,17 +1259,17 @@ define <4 x i1> @isnan_v4f16(<4 x half> %x) nounwind { ; GFX10SELDAG-LABEL: isnan_v4f16: ; GFX10SELDAG: ; %bb.0: ; GFX10SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10SELDAG-NEXT: v_mov_b32_e32 v2, 3 -; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s5, v0, 3 -; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s4, v1, v2 src0_sel:WORD_1 src1_sel:DWORD -; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s5 -; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s5, v0, v2 src0_sel:WORD_1 src1_sel:DWORD +; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s4, v0, 3 +; GFX10SELDAG-NEXT: v_mov_b32_e32 v3, 3 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, s4 +; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s4, v1, 3 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4 +; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s4, v0, v3 src0_sel:WORD_1 src1_sel:DWORD +; 
GFX10SELDAG-NEXT: v_mov_b32_e32 v0, v5 +; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, s4 +; GFX10SELDAG-NEXT: v_cmp_class_f16_sdwa s4, v1, v3 src0_sel:WORD_1 src1_sel:DWORD +; GFX10SELDAG-NEXT: v_mov_b32_e32 v1, v4 ; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 -; GFX10SELDAG-NEXT: v_mov_b32_e32 v0, v4 -; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v5, 0, 1, s5 -; GFX10SELDAG-NEXT: v_cmp_class_f16_e64 s5, v1, 3 -; GFX10SELDAG-NEXT: v_mov_b32_e32 v1, v5 -; GFX10SELDAG-NEXT: v_cndmask_b32_e64 v2, 0, 1, s5 ; GFX10SELDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX10GLISEL-LABEL: isnan_v4f16: diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll index ab6a9dc..a87fa8b 100644 --- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll @@ -7404,35 +7404,35 @@ define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(ptr addrspace(1) % ; GFX12-NEXT: v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v9, s31 ; GFX12-NEXT: v_dual_mov_b32 v8, s30 :: v_dual_mov_b32 v11, s35 ; GFX12-NEXT: v_dual_mov_b32 v10, s34 :: v_dual_mov_b32 v3, s5 -; GFX12-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x100000 -; GFX12-NEXT: s_lshr_b32 s12, s0, 16 -; GFX12-NEXT: s_mov_b32 s14, s1 -; GFX12-NEXT: s_lshr_b32 s16, s1, 16 -; GFX12-NEXT: s_bfe_i64 s[0:1], s[2:3], 0x100000 +; GFX12-NEXT: s_bfe_i64 s[16:17], s[2:3], 0x100000 ; GFX12-NEXT: s_lshr_b32 s2, s2, 16 ; GFX12-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 ; GFX12-NEXT: v_dual_mov_b32 v0, s28 :: v_dual_mov_b32 v5, s23 ; GFX12-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v13, s25 +; GFX12-NEXT: s_mov_b32 s12, s1 +; GFX12-NEXT: s_lshr_b32 s14, s1, 16 ; GFX12-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000 ; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000 ; GFX12-NEXT: v_dual_mov_b32 v12, s24 :: v_dual_mov_b32 v15, s27 ; GFX12-NEXT: v_dual_mov_b32 v14, s26 :: v_dual_mov_b32 v7, s7 +; GFX12-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x100000 +; GFX12-NEXT: s_lshr_b32 s0, s0, 16 ; 
GFX12-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 ; GFX12-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v17, s19 +; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 ; GFX12-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000 -; GFX12-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000 ; GFX12-NEXT: v_dual_mov_b32 v16, s18 :: v_dual_mov_b32 v19, s21 ; GFX12-NEXT: v_mov_b32_e32 v18, s20 -; GFX12-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 +; GFX12-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x100000 ; GFX12-NEXT: s_clause 0x1 ; GFX12-NEXT: global_store_b128 v24, v[8:11], s[8:9] offset:80 ; GFX12-NEXT: global_store_b128 v24, v[0:3], s[8:9] offset:64 -; GFX12-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v0, s0 +; GFX12-NEXT: v_dual_mov_b32 v1, s17 :: v_dual_mov_b32 v0, s16 ; GFX12-NEXT: v_dual_mov_b32 v3, s3 :: v_dual_mov_b32 v2, s2 -; GFX12-NEXT: v_dual_mov_b32 v9, s15 :: v_dual_mov_b32 v8, s14 -; GFX12-NEXT: v_dual_mov_b32 v11, s17 :: v_dual_mov_b32 v10, s16 +; GFX12-NEXT: v_dual_mov_b32 v9, s13 :: v_dual_mov_b32 v8, s12 +; GFX12-NEXT: v_dual_mov_b32 v11, s15 :: v_dual_mov_b32 v10, s14 ; GFX12-NEXT: v_dual_mov_b32 v21, s11 :: v_dual_mov_b32 v20, s10 -; GFX12-NEXT: v_dual_mov_b32 v23, s13 :: v_dual_mov_b32 v22, s12 +; GFX12-NEXT: v_dual_mov_b32 v23, s1 :: v_dual_mov_b32 v22, s0 ; GFX12-NEXT: s_clause 0x5 ; GFX12-NEXT: global_store_b128 v24, v[12:15], s[8:9] offset:112 ; GFX12-NEXT: global_store_b128 v24, v[4:7], s[8:9] offset:96 diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll index 952827b..889755c 100644 --- a/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll +++ b/llvm/test/CodeGen/AMDGPU/load-constant-i8.ll @@ -8808,73 +8808,73 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o ; GFX12-NEXT: v_lshrrev_b16 v2, 8, s6 ; GFX12-NEXT: v_lshrrev_b16 v4, 8, s5 ; GFX12-NEXT: v_lshrrev_b16 v8, 8, s2 -; GFX12-NEXT: s_lshr_b32 s24, s7, 16 +; GFX12-NEXT: s_lshr_b32 s22, s7, 16 ; GFX12-NEXT: v_bfe_i32 v31, v1, 0, 8 -; 
GFX12-NEXT: s_lshr_b32 s42, s2, 24 -; GFX12-NEXT: s_mov_b32 s48, s7 +; GFX12-NEXT: s_lshr_b32 s40, s2, 24 +; GFX12-NEXT: s_mov_b32 s46, s7 ; GFX12-NEXT: v_lshrrev_b16 v5, 8, s4 ; GFX12-NEXT: v_lshrrev_b16 v7, 8, s1 -; GFX12-NEXT: s_lshr_b32 s26, s6, 16 -; GFX12-NEXT: s_lshr_b32 s44, s1, 16 +; GFX12-NEXT: s_lshr_b32 s24, s6, 16 +; GFX12-NEXT: s_lshr_b32 s42, s1, 16 ; GFX12-NEXT: s_ashr_i64 s[58:59], s[6:7], 56 -; GFX12-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000 +; GFX12-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x80000 +; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000 +; GFX12-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x80000 ; GFX12-NEXT: v_lshrrev_b16 v6, 8, s3 ; GFX12-NEXT: v_lshrrev_b16 v3, 8, s0 -; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v33, s24 -; GFX12-NEXT: s_lshr_b32 s28, s6, 24 -; GFX12-NEXT: s_lshr_b32 s30, s5, 16 -; GFX12-NEXT: s_lshr_b32 s40, s2, 16 +; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v33, s22 +; GFX12-NEXT: s_lshr_b32 s26, s6, 24 +; GFX12-NEXT: s_lshr_b32 s28, s5, 16 +; GFX12-NEXT: s_lshr_b32 s38, s2, 16 ; GFX12-NEXT: v_bfe_i32 v11, v8, 0, 8 ; GFX12-NEXT: v_bfe_i32 v23, v4, 0, 8 ; GFX12-NEXT: v_bfe_i32 v27, v2, 0, 8 ; GFX12-NEXT: v_ashrrev_i32_e32 v32, 31, v31 -; GFX12-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000 -; GFX12-NEXT: v_dual_mov_b32 v34, s25 :: v_dual_mov_b32 v35, s58 -; GFX12-NEXT: v_dual_mov_b32 v36, s59 :: v_dual_mov_b32 v37, s26 -; GFX12-NEXT: v_dual_mov_b32 v56, s43 :: v_dual_mov_b32 v29, s48 -; GFX12-NEXT: v_mov_b32_e32 v30, s49 -; GFX12-NEXT: s_lshr_b32 s46, s0, 24 -; GFX12-NEXT: s_mov_b32 s50, s5 -; GFX12-NEXT: s_mov_b32 s52, s3 -; GFX12-NEXT: s_lshr_b32 s34, s4, 16 -; GFX12-NEXT: s_lshr_b32 s36, s4, 24 -; GFX12-NEXT: s_ashr_i64 s[22:23], s[2:3], 56 +; GFX12-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x80000 +; GFX12-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x80000 +; 
GFX12-NEXT: v_dual_mov_b32 v34, s23 :: v_dual_mov_b32 v35, s58 +; GFX12-NEXT: v_dual_mov_b32 v36, s59 :: v_dual_mov_b32 v37, s24 +; GFX12-NEXT: v_dual_mov_b32 v56, s41 :: v_dual_mov_b32 v29, s46 +; GFX12-NEXT: v_mov_b32_e32 v30, s47 +; GFX12-NEXT: s_lshr_b32 s44, s0, 24 +; GFX12-NEXT: s_mov_b32 s48, s5 +; GFX12-NEXT: s_mov_b32 s50, s3 +; GFX12-NEXT: s_lshr_b32 s30, s4, 16 +; GFX12-NEXT: s_lshr_b32 s34, s4, 24 +; GFX12-NEXT: s_ashr_i64 s[54:55], s[2:3], 56 ; GFX12-NEXT: s_ashr_i64 s[56:57], s[4:5], 56 ; GFX12-NEXT: v_bfe_i32 v7, v7, 0, 8 ; GFX12-NEXT: v_bfe_i32 v19, v5, 0, 8 -; GFX12-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000 +; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x80000 ; GFX12-NEXT: s_bfe_i64 s[28:29], s[28:29], 0x80000 -; GFX12-NEXT: s_lshr_b32 s38, s3, 16 -; GFX12-NEXT: s_mov_b32 s54, s1 +; GFX12-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x80000 +; GFX12-NEXT: s_lshr_b32 s36, s3, 16 +; GFX12-NEXT: s_mov_b32 s52, s1 ; GFX12-NEXT: s_bfe_i64 s[12:13], s[2:3], 0x80000 ; GFX12-NEXT: s_bfe_i64 s[14:15], s[4:5], 0x80000 ; GFX12-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[2:3], s[52:53], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[4:5], s[50:51], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[6:7], s[46:47], 0x80000 +; GFX12-NEXT: s_bfe_i64 s[2:3], s[50:51], 0x80000 +; GFX12-NEXT: s_bfe_i64 s[4:5], s[48:49], 0x80000 +; GFX12-NEXT: s_bfe_i64 s[6:7], s[44:45], 0x80000 ; GFX12-NEXT: s_lshr_b32 s20, s0, 16 ; GFX12-NEXT: s_ashr_i64 s[18:19], s[0:1], 56 ; GFX12-NEXT: v_bfe_i32 v3, v3, 0, 8 ; GFX12-NEXT: v_bfe_i32 v15, v6, 0, 8 -; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000 ; GFX12-NEXT: s_bfe_i64 s[34:35], s[34:35], 0x80000 -; GFX12-NEXT: v_dual_mov_b32 v38, s27 :: v_dual_mov_b32 v39, s28 -; GFX12-NEXT: v_dual_mov_b32 v40, s29 :: v_dual_mov_b32 v41, s30 -; GFX12-NEXT: v_dual_mov_b32 v42, s31 :: v_dual_mov_b32 v43, s56 -; GFX12-NEXT: v_dual_mov_b32 v44, s57 :: v_dual_mov_b32 v45, s34 -; GFX12-NEXT: v_dual_mov_b32 
v52, s23 :: v_dual_mov_b32 v53, s40 -; GFX12-NEXT: v_dual_mov_b32 v54, s41 :: v_dual_mov_b32 v55, s42 +; GFX12-NEXT: s_bfe_i64 s[30:31], s[30:31], 0x80000 +; GFX12-NEXT: v_dual_mov_b32 v38, s25 :: v_dual_mov_b32 v39, s26 +; GFX12-NEXT: v_dual_mov_b32 v40, s27 :: v_dual_mov_b32 v41, s28 +; GFX12-NEXT: v_dual_mov_b32 v42, s29 :: v_dual_mov_b32 v43, s56 +; GFX12-NEXT: v_dual_mov_b32 v44, s57 :: v_dual_mov_b32 v45, s30 +; GFX12-NEXT: v_dual_mov_b32 v52, s55 :: v_dual_mov_b32 v53, s38 +; GFX12-NEXT: v_dual_mov_b32 v54, s39 :: v_dual_mov_b32 v55, s40 ; GFX12-NEXT: s_bfe_i64 s[10:11], s[0:1], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[0:1], s[54:55], 0x80000 +; GFX12-NEXT: s_bfe_i64 s[0:1], s[52:53], 0x80000 ; GFX12-NEXT: v_ashrrev_i32_e32 v12, 31, v11 ; GFX12-NEXT: v_ashrrev_i32_e32 v24, 31, v23 ; GFX12-NEXT: v_ashrrev_i32_e32 v28, 31, v27 ; GFX12-NEXT: global_store_b128 v0, v[33:36], s[8:9] offset:240 -; GFX12-NEXT: v_mov_b32_e32 v33, s44 +; GFX12-NEXT: v_mov_b32_e32 v33, s42 ; GFX12-NEXT: global_store_b128 v0, v[29:32], s[8:9] offset:224 ; GFX12-NEXT: v_dual_mov_b32 v25, s16 :: v_dual_mov_b32 v26, s17 ; GFX12-NEXT: v_dual_mov_b32 v32, s7 :: v_dual_mov_b32 v21, s4 @@ -8882,16 +8882,16 @@ define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(ptr addrspace(1) %o ; GFX12-NEXT: v_dual_mov_b32 v14, s3 :: v_dual_mov_b32 v9, s12 ; GFX12-NEXT: v_dual_mov_b32 v10, s13 :: v_dual_mov_b32 v5, s0 ; GFX12-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x80000 -; GFX12-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x80000 -; GFX12-NEXT: v_dual_mov_b32 v46, s35 :: v_dual_mov_b32 v47, s36 -; GFX12-NEXT: v_dual_mov_b32 v48, s37 :: v_dual_mov_b32 v49, s38 -; GFX12-NEXT: v_dual_mov_b32 v34, s45 :: v_dual_mov_b32 v35, s18 +; GFX12-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x80000 +; GFX12-NEXT: v_dual_mov_b32 v46, s31 :: v_dual_mov_b32 v47, s34 +; GFX12-NEXT: v_dual_mov_b32 v48, s35 :: v_dual_mov_b32 v49, s36 +; GFX12-NEXT: v_dual_mov_b32 v34, s43 :: v_dual_mov_b32 v35, s18 ; GFX12-NEXT: v_dual_mov_b32 v36, s19 :: 
v_dual_mov_b32 v29, s20 ; GFX12-NEXT: v_ashrrev_i32_e32 v8, 31, v7 ; GFX12-NEXT: v_ashrrev_i32_e32 v20, 31, v19 ; GFX12-NEXT: v_dual_mov_b32 v18, s15 :: v_dual_mov_b32 v13, s2 ; GFX12-NEXT: v_dual_mov_b32 v6, s1 :: v_dual_mov_b32 v1, s10 -; GFX12-NEXT: v_dual_mov_b32 v50, s39 :: v_dual_mov_b32 v51, s22 +; GFX12-NEXT: v_dual_mov_b32 v50, s37 :: v_dual_mov_b32 v51, s54 ; GFX12-NEXT: v_dual_mov_b32 v30, s21 :: v_dual_mov_b32 v31, s6 ; GFX12-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; GFX12-NEXT: v_ashrrev_i32_e32 v16, 31, v15 diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-libcall.ll b/llvm/test/CodeGen/AMDGPU/memcpy-libcall.ll new file mode 100644 index 0000000..358f42d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/memcpy-libcall.ll @@ -0,0 +1,2696 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 %s -o - | FileCheck %s + +%struct.S = type { [32 x i32] } + +@shared = addrspace(3) global %struct.S undef, align 4 + +define amdgpu_kernel void @memcpy_p0_p0_minsize(ptr %dest, ptr readonly %src) #0 { +; CHECK-LABEL: memcpy_p0_p0_minsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, s2 +; CHECK-NEXT: v_mov_b32_e32 v1, s3 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] +; CHECK-NEXT: v_mov_b32_e32 v3, s1 +; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:1 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:1 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:2 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:2 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:3 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:3 +; CHECK-NEXT: 
flat_load_ubyte v4, v[0:1] offset:4 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:4 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:5 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:5 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:6 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:6 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:7 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:7 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:8 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:8 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:9 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:9 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:10 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:10 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:11 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:11 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:12 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:12 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:13 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:13 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:14 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:14 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:15 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:15 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:16 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:17 +; CHECK-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:17 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:18 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:18 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:19 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:19 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:20 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:20 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:21 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:21 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:22 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:22 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:23 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:23 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:24 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:24 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:25 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:25 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:26 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:26 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:27 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:27 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:28 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:28 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:29 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:29 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:30 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:30 
+; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:31 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:31 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:32 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:33 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:33 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:34 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:34 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:35 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:35 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:36 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:36 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:37 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:37 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:38 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:38 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:39 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:39 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:40 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:40 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:41 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:41 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:42 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:42 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:43 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:43 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:44 +; 
CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:44 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:45 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:45 +; CHECK-NEXT: flat_load_ubyte v0, v[0:1] offset:46 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v0 offset:46 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 47, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p1_p1_minsize(ptr addrspace(1) %dest, ptr addrspace(1) %src) #0 { +; CHECK-LABEL: memcpy_p1_p1_minsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_dwordx2 v[0:1], v4, s[2:3] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:32 +; CHECK-NEXT: global_load_dwordx2 v[0:1], v4, s[2:3] offset:39 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:39 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dest, ptr addrspace(1) %src, i64 47, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p1_p4_minsize(ptr addrspace(1) %global, ptr addrspace(4) %0) #0 { +; CHECK-LABEL: memcpy_p1_p4_minsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: 
global_store_dwordx4 v4, v[0:3], s[0:1] +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:32 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:48 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:64 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:80 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:96 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:112 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) %global, ptr addrspace(4) %0, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p5_p4_minsize(ptr addrspace(5) %local, ptr addrspace(4) %0) #0 { +; CHECK-LABEL: memcpy_p5_p4_minsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3] +; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1] +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; CHECK-NEXT: s_load_dword s2, s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: s_add_u32 s8, s8, s7 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:1 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:2 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] 
offset:3 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:4 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:5 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:6 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:7 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:8 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:9 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:10 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:11 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:12 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:13 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:14 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:15 +; CHECK-NEXT: s_addc_u32 s9, s9, 0 +; CHECK-NEXT: v_mov_b32_e32 v1, s2 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:16 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:17 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:18 +; CHECK-NEXT: s_waitcnt vmcnt(18) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:19 +; CHECK-NEXT: s_waitcnt vmcnt(19) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:1 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:20 +; CHECK-NEXT: s_waitcnt vmcnt(20) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:2 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:21 +; CHECK-NEXT: s_waitcnt vmcnt(21) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:3 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:22 +; CHECK-NEXT: s_waitcnt vmcnt(22) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:4 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:23 +; CHECK-NEXT: s_waitcnt vmcnt(23) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:5 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:24 +; CHECK-NEXT: s_waitcnt vmcnt(24) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen 
offset:6 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:25 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:7 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:26 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:8 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:27 +; CHECK-NEXT: s_waitcnt vmcnt(27) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:9 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:28 +; CHECK-NEXT: s_waitcnt vmcnt(28) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:10 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:29 +; CHECK-NEXT: s_waitcnt vmcnt(29) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:11 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:30 +; CHECK-NEXT: s_waitcnt vmcnt(30) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:12 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:31 +; CHECK-NEXT: s_waitcnt vmcnt(31) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:13 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(32) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:14 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:33 +; CHECK-NEXT: s_waitcnt vmcnt(33) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:15 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:34 +; CHECK-NEXT: s_waitcnt vmcnt(34) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:16 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:35 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:17 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:36 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:18 +; CHECK-NEXT: global_load_ubyte v20, 
v0, s[0:1] offset:37 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:19 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:38 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:20 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:39 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:21 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:40 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:22 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:41 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:23 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:42 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:24 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:43 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:25 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:44 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:26 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:45 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:27 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:46 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:28 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:47 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:29 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:30 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:49 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; 
CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:31 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:50 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:32 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:51 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:33 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:52 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:34 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:53 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:35 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:54 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:36 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:55 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:37 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:56 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:38 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:57 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:39 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:58 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:40 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:59 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:41 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:42 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:61 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 
offen offset:43 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:62 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:44 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:63 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:45 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:46 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:65 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:47 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:66 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:48 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:67 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:49 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:68 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:50 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:69 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:51 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:70 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:52 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:71 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:53 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:72 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:54 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:73 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:55 +; CHECK-NEXT: global_load_ubyte 
v19, v0, s[0:1] offset:74 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:56 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:75 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:57 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:76 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:58 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:77 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:59 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:78 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:60 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:79 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:61 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:62 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:81 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:63 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:82 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:64 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:83 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:65 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:84 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:66 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:85 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:67 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:86 +; CHECK-NEXT: s_waitcnt 
vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:68 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:87 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:69 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:88 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:70 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:71 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:89 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:90 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:72 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:91 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:73 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:92 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:74 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:93 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:75 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:94 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:76 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:95 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:77 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:78 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:97 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:79 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:98 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: 
buffer_store_byte v6, v1, s[8:11], 0 offen offset:80 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:99 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:81 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:100 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:82 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:101 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:83 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:102 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:84 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:103 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:85 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:104 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:86 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:105 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:87 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:106 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:88 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:107 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:89 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:90 +; CHECK-NEXT: s_waitcnt vmcnt(34) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:91 +; CHECK-NEXT: s_waitcnt vmcnt(33) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:92 +; CHECK-NEXT: s_waitcnt vmcnt(32) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:93 +; CHECK-NEXT: s_waitcnt vmcnt(31) +; CHECK-NEXT: 
buffer_store_byte v20, v1, s[8:11], 0 offen offset:94 +; CHECK-NEXT: s_waitcnt vmcnt(30) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:95 +; CHECK-NEXT: s_waitcnt vmcnt(29) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(28) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:97 +; CHECK-NEXT: s_waitcnt vmcnt(27) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:98 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:99 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:100 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:108 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:109 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:110 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:111 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:112 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:113 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:114 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:115 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:116 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:117 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:118 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:119 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:101 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:120 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:102 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:121 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:103 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:122 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:104 +; CHECK-NEXT: 
global_load_ubyte v11, v0, s[0:1] offset:123 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:105 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:124 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:106 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:125 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:107 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:126 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: global_load_ubyte v21, v0, s[0:1] offset:127 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:108 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:109 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:110 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:111 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:113 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:114 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:115 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:116 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:117 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:118 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:119 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:120 +; CHECK-NEXT: 
s_waitcnt vmcnt(24) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:121 +; CHECK-NEXT: s_waitcnt vmcnt(23) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:122 +; CHECK-NEXT: s_waitcnt vmcnt(22) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:123 +; CHECK-NEXT: s_waitcnt vmcnt(21) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:124 +; CHECK-NEXT: s_waitcnt vmcnt(20) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:125 +; CHECK-NEXT: s_waitcnt vmcnt(19) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:126 +; CHECK-NEXT: s_waitcnt vmcnt(19) +; CHECK-NEXT: buffer_store_byte v21, v1, s[8:11], 0 offen offset:127 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %local, ptr addrspace(4) %0, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p0_p5_minsize(ptr %generic, ptr addrspace(5) %src) #0 { +; CHECK-LABEL: memcpy_p0_p5_minsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3] +; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1] +; CHECK-NEXT: s_load_dword s0, s[4:5], 0x8 +; CHECK-NEXT: s_add_u32 s8, s8, s7 +; CHECK-NEXT: s_addc_u32 s9, s9, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:1 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:2 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:3 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:4 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:5 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:6 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:7 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:8 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:9 +; CHECK-NEXT: buffer_load_ubyte 
v13, v2, s[8:11], 0 offen offset:10 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:11 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:12 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:13 +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:14 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:15 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:16 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:17 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-NEXT: s_waitcnt vmcnt(17) +; CHECK-NEXT: flat_store_byte v[0:1], v3 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:18 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:1 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:19 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:2 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:20 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:3 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:21 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:4 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:22 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:5 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:23 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:6 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:24 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:7 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:25 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:8 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:26 +; CHECK-NEXT: s_nop 0 +; 
CHECK-NEXT: flat_store_byte v[0:1], v12 offset:9 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:27 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:10 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:28 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:11 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:29 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:12 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:30 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:13 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:31 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:14 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:32 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:15 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:33 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:16 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:34 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:17 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:35 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:18 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:36 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:19 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:37 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:20 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:38 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:21 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:39 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:22 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen 
offset:40 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:23 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:41 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:24 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:42 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:25 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:43 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:26 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:44 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:27 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:45 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:28 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:46 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:29 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:47 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:30 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:48 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:31 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:49 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:32 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:50 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:33 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:51 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:34 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:52 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:35 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:53 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:36 +; CHECK-NEXT: buffer_load_ubyte 
v3, v2, s[8:11], 0 offen offset:54 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:37 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:55 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:38 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:56 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:39 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:57 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:40 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:58 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:41 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:59 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:42 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:60 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:43 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:61 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:44 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:62 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:45 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:63 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:46 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:64 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:47 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:65 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:48 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:66 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:49 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:67 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:50 +; 
CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:68 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:51 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:69 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:52 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:70 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:53 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:71 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:54 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:72 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:55 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:73 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:56 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:74 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:57 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:75 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:58 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:76 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:59 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:77 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:60 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:78 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:61 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:79 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:62 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:80 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:63 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:81 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: 
flat_store_byte v[0:1], v13 offset:64 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:82 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:65 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:83 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:66 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:84 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:67 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:85 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:68 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:86 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:69 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:87 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:70 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:88 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:71 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:89 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:72 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:90 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:73 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:74 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:91 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:92 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:75 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:93 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:76 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:94 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:77 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:95 +; 
CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:78 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:96 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:79 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:97 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:80 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:98 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:81 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:99 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:82 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:100 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:83 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:101 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:84 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:102 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:85 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:103 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:86 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:104 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:87 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:105 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:88 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:106 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:89 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:107 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:90 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:108 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:91 +; CHECK-NEXT: 
flat_store_byte v[0:1], v5 offset:92 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:93 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:94 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:95 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:96 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:97 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:98 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:99 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:100 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:101 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:109 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:110 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:111 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:112 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:113 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:114 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:115 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:116 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:117 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:118 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:119 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:102 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:120 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:103 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:121 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:104 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:122 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:105 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:123 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:106 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen 
offset:124 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:107 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:125 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:108 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:126 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: buffer_load_ubyte v21, v2, s[8:11], 0 offen offset:127 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:109 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:110 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:111 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:112 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:113 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:114 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:115 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:116 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:117 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:118 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:119 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:120 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:121 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:122 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:123 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:124 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:125 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:126 +; CHECK-NEXT: flat_store_byte v[0:1], v21 offset:127 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p0.p5.i64(ptr %generic, ptr addrspace(5) %src, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p3_p4_minsize(ptr addrspace(4) %0) #0 { +; CHECK-LABEL: memcpy_p3_p4_minsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v24, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_dwordx4 v[0:3], v24, s[0:1] offset:112 +; CHECK-NEXT: global_load_dwordx4 v[4:7], v24, s[0:1] offset:96 +; 
CHECK-NEXT: global_load_dwordx4 v[8:11], v24, s[0:1] offset:80 +; CHECK-NEXT: global_load_dwordx4 v[12:15], v24, s[0:1] offset:64 +; CHECK-NEXT: global_load_dwordx4 v[16:19], v24, s[0:1] offset:48 +; CHECK-NEXT: global_load_dwordx4 v[20:23], v24, s[0:1] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: ds_write2_b64 v24, v[0:1], v[2:3] offset0:14 offset1:15 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: ds_write2_b64 v24, v[4:5], v[6:7] offset0:12 offset1:13 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v24, s[0:1] offset:16 +; CHECK-NEXT: global_load_dwordx4 v[4:7], v24, s[0:1] +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: ds_write2_b64 v24, v[8:9], v[10:11] offset0:10 offset1:11 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: ds_write2_b64 v24, v[12:13], v[14:15] offset0:8 offset1:9 +; CHECK-NEXT: s_waitcnt vmcnt(3) +; CHECK-NEXT: ds_write2_b64 v24, v[16:17], v[18:19] offset0:6 offset1:7 +; CHECK-NEXT: s_waitcnt vmcnt(2) +; CHECK-NEXT: ds_write2_b64 v24, v[20:21], v[22:23] offset0:4 offset1:5 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: ds_write2_b64 v24, v[0:1], v[2:3] offset0:2 offset1:3 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: ds_write2_b64 v24, v[4:5], v[6:7] offset1:1 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) @shared, ptr addrspace(4) %0, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p0_p3_minsize(ptr %generic) #0 { +; CHECK-LABEL: memcpy_p0_p3_minsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:127 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:126 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:125 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:124 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:127 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:126 +; CHECK-NEXT: 
ds_read_u8 v3, v2 offset:123 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:125 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:124 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:122 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:121 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:123 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:120 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:119 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:122 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:121 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:118 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:120 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:119 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:117 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:116 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:118 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:115 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:114 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:117 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:116 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:113 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:115 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:114 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:112 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:111 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:113 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:110 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:109 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:112 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:111 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:108 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:110 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:109 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:107 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:106 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:108 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:105 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:104 
+; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:107 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:106 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:103 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:105 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:104 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:102 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:101 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:103 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:100 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:99 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:102 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:101 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:98 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:100 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:99 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:97 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:96 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:98 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:95 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:94 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:97 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:96 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:93 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:95 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:94 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:92 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:91 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:93 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:90 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:89 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:92 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:91 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:88 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:90 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:89 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:87 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:86 +; 
CHECK-NEXT: flat_store_byte v[0:1], v4 offset:88 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:85 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:84 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:87 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:86 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:83 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:85 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:84 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:82 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:81 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:83 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:80 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:79 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:82 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:81 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:78 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:80 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:79 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:77 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:76 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:78 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:75 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:74 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:77 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:76 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:73 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:75 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:74 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:72 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:71 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:73 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:70 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:69 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:72 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:71 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:68 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:70 +; CHECK-NEXT: 
flat_store_byte v[0:1], v6 offset:69 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:67 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:66 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:68 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:65 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:64 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:67 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:66 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:63 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:65 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:64 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:62 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:61 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:63 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:60 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:59 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:62 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:61 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:58 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:60 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:59 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:57 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:56 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:58 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:55 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:54 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:57 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:56 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:53 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:55 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:54 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:52 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:51 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:53 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:50 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:49 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:52 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:51 +; CHECK-NEXT: ds_read_u8 v4, 
v2 offset:48 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:50 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:49 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:47 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:46 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:48 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:45 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:44 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:47 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:46 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:43 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:45 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:44 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:42 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:41 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:43 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:40 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:39 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:42 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:41 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:38 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:40 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:39 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:37 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:36 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:38 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:35 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:34 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:37 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:36 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:33 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:35 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:34 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:32 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:31 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:33 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:30 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:29 +; CHECK-NEXT: 
flat_store_byte v[0:1], v4 offset:32 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:31 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:28 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:30 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:29 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:27 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:26 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:28 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:25 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:24 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:27 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:26 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:23 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:25 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:24 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:22 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:21 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:23 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:20 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:19 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:22 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:21 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:18 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:20 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:19 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:16 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:17 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:18 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:8 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:9 +; CHECK-NEXT: ds_read_u8 v7, v2 offset:10 +; CHECK-NEXT: ds_read_u8 v8, v2 offset:11 +; CHECK-NEXT: ds_read_u8 v9, v2 offset:12 +; CHECK-NEXT: ds_read_u8 v10, v2 offset:13 +; CHECK-NEXT: ds_read_u8 v11, v2 offset:14 +; CHECK-NEXT: ds_read_u8 v12, v2 offset:15 +; CHECK-NEXT: ds_read_u8 v13, v2 +; CHECK-NEXT: ds_read_u8 v14, v2 offset:1 +; CHECK-NEXT: ds_read_u8 v15, v2 offset:2 +; CHECK-NEXT: ds_read_u8 v16, v2 offset:3 +; CHECK-NEXT: 
ds_read_u8 v17, v2 offset:4 +; CHECK-NEXT: ds_read_u8 v18, v2 offset:5 +; CHECK-NEXT: ds_read_u8 v19, v2 offset:6 +; CHECK-NEXT: ds_read_u8 v2, v2 offset:7 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:17 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:16 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:15 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:14 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:13 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:12 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:11 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:10 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:9 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:8 +; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:7 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:6 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:5 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:4 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:3 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:2 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:1 +; CHECK-NEXT: flat_store_byte v[0:1], v13 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p0.p3.i64(ptr %generic, ptr addrspace(3) @shared, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p0_p0_optsize(ptr %dest, ptr %src) #1 { +; CHECK-LABEL: memcpy_p0_p0_optsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, s2 +; CHECK-NEXT: v_mov_b32_e32 v1, s3 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] +; CHECK-NEXT: v_mov_b32_e32 v3, s1 +; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:1 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:1 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:2 +; CHECK-NEXT: s_waitcnt vmcnt(0) 
lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:2 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:3 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:3 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:4 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:4 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:5 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:5 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:6 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:6 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:7 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:7 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:8 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:8 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:9 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:9 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:10 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:10 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:11 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:11 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:12 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:12 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:13 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:13 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:14 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:14 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:15 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:15 +; CHECK-NEXT: 
flat_load_ubyte v4, v[0:1] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:16 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:17 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:17 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:18 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:18 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:19 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:19 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:20 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:20 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:21 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:21 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:22 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:22 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:23 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:23 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:24 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:24 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:25 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:25 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:26 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:26 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:27 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:27 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:28 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:28 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:29 +; CHECK-NEXT: s_waitcnt 
vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:29 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:30 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:30 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:31 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:31 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:32 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:33 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:33 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:34 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:34 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:35 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:35 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:36 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:36 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:37 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:37 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:38 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:38 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:39 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:39 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:40 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:40 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:41 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:41 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:42 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 
offset:42 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:43 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:43 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:44 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:44 +; CHECK-NEXT: flat_load_ubyte v4, v[0:1] offset:45 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v4 offset:45 +; CHECK-NEXT: flat_load_ubyte v0, v[0:1] offset:46 +; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[2:3], v0 offset:46 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 47, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p1_p1_optsize(ptr addrspace(1) %dest, ptr addrspace(1) %src) #1 { +; CHECK-LABEL: memcpy_p1_p1_optsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_dwordx2 v[0:1], v4, s[2:3] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:32 +; CHECK-NEXT: global_load_dwordx2 v[0:1], v4, s[2:3] offset:39 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx2 v4, v[0:1], s[0:1] offset:39 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dest, ptr addrspace(1) %src, i64 47, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p1_p4_optsize(ptr addrspace(1) %global, ptr addrspace(4) %0) #1 { +; CHECK-LABEL: memcpy_p1_p4_optsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v4, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:16 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:16 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:32 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:48 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:64 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:80 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:96 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v4, s[2:3] offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] offset:112 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) %global, ptr addrspace(4) %0, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p5_p4_optsize(ptr addrspace(5) %local, ptr addrspace(4) %0) #1 { +; CHECK-LABEL: memcpy_p5_p4_optsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3] +; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1] +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x8 +; CHECK-NEXT: s_load_dword s2, s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v0, 0 +; CHECK-NEXT: s_add_u32 s8, s8, s7 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; 
CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:1 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:2 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:3 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:4 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:5 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:6 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:7 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:8 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:9 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:10 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:11 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:12 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:13 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:14 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:15 +; CHECK-NEXT: s_addc_u32 s9, s9, 0 +; CHECK-NEXT: v_mov_b32_e32 v1, s2 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:16 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:17 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:18 +; CHECK-NEXT: s_waitcnt vmcnt(18) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:19 +; CHECK-NEXT: s_waitcnt vmcnt(19) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:1 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:20 +; CHECK-NEXT: s_waitcnt vmcnt(20) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:2 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:21 +; CHECK-NEXT: s_waitcnt vmcnt(21) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:3 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:22 +; CHECK-NEXT: s_waitcnt vmcnt(22) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:4 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:23 +; CHECK-NEXT: s_waitcnt vmcnt(23) +; 
CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:5 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:24 +; CHECK-NEXT: s_waitcnt vmcnt(24) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:6 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:25 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:7 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:26 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:8 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:27 +; CHECK-NEXT: s_waitcnt vmcnt(27) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:9 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:28 +; CHECK-NEXT: s_waitcnt vmcnt(28) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:10 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:29 +; CHECK-NEXT: s_waitcnt vmcnt(29) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:11 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:30 +; CHECK-NEXT: s_waitcnt vmcnt(30) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:12 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:31 +; CHECK-NEXT: s_waitcnt vmcnt(31) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:13 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(32) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:14 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:33 +; CHECK-NEXT: s_waitcnt vmcnt(33) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:15 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:34 +; CHECK-NEXT: s_waitcnt vmcnt(34) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:16 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:35 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 
offen offset:17 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:36 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:18 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:37 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:19 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:38 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:20 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:39 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:21 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:40 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:22 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:41 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:23 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:42 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:24 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:43 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:25 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:44 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:26 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:45 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:27 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:46 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:28 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:47 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:29 +; CHECK-NEXT: global_load_ubyte v12, v0, 
s[0:1] offset:48 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:30 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:49 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:31 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:50 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:32 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:51 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:33 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:52 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:34 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:53 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:35 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:54 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:36 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:55 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:37 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:56 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:38 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:57 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:39 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:58 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:40 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:59 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:41 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:60 +; CHECK-NEXT: s_waitcnt vmcnt(36) 
+; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:42 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:61 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:43 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:62 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:44 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:63 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:45 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:64 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:46 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:65 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:47 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:66 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:48 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:67 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:49 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:68 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:50 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:69 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:51 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:70 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:52 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:71 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:53 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:72 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 
0 offen offset:54 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:73 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:55 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:74 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:56 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:75 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:57 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:76 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:58 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:77 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:59 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:78 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:60 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:79 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:61 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:80 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:62 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:81 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:63 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:82 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:64 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:83 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:65 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:84 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:66 +; CHECK-NEXT: global_load_ubyte v11, v0, 
s[0:1] offset:85 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:67 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:86 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:68 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:87 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:69 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:88 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:70 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:71 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:89 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:90 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:72 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:91 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:73 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:92 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:74 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:93 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:75 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:94 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:76 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:95 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:77 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:78 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:97 +; 
CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:79 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:98 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:80 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:99 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:81 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:100 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:82 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:101 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:83 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:102 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:84 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:103 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:85 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:104 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:86 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:105 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:87 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:106 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:88 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:107 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:89 +; CHECK-NEXT: s_waitcnt vmcnt(35) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:90 +; CHECK-NEXT: s_waitcnt vmcnt(34) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:91 +; CHECK-NEXT: s_waitcnt vmcnt(33) +; CHECK-NEXT: 
buffer_store_byte v18, v1, s[8:11], 0 offen offset:92 +; CHECK-NEXT: s_waitcnt vmcnt(32) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:93 +; CHECK-NEXT: s_waitcnt vmcnt(31) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:94 +; CHECK-NEXT: s_waitcnt vmcnt(30) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:95 +; CHECK-NEXT: s_waitcnt vmcnt(29) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:96 +; CHECK-NEXT: s_waitcnt vmcnt(28) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:97 +; CHECK-NEXT: s_waitcnt vmcnt(27) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:98 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:99 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:100 +; CHECK-NEXT: global_load_ubyte v2, v0, s[0:1] offset:108 +; CHECK-NEXT: global_load_ubyte v3, v0, s[0:1] offset:109 +; CHECK-NEXT: global_load_ubyte v4, v0, s[0:1] offset:110 +; CHECK-NEXT: global_load_ubyte v5, v0, s[0:1] offset:111 +; CHECK-NEXT: global_load_ubyte v6, v0, s[0:1] offset:112 +; CHECK-NEXT: global_load_ubyte v7, v0, s[0:1] offset:113 +; CHECK-NEXT: global_load_ubyte v15, v0, s[0:1] offset:114 +; CHECK-NEXT: global_load_ubyte v16, v0, s[0:1] offset:115 +; CHECK-NEXT: global_load_ubyte v17, v0, s[0:1] offset:116 +; CHECK-NEXT: global_load_ubyte v18, v0, s[0:1] offset:117 +; CHECK-NEXT: global_load_ubyte v19, v0, s[0:1] offset:118 +; CHECK-NEXT: global_load_ubyte v20, v0, s[0:1] offset:119 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:101 +; CHECK-NEXT: global_load_ubyte v8, v0, s[0:1] offset:120 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:102 +; CHECK-NEXT: global_load_ubyte v9, v0, s[0:1] offset:121 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v10, v1, 
s[8:11], 0 offen offset:103 +; CHECK-NEXT: global_load_ubyte v10, v0, s[0:1] offset:122 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:104 +; CHECK-NEXT: global_load_ubyte v11, v0, s[0:1] offset:123 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:105 +; CHECK-NEXT: global_load_ubyte v12, v0, s[0:1] offset:124 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:106 +; CHECK-NEXT: global_load_ubyte v13, v0, s[0:1] offset:125 +; CHECK-NEXT: s_waitcnt vmcnt(36) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:107 +; CHECK-NEXT: global_load_ubyte v14, v0, s[0:1] offset:126 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: global_load_ubyte v21, v0, s[0:1] offset:127 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v2, v1, s[8:11], 0 offen offset:108 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v3, v1, s[8:11], 0 offen offset:109 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v4, v1, s[8:11], 0 offen offset:110 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v5, v1, s[8:11], 0 offen offset:111 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v6, v1, s[8:11], 0 offen offset:112 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v7, v1, s[8:11], 0 offen offset:113 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v15, v1, s[8:11], 0 offen offset:114 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v16, v1, s[8:11], 0 offen offset:115 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v17, v1, s[8:11], 0 offen offset:116 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v18, v1, s[8:11], 0 offen offset:117 +; CHECK-NEXT: s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v19, v1, s[8:11], 0 offen offset:118 +; CHECK-NEXT: 
s_waitcnt vmcnt(26) +; CHECK-NEXT: buffer_store_byte v20, v1, s[8:11], 0 offen offset:119 +; CHECK-NEXT: s_waitcnt vmcnt(25) +; CHECK-NEXT: buffer_store_byte v8, v1, s[8:11], 0 offen offset:120 +; CHECK-NEXT: s_waitcnt vmcnt(24) +; CHECK-NEXT: buffer_store_byte v9, v1, s[8:11], 0 offen offset:121 +; CHECK-NEXT: s_waitcnt vmcnt(23) +; CHECK-NEXT: buffer_store_byte v10, v1, s[8:11], 0 offen offset:122 +; CHECK-NEXT: s_waitcnt vmcnt(22) +; CHECK-NEXT: buffer_store_byte v11, v1, s[8:11], 0 offen offset:123 +; CHECK-NEXT: s_waitcnt vmcnt(21) +; CHECK-NEXT: buffer_store_byte v12, v1, s[8:11], 0 offen offset:124 +; CHECK-NEXT: s_waitcnt vmcnt(20) +; CHECK-NEXT: buffer_store_byte v13, v1, s[8:11], 0 offen offset:125 +; CHECK-NEXT: s_waitcnt vmcnt(19) +; CHECK-NEXT: buffer_store_byte v14, v1, s[8:11], 0 offen offset:126 +; CHECK-NEXT: s_waitcnt vmcnt(19) +; CHECK-NEXT: buffer_store_byte v21, v1, s[8:11], 0 offen offset:127 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) %local, ptr addrspace(4) %0, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p0_p5_optsize(ptr %generic, ptr addrspace(5) %src) #1 { +; CHECK-LABEL: memcpy_p0_p5_optsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_mov_b64 s[10:11], s[2:3] +; CHECK-NEXT: s_mov_b64 s[8:9], s[0:1] +; CHECK-NEXT: s_load_dword s0, s[4:5], 0x8 +; CHECK-NEXT: s_add_u32 s8, s8, s7 +; CHECK-NEXT: s_addc_u32 s9, s9, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v2, s0 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:1 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:2 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:3 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:4 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:5 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:6 +; CHECK-NEXT: 
buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:7 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:8 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:9 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:10 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:11 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:12 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:13 +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:14 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:15 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:16 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:17 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-NEXT: s_waitcnt vmcnt(17) +; CHECK-NEXT: flat_store_byte v[0:1], v3 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:18 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:1 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:19 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:2 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:20 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:3 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:21 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:4 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:22 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:5 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:23 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:6 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:24 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:7 +; CHECK-NEXT: 
buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:25 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:8 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:26 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:9 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:27 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:10 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:28 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:11 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:29 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:12 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:30 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:13 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:31 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:14 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:32 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:15 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:33 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:16 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:34 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:17 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:35 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:18 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:36 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:19 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:37 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:20 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:38 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte 
v[0:1], v6 offset:21 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:39 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:22 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:40 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:23 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:41 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:24 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:42 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:25 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:43 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:26 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:44 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:27 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:45 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:28 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:46 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:29 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:47 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:30 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:48 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:31 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:49 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:32 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:50 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:33 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:51 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:34 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:52 +; CHECK-NEXT: s_nop 0 +; 
CHECK-NEXT: flat_store_byte v[0:1], v20 offset:35 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:53 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:36 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:54 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:37 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:55 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:38 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:56 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:39 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:57 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:40 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:58 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:41 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:59 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:42 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:60 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:43 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:61 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:44 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:62 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:45 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:63 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:46 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:64 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:47 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:65 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:48 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen 
offset:66 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:49 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:67 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:50 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:68 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:51 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:69 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:52 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:70 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:53 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:71 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:54 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:72 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:55 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:73 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:56 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:74 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:57 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:75 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:58 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:76 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:59 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:77 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:60 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:78 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:61 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:79 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:62 +; CHECK-NEXT: 
buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:80 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:63 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:81 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:64 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:82 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:65 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:83 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:66 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:84 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:67 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:85 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:68 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:86 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:69 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:87 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:70 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:88 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:71 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:89 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:72 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:90 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:73 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:74 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:91 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:92 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:75 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:93 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte 
v[0:1], v7 offset:76 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:94 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:77 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:95 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:78 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:96 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:79 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:97 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:80 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:98 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:81 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:99 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:82 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:100 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:83 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:101 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:84 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:102 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:85 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:103 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:86 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:104 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:87 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:105 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:88 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:106 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:89 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:107 +; CHECK-NEXT: s_nop 
0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:90 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:108 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:91 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:92 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:93 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:94 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:95 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:96 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:97 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:98 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:99 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:100 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:101 +; CHECK-NEXT: buffer_load_ubyte v4, v2, s[8:11], 0 offen offset:109 +; CHECK-NEXT: buffer_load_ubyte v5, v2, s[8:11], 0 offen offset:110 +; CHECK-NEXT: buffer_load_ubyte v6, v2, s[8:11], 0 offen offset:111 +; CHECK-NEXT: buffer_load_ubyte v7, v2, s[8:11], 0 offen offset:112 +; CHECK-NEXT: buffer_load_ubyte v8, v2, s[8:11], 0 offen offset:113 +; CHECK-NEXT: buffer_load_ubyte v9, v2, s[8:11], 0 offen offset:114 +; CHECK-NEXT: buffer_load_ubyte v10, v2, s[8:11], 0 offen offset:115 +; CHECK-NEXT: buffer_load_ubyte v11, v2, s[8:11], 0 offen offset:116 +; CHECK-NEXT: buffer_load_ubyte v12, v2, s[8:11], 0 offen offset:117 +; CHECK-NEXT: buffer_load_ubyte v13, v2, s[8:11], 0 offen offset:118 +; CHECK-NEXT: buffer_load_ubyte v14, v2, s[8:11], 0 offen offset:119 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:102 +; CHECK-NEXT: buffer_load_ubyte v15, v2, s[8:11], 0 offen offset:120 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:103 +; CHECK-NEXT: buffer_load_ubyte v16, v2, s[8:11], 0 offen offset:121 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:104 +; CHECK-NEXT: buffer_load_ubyte v17, v2, s[8:11], 0 offen offset:122 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte 
v[0:1], v18 offset:105 +; CHECK-NEXT: buffer_load_ubyte v18, v2, s[8:11], 0 offen offset:123 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:106 +; CHECK-NEXT: buffer_load_ubyte v19, v2, s[8:11], 0 offen offset:124 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:107 +; CHECK-NEXT: buffer_load_ubyte v20, v2, s[8:11], 0 offen offset:125 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:108 +; CHECK-NEXT: buffer_load_ubyte v3, v2, s[8:11], 0 offen offset:126 +; CHECK-NEXT: s_nop 0 +; CHECK-NEXT: buffer_load_ubyte v21, v2, s[8:11], 0 offen offset:127 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:109 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:110 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:111 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:112 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:113 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:114 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:115 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:116 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:117 +; CHECK-NEXT: flat_store_byte v[0:1], v13 offset:118 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:119 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:120 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:121 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:122 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:123 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:124 +; CHECK-NEXT: flat_store_byte v[0:1], v20 offset:125 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:126 +; CHECK-NEXT: flat_store_byte v[0:1], v21 offset:127 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p0.p5.i64(ptr %generic, ptr addrspace(5) %src, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p3_p4_optsize(ptr addrspace(4) %0) #1 { +; CHECK-LABEL: memcpy_p3_p4_optsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx2 
s[0:1], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v24, 0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: global_load_dwordx4 v[0:3], v24, s[0:1] offset:112 +; CHECK-NEXT: global_load_dwordx4 v[4:7], v24, s[0:1] offset:96 +; CHECK-NEXT: global_load_dwordx4 v[8:11], v24, s[0:1] offset:80 +; CHECK-NEXT: global_load_dwordx4 v[12:15], v24, s[0:1] offset:64 +; CHECK-NEXT: global_load_dwordx4 v[16:19], v24, s[0:1] offset:48 +; CHECK-NEXT: global_load_dwordx4 v[20:23], v24, s[0:1] offset:32 +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: ds_write2_b64 v24, v[0:1], v[2:3] offset0:14 offset1:15 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: ds_write2_b64 v24, v[4:5], v[6:7] offset0:12 offset1:13 +; CHECK-NEXT: global_load_dwordx4 v[0:3], v24, s[0:1] offset:16 +; CHECK-NEXT: global_load_dwordx4 v[4:7], v24, s[0:1] +; CHECK-NEXT: s_waitcnt vmcnt(5) +; CHECK-NEXT: ds_write2_b64 v24, v[8:9], v[10:11] offset0:10 offset1:11 +; CHECK-NEXT: s_waitcnt vmcnt(4) +; CHECK-NEXT: ds_write2_b64 v24, v[12:13], v[14:15] offset0:8 offset1:9 +; CHECK-NEXT: s_waitcnt vmcnt(3) +; CHECK-NEXT: ds_write2_b64 v24, v[16:17], v[18:19] offset0:6 offset1:7 +; CHECK-NEXT: s_waitcnt vmcnt(2) +; CHECK-NEXT: ds_write2_b64 v24, v[20:21], v[22:23] offset0:4 offset1:5 +; CHECK-NEXT: s_waitcnt vmcnt(1) +; CHECK-NEXT: ds_write2_b64 v24, v[0:1], v[2:3] offset0:2 offset1:3 +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: ds_write2_b64 v24, v[4:5], v[6:7] offset1:1 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) @shared, ptr addrspace(4) %0, i64 128, i1 false) + ret void +} + +define amdgpu_kernel void @memcpy_p0_p3_optsize(ptr %generic) #1 { +; CHECK-LABEL: memcpy_p0_p3_optsize: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:127 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:126 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:125 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:124 +; 
CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: v_mov_b32_e32 v0, s0 +; CHECK-NEXT: v_mov_b32_e32 v1, s1 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:127 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:126 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:123 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:125 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:124 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:122 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:121 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:123 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:120 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:119 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:122 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:121 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:118 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:120 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:119 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:117 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:116 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:118 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:115 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:114 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:117 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:116 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:113 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:115 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:114 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:112 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:111 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:113 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:110 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:109 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:112 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:111 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:108 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:110 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:109 +; 
CHECK-NEXT: ds_read_u8 v3, v2 offset:107 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:106 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:108 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:105 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:104 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:107 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:106 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:103 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:105 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:104 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:102 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:101 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:103 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:100 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:99 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:102 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:101 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:98 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:100 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:99 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:97 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:96 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:98 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:95 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:94 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:97 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:96 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:93 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:95 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:94 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:92 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:91 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:93 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:90 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:89 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:92 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:91 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:88 +; 
CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:90 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:89 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:87 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:86 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:88 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:85 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:84 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:87 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:86 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:83 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:85 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:84 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:82 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:81 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:83 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:80 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:79 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:82 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:81 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:78 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:80 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:79 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:77 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:76 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:78 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:75 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:74 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:77 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:76 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:73 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:75 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:74 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:72 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:71 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:73 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:70 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:69 +; CHECK-NEXT: flat_store_byte 
v[0:1], v4 offset:72 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:71 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:68 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:70 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:69 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:67 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:66 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:68 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:65 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:64 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:67 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:66 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:63 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:65 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:64 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:62 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:61 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:63 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:60 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:59 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:62 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:61 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:58 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:60 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:59 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:57 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:56 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:58 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:55 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:54 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:57 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:56 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:53 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:55 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:54 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:52 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:51 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:53 
+; CHECK-NEXT: ds_read_u8 v3, v2 offset:50 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:49 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:52 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:51 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:48 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:50 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:49 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:47 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:46 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:48 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:45 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:44 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:47 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:46 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:43 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:45 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:44 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:42 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:41 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:43 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:40 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:39 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:42 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:41 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:38 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:40 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:39 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:37 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:36 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:38 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:35 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:34 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:37 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:36 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:33 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:35 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:34 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:32 +; CHECK-NEXT: 
ds_read_u8 v5, v2 offset:31 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:33 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:30 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:29 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:32 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:31 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:28 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:30 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:29 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:27 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:26 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:28 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:25 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:24 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:27 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:26 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:23 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:25 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:24 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:22 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:21 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:23 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:20 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:19 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:22 +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:21 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:18 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:20 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:19 +; CHECK-NEXT: ds_read_u8 v3, v2 offset:16 +; CHECK-NEXT: ds_read_u8 v5, v2 offset:17 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:18 +; CHECK-NEXT: ds_read_u8 v4, v2 offset:8 +; CHECK-NEXT: ds_read_u8 v6, v2 offset:9 +; CHECK-NEXT: ds_read_u8 v7, v2 offset:10 +; CHECK-NEXT: ds_read_u8 v8, v2 offset:11 +; CHECK-NEXT: ds_read_u8 v9, v2 offset:12 +; CHECK-NEXT: ds_read_u8 v10, v2 offset:13 +; CHECK-NEXT: ds_read_u8 v11, v2 offset:14 +; 
CHECK-NEXT: ds_read_u8 v12, v2 offset:15 +; CHECK-NEXT: ds_read_u8 v13, v2 +; CHECK-NEXT: ds_read_u8 v14, v2 offset:1 +; CHECK-NEXT: ds_read_u8 v15, v2 offset:2 +; CHECK-NEXT: ds_read_u8 v16, v2 offset:3 +; CHECK-NEXT: ds_read_u8 v17, v2 offset:4 +; CHECK-NEXT: ds_read_u8 v18, v2 offset:5 +; CHECK-NEXT: ds_read_u8 v19, v2 offset:6 +; CHECK-NEXT: ds_read_u8 v2, v2 offset:7 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: flat_store_byte v[0:1], v5 offset:17 +; CHECK-NEXT: flat_store_byte v[0:1], v3 offset:16 +; CHECK-NEXT: flat_store_byte v[0:1], v12 offset:15 +; CHECK-NEXT: flat_store_byte v[0:1], v11 offset:14 +; CHECK-NEXT: flat_store_byte v[0:1], v10 offset:13 +; CHECK-NEXT: flat_store_byte v[0:1], v9 offset:12 +; CHECK-NEXT: flat_store_byte v[0:1], v8 offset:11 +; CHECK-NEXT: flat_store_byte v[0:1], v7 offset:10 +; CHECK-NEXT: flat_store_byte v[0:1], v6 offset:9 +; CHECK-NEXT: flat_store_byte v[0:1], v4 offset:8 +; CHECK-NEXT: flat_store_byte v[0:1], v2 offset:7 +; CHECK-NEXT: flat_store_byte v[0:1], v19 offset:6 +; CHECK-NEXT: flat_store_byte v[0:1], v18 offset:5 +; CHECK-NEXT: flat_store_byte v[0:1], v17 offset:4 +; CHECK-NEXT: flat_store_byte v[0:1], v16 offset:3 +; CHECK-NEXT: flat_store_byte v[0:1], v15 offset:2 +; CHECK-NEXT: flat_store_byte v[0:1], v14 offset:1 +; CHECK-NEXT: flat_store_byte v[0:1], v13 +; CHECK-NEXT: s_endpgm +entry: + tail call void @llvm.memcpy.p0.p3.i64(ptr %generic, ptr addrspace(3) @shared, i64 128, i1 false) + ret void +} + +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #2 + +declare void @llvm.memcpy.p0.p5.i64(ptr noalias nocapture writeonly, ptr addrspace(5) noalias nocapture readonly, i64, i1 immarg) #2 + +declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #2 + +declare void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) 
noalias nocapture readonly, i64, i1 immarg) #2 + +declare void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2 + +declare void @llvm.memcpy.p3.p4.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #2 + +declare void @llvm.memcpy.p0.p3.i64(ptr noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #2 + +attributes #0 = { minsize } +attributes #1 = { optsize } +attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir index f8e7cb3..8a5f753 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir @@ -28,18 +28,17 @@ body: | ; GCN-LABEL: name: test_main ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x80000000) - ; GCN-NEXT: liveins: $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GCN-NEXT: liveins: $vcc_hi, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr64, 
$sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $sgpr102, $sgpr103, $vgpr0, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GCN-NEXT: {{ $}} - ; GCN-NEXT: $sgpr0 = COPY $sgpr33 + ; GCN-NEXT: $vcc_hi = frame-setup COPY $sgpr33 ; GCN-NEXT: $sgpr33 = frame-setup COPY $sgpr32 - ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5) ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5) ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5) ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5) - ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5) - ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 - ; GCN-NEXT: $vgpr5 = SI_SPILL_S32_TO_VGPR $sgpr0, 4, undef $vgpr5 + ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5) + ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 ; GCN-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc ; GCN-NEXT: renamable $vgpr2 = IMPLICIT_DEF ; GCN-NEXT: $vgpr3 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr3 @@ -116,18 +115,18 @@ body: 
| ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: ; GCN-NEXT: successors: %bb.2(0x80000000) - ; GCN-NEXT: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GCN-NEXT: liveins: $vcc_hi, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GCN-NEXT: {{ $}} ; GCN-NEXT: KILL implicit-def $vcc_lo, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99_sgpr100_sgpr101_sgpr102_sgpr103 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.2: ; GCN-NEXT: successors: %bb.3(0x80000000) - ; GCN-NEXT: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GCN-NEXT: liveins: $vcc_hi, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr22 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: - ; GCN-NEXT: liveins: $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GCN-NEXT: liveins: $vcc_hi, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GCN-NEXT: {{ $}} ; GCN-NEXT: $sgpr103 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 3 ; GCN-NEXT: $sgpr102 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 2 @@ -198,16 +197,15 @@ body: | ; GCN-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 1 ; GCN-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr3, 0 ; GCN-NEXT: KILL killed renamable $vgpr2 - ; GCN-NEXT: $sgpr0 = SI_RESTORE_S32_FROM_VGPR $vgpr5, 4 - ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec + ; GCN-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, 
implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5) ; GCN-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5) ; GCN-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5) ; GCN-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5) - ; GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.74, addrspace 5) - ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1 + ; GCN-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5) + ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0 ; GCN-NEXT: $sgpr32 = frame-destroy S_ADD_I32 $sgpr32, -24, implicit-def dead $scc - ; GCN-NEXT: $sgpr33 = COPY $sgpr0 + ; GCN-NEXT: $sgpr33 = frame-destroy COPY $vcc_hi ; GCN-NEXT: S_ENDPGM 0 bb.0: liveins: $vgpr0 diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll index 731a882..204c814 100644 --- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll +++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll @@ -278,7 +278,7 @@ entry: ; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half ; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 5.000000e-01) -; GCN-PRELINK: %__pow2sqrt = tail call fast float @_Z4sqrtf(float %tmp) +; GCN-PRELINK: %__pow2sqrt = tail call fast float @llvm.sqrt.f32(float %tmp) define amdgpu_kernel void @test_pow_half(ptr addrspace(1) nocapture %a) { entry: %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1 @@ -476,7 +476,7 @@ declare float @_Z5rootnfi(float, i32) ; GCN-LABEL: 
{{^}}define amdgpu_kernel void @test_rootn_2 ; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 2) -; GCN-PRELINK: %__rootn2sqrt = tail call fast float @_Z4sqrtf(float %tmp) +; GCN-PRELINK: %__rootn2sqrt = tail call fast float @llvm.sqrt.f32(float %tmp) define amdgpu_kernel void @test_rootn_2(ptr addrspace(1) nocapture %a) { entry: %tmp = load float, ptr addrspace(1) %a, align 4 @@ -838,5 +838,5 @@ entry: ; GCN-PRELINK: declare float @_Z4cbrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY:[0-9]+]] ; GCN-PRELINK-DAG: attributes #[[$NOUNWIND]] = { nounwind } -; GCN-PRELINK-DAG: attributes #[[$NOUNWIND_READONLY]] = { nofree nounwind memory(read) } +; GCN-PRELINK-DAG: attributes #[[$NOUNWIND_READONLY]] = { nounwind memory(read) } attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/umed3.ll b/llvm/test/CodeGen/AMDGPU/umed3.ll index a2d99f1f..557d023 100644 --- a/llvm/test/CodeGen/AMDGPU/umed3.ll +++ b/llvm/test/CodeGen/AMDGPU/umed3.ll @@ -43,8 +43,7 @@ define amdgpu_kernel void @v_test_umed3_multi_use_r_i_i_i32(ptr addrspace(1) %ou } ; GCN-LABEL: {{^}}v_test_umed3_r_i_i_sign_mismatch_i32: -; GCN: v_max_i32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}} -; GCN: v_min_u32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}} +; GCN: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17 define amdgpu_kernel void @v_test_umed3_r_i_i_sign_mismatch_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep0 = getelementptr i32, ptr addrspace(1) %aptr, i32 %tid diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir index 63bef40..b8ac50c 100644 --- a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir +++ b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir @@ -160,7 +160,7 @@ body: | ; PAIR-GFX11-NEXT: $vgpr3 = IMPLICIT_DEF ; PAIR-GFX11-NEXT: $sgpr20 = IMPLICIT_DEF ; PAIR-GFX11-NEXT: $vgpr4 = V_FMAMK_F32 $sgpr20, 12345, $vgpr3, implicit $mode, implicit $exec - ; PAIR-GFX11-NEXT: $vgpr2, $vgpr5 = 
V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32_gfx11 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX11-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32_gfx11 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo ; PAIR-GFX11-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo ; PAIR-GFX11-NEXT: $vgpr6 = V_ADD_F32_e32 $sgpr20, $vgpr3, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit killed $vcc_lo @@ -174,7 +174,7 @@ body: | ; PAIR-GFX12-NEXT: $vgpr3 = IMPLICIT_DEF ; PAIR-GFX12-NEXT: $sgpr20 = IMPLICIT_DEF ; PAIR-GFX12-NEXT: $vgpr4 = V_FMAMK_F32 $sgpr20, 12345, $vgpr3, implicit $mode, implicit $exec - ; PAIR-GFX12-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32_gfx12 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX12-NEXT: $vgpr2, $vgpr5 = V_DUAL_FMAC_F32_e32_X_CNDMASK_B32_e32_gfx12 $sgpr20, killed $vgpr1, killed $vgpr2, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo ; PAIR-GFX12-NEXT: $vgpr7 = V_CNDMASK_B32_e32 killed $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo ; PAIR-GFX12-NEXT: $vgpr6 = V_ADD_F32_e32 $sgpr20, $vgpr3, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr9 = V_CNDMASK_B32_e32 killed $sgpr20, killed $vgpr3, implicit $mode, implicit $exec, implicit killed $vcc_lo @@ -458,9 +458,9 @@ body: | ; PAIR-GFX11-NEXT: $vgpr3, $vgpr6 = 
V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec - ; PAIR-GFX11-NEXT: $vgpr12, $vgpr19 = V_DUAL_ADD_F32_e32_X_CNDMASK_B32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX11-NEXT: $vgpr12, $vgpr19 = V_DUAL_ADD_F32_e32_X_CNDMASK_B32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo ; PAIR-GFX11-NEXT: $vgpr11 = V_CNDMASK_B32_e32 $vgpr0, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo - ; PAIR-GFX11-NEXT: $vgpr17, $vgpr10 = V_DUAL_MUL_F32_e32_X_CNDMASK_B32_e32_gfx11 killed $vgpr0, $vgpr0, $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX11-NEXT: $vgpr17, $vgpr10 = V_DUAL_MUL_F32_e32_X_CNDMASK_B32_e32_gfx11 killed $vgpr0, $vgpr0, $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo ; PAIR-GFX11-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit killed $vcc_lo ; PAIR-GFX11-NEXT: $vgpr16 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr14 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec @@ -476,9 +476,9 @@ body: | ; PAIR-GFX12-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx12 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, 
implicit $exec ; PAIR-GFX12-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec - ; PAIR-GFX12-NEXT: $vgpr12, $vgpr19 = V_DUAL_ADD_F32_e32_X_CNDMASK_B32_e32_gfx12 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX12-NEXT: $vgpr12, $vgpr19 = V_DUAL_ADD_F32_e32_X_CNDMASK_B32_e32_gfx12 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo ; PAIR-GFX12-NEXT: $vgpr11 = V_CNDMASK_B32_e32 $vgpr0, killed $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo - ; PAIR-GFX12-NEXT: $vgpr17, $vgpr10 = V_DUAL_MUL_F32_e32_X_CNDMASK_B32_e32_gfx12 killed $vgpr0, $vgpr0, $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX12-NEXT: $vgpr17, $vgpr10 = V_DUAL_MUL_F32_e32_X_CNDMASK_B32_e32_gfx12 killed $vgpr0, $vgpr0, $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo ; PAIR-GFX12-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, killed $vgpr2, implicit $mode, implicit $exec, implicit killed $vcc_lo ; PAIR-GFX12-NEXT: $vgpr16 = V_SUB_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr14 = V_SUB_F32_e32 killed $vgpr1, $vgpr1, implicit $mode, implicit $exec @@ -559,12 +559,12 @@ body: | ; PAIR-GFX11-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr2 = 
V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec - ; PAIR-GFX11-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo - ; PAIR-GFX11-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32_gfx11 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec + ; PAIR-GFX11-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32_gfx11 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX11-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32_gfx11 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo - ; PAIR-GFX11-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec - ; PAIR-GFX11-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_gfx11 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec - ; PAIR-GFX11-NEXT: $vgpr37, $vgpr14 = V_DUAL_CNDMASK_B32_e32_X_SUB_F32_e32_gfx11 $vgpr0, killed $vgpr3, $vgpr1, $vgpr1, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec + ; PAIR-GFX11-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx11 
$vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec + ; PAIR-GFX11-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_gfx11 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec + ; PAIR-GFX11-NEXT: $vgpr37, $vgpr14 = V_DUAL_CNDMASK_B32_e32_X_SUB_F32_e32_gfx11 $vgpr0, killed $vgpr3, $vgpr1, $vgpr1, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr20 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr21, $vgpr24 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx11 $vgpr1, $vgpr1, killed $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec ; PAIR-GFX11-NEXT: $vgpr28 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo @@ -586,12 +586,12 @@ body: | ; PAIR-GFX12-NEXT: $vgpr3, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx12 $vgpr1, $vgpr1, $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr2 = V_FMAC_F32_e32 10, $vgpr1, killed $vgpr2, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec - ; PAIR-GFX12-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32_gfx12 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo - ; PAIR-GFX12-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32_gfx12 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec + ; 
PAIR-GFX12-NEXT: $vgpr4, $vgpr29 = V_DUAL_SUB_F32_e32_X_CNDMASK_B32_e32_gfx12 $vgpr1, $vgpr1, $vgpr0, $vgpr3, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $vcc_lo + ; PAIR-GFX12-NEXT: $vgpr19, $vgpr20 = V_DUAL_CNDMASK_B32_e32_X_FMAC_F32_e32_gfx12 $vgpr0, $vgpr3, 10, $vgpr1, killed $vgpr20, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr15 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo - ; PAIR-GFX12-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx12 $vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec - ; PAIR-GFX12-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_gfx12 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec - ; PAIR-GFX12-NEXT: $vgpr37, $vgpr14 = V_DUAL_CNDMASK_B32_e32_X_SUB_F32_e32_gfx12 $vgpr0, killed $vgpr3, $vgpr1, $vgpr1, implicit $vcc, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec + ; PAIR-GFX12-NEXT: $vgpr10, $vgpr17 = V_DUAL_CNDMASK_B32_e32_X_MUL_F32_e32_gfx12 $vgpr1, $vgpr2, $vgpr0, $vgpr0, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec + ; PAIR-GFX12-NEXT: $vgpr11, $vgpr12 = V_DUAL_CNDMASK_B32_e32_X_ADD_F32_e32_gfx12 $vgpr0, $vgpr3, $vgpr1, $vgpr1, implicit $vcc_lo, implicit $exec, implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec + ; PAIR-GFX12-NEXT: $vgpr37, $vgpr14 = V_DUAL_CNDMASK_B32_e32_X_SUB_F32_e32_gfx12 $vgpr0, killed $vgpr3, $vgpr1, $vgpr1, implicit $vcc_lo, implicit $exec, 
implicit $mode, implicit $mode, implicit $exec, implicit $vcc_lo, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr20 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr21, $vgpr24 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx12 $vgpr1, $vgpr1, killed $vgpr0, $vgpr0, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec ; PAIR-GFX12-NEXT: $vgpr28 = V_CNDMASK_B32_e32 $vgpr1, $vgpr2, implicit $mode, implicit $exec, implicit $vcc_lo diff --git a/llvm/test/CodeGen/ARM/misched-branch-targets.mir b/llvm/test/CodeGen/ARM/misched-branch-targets.mir new file mode 100644 index 0000000..d828d9e --- /dev/null +++ b/llvm/test/CodeGen/ARM/misched-branch-targets.mir @@ -0,0 +1,169 @@ +# RUN: llc -o - -run-pass=machine-scheduler -misched=shuffle %s | FileCheck %s +# RUN: llc -o - -run-pass=postmisched %s | FileCheck %s + +# REQUIRES: asserts +# -misched=shuffle is only available with assertions enabled + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabi" + + define i32 @foo_bti() #0 { + entry: + ret i32 0 + } + + define i32 @foo_pac() #0 { + entry: + ret i32 0 + } + + define i32 @foo_pacbti() #0 { + entry: + ret i32 0 + } + + define i32 @foo_setjmp() #0 { + entry: + ret i32 0 + if.then: + ret i32 0 + } + + define i32 @foo_sg() #0 { + entry: + ret i32 0 + } + + declare i32 @setjmp(ptr noundef) #1 + declare void @longjmp(ptr noundef, i32 noundef) #2 + + attributes #0 = { "frame-pointer"="all" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main" } + attributes #1 = { nounwind returns_twice "frame-pointer"="all" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main" } + attributes #2 = { noreturn nounwind "frame-pointer"="all" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main" } + +... 
+--- +name: foo_bti +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r0 + + t2BTI + renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg + tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 + +... + +# CHECK-LABEL: name: foo_bti +# CHECK: body: +# CHECK-NEXT: bb.0.entry: +# CHECK-NEXT: liveins: $r0 +# CHECK-NEXT: {{^ +$}} +# CHECK-NEXT: t2BTI + +--- +name: foo_pac +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r0, $lr, $r12 + + frame-setup t2PAC implicit-def $r12, implicit $lr, implicit $sp + renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg + $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr + $r7 = frame-setup tMOVr killed $sp, 14 /* CC::al */, $noreg + early-clobber $sp = frame-setup t2STR_PRE killed $r12, $sp, -4, 14 /* CC::al */, $noreg + $r12, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg + $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr + t2AUT implicit $r12, implicit $lr, implicit $sp + tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + +... 
+ +# CHECK-LABEL: name: foo_pac +# CHECK: body: +# CHECK-NEXT: bb.0.entry: +# CHECK-NEXT: liveins: $r0, $lr, $r12 +# CHECK-NEXT: {{^ +$}} +# CHECK-NEXT: frame-setup t2PAC implicit-def $r12, implicit $lr, implicit $sp + +--- +name: foo_pacbti +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r0, $lr, $r12 + + frame-setup t2PACBTI implicit-def $r12, implicit $lr, implicit $sp + renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg + $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr + $r7 = frame-setup tMOVr killed $sp, 14 /* CC::al */, $noreg + early-clobber $sp = frame-setup t2STR_PRE killed $r12, $sp, -4, 14 /* CC::al */, $noreg + $r12, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg + $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr + t2AUT implicit $r12, implicit $lr, implicit $sp + tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + +... + +# CHECK-LABEL: name: foo_pacbti +# CHECK: body: +# CHECK-NEXT: bb.0.entry: +# CHECK-NEXT: liveins: $r0, $lr, $r12 +# CHECK-NEXT: {{^ +$}} +# CHECK-NEXT: frame-setup t2PACBTI implicit-def $r12, implicit $lr, implicit $sp + +--- +name: foo_setjmp +tracksRegLiveness: true +body: | + bb.0.entry: + successors: %bb.1 + liveins: $lr + + frame-setup tPUSH 14 /* CC::al */, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp + $r7 = frame-setup tMOVr $sp, 14 /* CC::al */, $noreg + $sp = frame-setup tSUBspi $sp, 40, 14 /* CC::al */, $noreg + renamable $r0 = tMOVr $sp, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @setjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0 + t2BTI + renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg + tCMPi8 killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2IT 0, 2, implicit-def $itstate + renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit $itstate + $sp = frame-destroy tADDspi $sp, 40, 0 
/* CC::eq */, $cpsr, implicit $itstate + frame-destroy tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $r0, implicit $sp, implicit killed $itstate + + bb.1.if.then: + renamable $r0 = tMOVr $sp, 14 /* CC::al */, $noreg + renamable $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @longjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit killed $r1, implicit-def $sp + +... + +# CHECK-LABEL: name: foo_setjmp +# CHECK: body: +# CHECK: tBL 14 /* CC::al */, $noreg, @setjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0 +# CHECK-NEXT: t2BTI + +--- +name: foo_sg +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r0 + + t2SG 14 /* CC::al */, $noreg + renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg + tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 + +... + +# CHECK-LABEL: name: foo_sg +# CHECK: body: +# CHECK-NEXT: bb.0.entry: +# CHECK-NEXT: liveins: $r0 +# CHECK-NEXT: {{^ +$}} +# CHECK-NEXT: t2SG diff --git a/llvm/test/CodeGen/ARM/usat.ll b/llvm/test/CodeGen/ARM/usat.ll index 024a98d..d01aa15 100644 --- a/llvm/test/CodeGen/ARM/usat.ll +++ b/llvm/test/CodeGen/ARM/usat.ll @@ -756,7 +756,7 @@ define i32 @mm_unsigned_sat_upper_lower_1(i32 %x) { ; V4T-NEXT: bic r1, r0, r0, asr #31 ; V4T-NEXT: ldr r0, .LCPI20_0 ; V4T-NEXT: cmp r1, r0 -; V4T-NEXT: movlo r0, r1 +; V4T-NEXT: movlt r0, r1 ; V4T-NEXT: bx lr ; V4T-NEXT: .p2align 2 ; V4T-NEXT: @ %bb.1: @@ -765,23 +765,12 @@ define i32 @mm_unsigned_sat_upper_lower_1(i32 %x) { ; ; V6-LABEL: mm_unsigned_sat_upper_lower_1: ; V6: @ %bb.0: @ %entry -; V6-NEXT: bic r1, r0, r0, asr #31 -; V6-NEXT: ldr r0, .LCPI20_0 -; V6-NEXT: cmp r1, r0 -; V6-NEXT: movlo r0, r1 +; V6-NEXT: usat r0, #23, r0 ; V6-NEXT: bx lr -; V6-NEXT: .p2align 2 -; V6-NEXT: @ %bb.1: -; V6-NEXT: .LCPI20_0: -; V6-NEXT: .long 8388607 @ 0x7fffff ; ; V6T2-LABEL: 
mm_unsigned_sat_upper_lower_1: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: bic r1, r0, r0, asr #31 -; V6T2-NEXT: movw r0, #65535 -; V6T2-NEXT: movt r0, #127 -; V6T2-NEXT: cmp r1, r0 -; V6T2-NEXT: movlo r0, r1 +; V6T2-NEXT: usat r0, #23, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smax.i32(i32 %x, i32 0) @@ -795,7 +784,7 @@ define i32 @mm_unsigned_sat_upper_lower_2(i32 %x) { ; V4T-NEXT: bic r1, r0, r0, asr #31 ; V4T-NEXT: ldr r0, .LCPI21_0 ; V4T-NEXT: cmp r1, r0 -; V4T-NEXT: movlo r0, r1 +; V4T-NEXT: movlt r0, r1 ; V4T-NEXT: bx lr ; V4T-NEXT: .p2align 2 ; V4T-NEXT: @ %bb.1: @@ -804,23 +793,12 @@ define i32 @mm_unsigned_sat_upper_lower_2(i32 %x) { ; ; V6-LABEL: mm_unsigned_sat_upper_lower_2: ; V6: @ %bb.0: @ %entry -; V6-NEXT: bic r1, r0, r0, asr #31 -; V6-NEXT: ldr r0, .LCPI21_0 -; V6-NEXT: cmp r1, r0 -; V6-NEXT: movlo r0, r1 +; V6-NEXT: usat r0, #23, r0 ; V6-NEXT: bx lr -; V6-NEXT: .p2align 2 -; V6-NEXT: @ %bb.1: -; V6-NEXT: .LCPI21_0: -; V6-NEXT: .long 8388607 @ 0x7fffff ; ; V6T2-LABEL: mm_unsigned_sat_upper_lower_2: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: bic r1, r0, r0, asr #31 -; V6T2-NEXT: movw r0, #65535 -; V6T2-NEXT: movt r0, #127 -; V6T2-NEXT: cmp r1, r0 -; V6T2-NEXT: movlo r0, r1 +; V6T2-NEXT: usat r0, #23, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smax.i32(i32 %x, i32 0) @@ -834,7 +812,7 @@ define i32 @mm_unsigned_sat_upper_lower_3(i32 %x) { ; V4T-NEXT: bic r1, r0, r0, asr #31 ; V4T-NEXT: ldr r0, .LCPI22_0 ; V4T-NEXT: cmp r1, r0 -; V4T-NEXT: movlo r0, r1 +; V4T-NEXT: movlt r0, r1 ; V4T-NEXT: bx lr ; V4T-NEXT: .p2align 2 ; V4T-NEXT: @ %bb.1: @@ -843,23 +821,12 @@ define i32 @mm_unsigned_sat_upper_lower_3(i32 %x) { ; ; V6-LABEL: mm_unsigned_sat_upper_lower_3: ; V6: @ %bb.0: @ %entry -; V6-NEXT: bic r1, r0, r0, asr #31 -; V6-NEXT: ldr r0, .LCPI22_0 -; V6-NEXT: cmp r1, r0 -; V6-NEXT: movlo r0, r1 +; V6-NEXT: usat r0, #23, r0 ; V6-NEXT: bx lr -; V6-NEXT: .p2align 2 -; V6-NEXT: @ %bb.1: -; V6-NEXT: .LCPI22_0: -; V6-NEXT: .long 8388607 @ 0x7fffff ; ; 
V6T2-LABEL: mm_unsigned_sat_upper_lower_3: ; V6T2: @ %bb.0: @ %entry -; V6T2-NEXT: bic r1, r0, r0, asr #31 -; V6T2-NEXT: movw r0, #65535 -; V6T2-NEXT: movt r0, #127 -; V6T2-NEXT: cmp r1, r0 -; V6T2-NEXT: movlo r0, r1 +; V6T2-NEXT: usat r0, #23, r0 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smax.i32(i32 %x, i32 0) @@ -913,7 +880,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) { ; V4T-NEXT: mov r0, #1 ; V4T-NEXT: orr r0, r0, #8388608 ; V4T-NEXT: cmp r1, #8388608 -; V4T-NEXT: movls r0, r1 +; V4T-NEXT: movle r0, r1 ; V4T-NEXT: bx lr ; ; V6-LABEL: mm_no_unsigned_sat_incorrect_constant2: @@ -922,7 +889,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) { ; V6-NEXT: mov r0, #1 ; V6-NEXT: orr r0, r0, #8388608 ; V6-NEXT: cmp r1, #8388608 -; V6-NEXT: movls r0, r1 +; V6-NEXT: movle r0, r1 ; V6-NEXT: bx lr ; ; V6T2-LABEL: mm_no_unsigned_sat_incorrect_constant2: @@ -931,7 +898,7 @@ define i32 @mm_no_unsigned_sat_incorrect_constant2(i32 %x) { ; V6T2-NEXT: movw r0, #1 ; V6T2-NEXT: movt r0, #128 ; V6T2-NEXT: cmp r1, #8388608 -; V6T2-NEXT: movls r0, r1 +; V6T2-NEXT: movle r0, r1 ; V6T2-NEXT: bx lr entry: %0 = call i32 @llvm.smax.i32(i32 %x, i32 0) @@ -981,6 +948,29 @@ entry: ret i32 %1 } +define i32 @test_umin_smax_usat(i32 %x) { +; V4T-LABEL: test_umin_smax_usat: +; V4T: @ %bb.0: @ %entry +; V4T-NEXT: bic r0, r0, r0, asr #31 +; V4T-NEXT: cmp r0, #255 +; V4T-NEXT: movge r0, #255 +; V4T-NEXT: bx lr +; +; V6-LABEL: test_umin_smax_usat: +; V6: @ %bb.0: @ %entry +; V6-NEXT: usat r0, #8, r0 +; V6-NEXT: bx lr +; +; V6T2-LABEL: test_umin_smax_usat: +; V6T2: @ %bb.0: @ %entry +; V6T2-NEXT: usat r0, #8, r0 +; V6T2-NEXT: bx lr +entry: + %v1 = tail call i32 @llvm.smax.i32(i32 %x, i32 0) + %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 255) + ret i32 %v2 +} + declare i32 @llvm.smin.i32(i32, i32) declare i32 @llvm.smax.i32(i32, i32) declare i16 @llvm.smin.i16(i16, i16) diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir 
b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir index eaa6279..40ea011 100644 --- a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir +++ b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables-x.mir @@ -1,5 +1,6 @@ # REQUIRES: x86-registered-target # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-check-debugify -o - %s 2>&1 | FileCheck %s +# RUN: llc --experimental-debuginfo-iterators=false -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-check-debugify -o - %s 2>&1 | FileCheck %s --- | ; ModuleID = 'check-line-and-variables.mir' source_filename = "check-line-and-variables.c" diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.ll b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.ll index 9033fd2..56c7cf45 100644 --- a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.ll +++ b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.ll @@ -1,4 +1,5 @@ ; RUN: llc -debugify-check-and-strip-all-safe -o - %s 2>&1 | FileCheck %s +; RUN: llc --experimental-debuginfo-iterators=false -debugify-check-and-strip-all-safe -o - %s 2>&1 | FileCheck %s ; ModuleID = 'main.c' source_filename = "main.c" diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir index 9eb7222..0805a7f 100644 --- a/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir +++ b/llvm/test/CodeGen/Generic/MIRDebugify/check-line-and-variables.mir @@ -1,6 +1,8 @@ # REQUIRES: x86-registered-target # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-debugify,dead-mi-elimination,mir-check-debugify -o - %s 2>&1 | FileCheck %s # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-debugify,mir-check-debugify -o - %s 2>&1 | FileCheck %s --check-prefix=CHECK-PASS +# RUN: llc --experimental-debuginfo-iterators=false -mtriple=x86_64-unknown-linux-gnu 
-run-pass=mir-debugify,dead-mi-elimination,mir-check-debugify -o - %s 2>&1 | FileCheck %s +# RUN: llc --experimental-debuginfo-iterators=false -mtriple=x86_64-unknown-linux-gnu -run-pass=mir-debugify,mir-check-debugify -o - %s 2>&1 | FileCheck %s --check-prefix=CHECK-PASS --- | ; ModuleID = 'check-line-and-variables.mir' source_filename = "check-line-and-variables.ll" diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir b/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir index 59dcff9..3035fb8 100644 --- a/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir +++ b/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir @@ -2,6 +2,10 @@ # RUN: llc -run-pass=mir-debugify -debugify-level=locations -o - %s | FileCheck --check-prefixes=ALL --implicit-check-not=dbg.value %s # RUN: llc -run-pass=mir-debugify,mir-strip-debug,mir-debugify -o - %s | FileCheck --check-prefixes=ALL,VALUE %s # RUN: llc -run-pass=mir-debugify,mir-strip-debug -o - %s | FileCheck --check-prefix=STRIP %s +# RUN: llc --experimental-debuginfo-iterators=false -run-pass=mir-debugify -o - %s | FileCheck --check-prefixes=ALL,VALUE %s +# RUN: llc --experimental-debuginfo-iterators=false -run-pass=mir-debugify -debugify-level=locations -o - %s | FileCheck --check-prefixes=ALL --implicit-check-not=dbg.value %s +# RUN: llc --experimental-debuginfo-iterators=false -run-pass=mir-debugify,mir-strip-debug,mir-debugify -o - %s | FileCheck --check-prefixes=ALL,VALUE %s +# RUN: llc --experimental-debuginfo-iterators=false -run-pass=mir-debugify,mir-strip-debug -o - %s | FileCheck --check-prefix=STRIP %s --- | ; ModuleID = 'loc-only.ll' diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/multifunction-module.mir b/llvm/test/CodeGen/Generic/MIRDebugify/multifunction-module.mir index fe4fcc1..8079db9 100644 --- a/llvm/test/CodeGen/Generic/MIRDebugify/multifunction-module.mir +++ b/llvm/test/CodeGen/Generic/MIRDebugify/multifunction-module.mir @@ -1,6 +1,5 @@ -# 
FIXME: Remove rm after a few weeks. -# RUN: rm -f %S/multifunction-module.s # RUN: llc -run-pass=mir-debugify,mir-check-debugify -o - %s 2>&1 | FileCheck %s +# RUN: llc --experimental-debuginfo-iterators=false -run-pass=mir-debugify,mir-check-debugify -o - %s 2>&1 | FileCheck %s # CHECK: Machine IR debug info check: PASS # CHECK-NOT: Assertion `Var <= NumVars && "Unexpected name for DILocalVariable"' diff --git a/llvm/test/CodeGen/Generic/expand-vp-fp-intrinsics.ll b/llvm/test/CodeGen/Generic/expand-vp-fp-intrinsics.ll new file mode 100644 index 0000000..bc89ddea --- /dev/null +++ b/llvm/test/CodeGen/Generic/expand-vp-fp-intrinsics.ll @@ -0,0 +1,176 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -expandvp -S < %s | FileCheck %s + +define void @vp_fadd_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind { +; CHECK-LABEL: define void @vp_fadd_v4f32( +; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[RES1:%.*]] = fadd <4 x float> [[A0]], [[A1]] +; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16 +; CHECK-NEXT: ret void +; + %res = call <4 x float> @llvm.vp.fadd.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp) + store <4 x float> %res, ptr %out + ret void +} +declare <4 x float> @llvm.vp.fadd.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32) + +define void @vp_fsub_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind { +; CHECK-LABEL: define void @vp_fsub_v4f32( +; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RES1:%.*]] = fsub <4 x float> [[A0]], [[A1]] +; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16 +; CHECK-NEXT: ret void +; + %res = call <4 x float> @llvm.vp.fsub.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp) 
+ store <4 x float> %res, ptr %out + ret void +} +declare <4 x float> @llvm.vp.fsub.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32) + +define void @vp_fmul_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind { +; CHECK-LABEL: define void @vp_fmul_v4f32( +; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RES1:%.*]] = fmul <4 x float> [[A0]], [[A1]] +; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16 +; CHECK-NEXT: ret void +; + %res = call <4 x float> @llvm.vp.fmul.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp) + store <4 x float> %res, ptr %out + ret void +} +declare <4 x float> @llvm.vp.fmul.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32) + +define void @vp_fdiv_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind { +; CHECK-LABEL: define void @vp_fdiv_v4f32( +; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RES1:%.*]] = fdiv <4 x float> [[A0]], [[A1]] +; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16 +; CHECK-NEXT: ret void +; + %res = call <4 x float> @llvm.vp.fdiv.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp) + store <4 x float> %res, ptr %out + ret void +} +declare <4 x float> @llvm.vp.fdiv.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32) + +define void @vp_frem_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind { +; CHECK-LABEL: define void @vp_frem_v4f32( +; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RES1:%.*]] = frem <4 x float> [[A0]], [[A1]] +; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16 +; CHECK-NEXT: ret void +; + %res = call <4 x float> @llvm.vp.frem.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp) + store <4 x float> 
%res, ptr %out + ret void +} +declare <4 x float> @llvm.vp.frem.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32) + +define void @vp_fabs_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind { +; CHECK-LABEL: define void @vp_fabs_v4f32( +; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A0]]) +; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16 +; CHECK-NEXT: ret void +; + %res = call <4 x float> @llvm.vp.fabs.v4f32(<4 x float> %a0, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp) + store <4 x float> %res, ptr %out + ret void +} +declare <4 x float> @llvm.vp.fabs.v4f32(<4 x float>, <4 x i1>, i32) + +define void @vp_sqrt_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind { +; CHECK-LABEL: define void @vp_sqrt_v4f32( +; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[A0]]) +; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16 +; CHECK-NEXT: ret void +; + %res = call <4 x float> @llvm.vp.sqrt.v4f32(<4 x float> %a0, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp) + store <4 x float> %res, ptr %out + ret void +} +declare <4 x float> @llvm.vp.sqrt.v4f32(<4 x float>, <4 x i1>, i32) + +define void @vp_fneg_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i32 %vp) nounwind { +; CHECK-LABEL: define void @vp_fneg_v4f32( +; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i32 [[VP:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RES1:%.*]] = fneg <4 x float> [[A0]] +; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16 +; CHECK-NEXT: ret void +; + %res = call <4 x float> @llvm.vp.fneg.v4f32(<4 x float> %a0, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 %vp) + store <4 x float> %res, ptr %out + ret void +} +declare <4 x float> 
@llvm.vp.fneg.v4f32(<4 x float>, <4 x i1>, i32) + +define void @vp_fma_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i4 %a5) nounwind { +; CHECK-LABEL: define void @vp_fma_v4f32( +; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i4 [[A5:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A1]]) +; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16 +; CHECK-NEXT: ret void +; + %res = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 4) + store <4 x float> %res, ptr %out + ret void +} +declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32) + +define void @vp_fmuladd_v4f32(<4 x float> %a0, <4 x float> %a1, ptr %out, i4 %a5) nounwind { +; CHECK-LABEL: define void @vp_fmuladd_v4f32( +; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], ptr [[OUT:%.*]], i4 [[A5:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A1]]) +; CHECK-NEXT: store <4 x float> [[RES1]], ptr [[OUT]], align 16 +; CHECK-NEXT: ret void +; + %res = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a1, <4 x i1> <i1 -1, i1 -1, i1 -1, i1 -1>, i32 4) + store <4 x float> %res, ptr %out + ret void +} +declare <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32) + +declare <4 x float> @llvm.vp.maxnum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32) +define <4 x float> @vfmax_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: define <4 x float> @vfmax_vv_v4f32( +; CHECK-SAME: <4 x float> [[VA:%.*]], <4 x float> [[VB:%.*]], <4 x i1> [[M:%.*]], i32 zeroext [[EVL:%.*]]) { +; CHECK-NEXT: [[V1:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VA]], <4 x 
float> [[VB]]) +; CHECK-NEXT: ret <4 x float> [[V1]] +; + %v = call <4 x float> @llvm.vp.maxnum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) + ret <4 x float> %v +} + +declare <8 x float> @llvm.vp.maxnum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) +define <8 x float> @vfmax_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: define <8 x float> @vfmax_vv_v8f32( +; CHECK-SAME: <8 x float> [[VA:%.*]], <8 x float> [[VB:%.*]], <8 x i1> [[M:%.*]], i32 zeroext [[EVL:%.*]]) { +; CHECK-NEXT: [[V1:%.*]] = call <8 x float> @llvm.maxnum.v8f32(<8 x float> [[VA]], <8 x float> [[VB]]) +; CHECK-NEXT: ret <8 x float> [[V1]] +; + %v = call <8 x float> @llvm.vp.maxnum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) + ret <8 x float> %v +} + +declare <4 x float> @llvm.vp.minnum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32) +define <4 x float> @vfmin_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: define <4 x float> @vfmin_vv_v4f32( +; CHECK-SAME: <4 x float> [[VA:%.*]], <4 x float> [[VB:%.*]], <4 x i1> [[M:%.*]], i32 zeroext [[EVL:%.*]]) { +; CHECK-NEXT: [[V1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VA]], <4 x float> [[VB]]) +; CHECK-NEXT: ret <4 x float> [[V1]] +; + %v = call <4 x float> @llvm.vp.minnum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) + ret <4 x float> %v +} + +declare <8 x float> @llvm.vp.minnum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32) +define <8 x float> @vfmin_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: define <8 x float> @vfmin_vv_v8f32( +; CHECK-SAME: <8 x float> [[VA:%.*]], <8 x float> [[VB:%.*]], <8 x i1> [[M:%.*]], i32 zeroext [[EVL:%.*]]) { +; CHECK-NEXT: [[V1:%.*]] = call <8 x float> @llvm.minnum.v8f32(<8 x float> [[VA]], <8 x float> [[VB]]) +; CHECK-NEXT: ret <8 x float> [[V1]] +; + %v = call <8 x float> @llvm.vp.minnum.v8f32(<8 x float> %va, <8 x float> 
%vb, <8 x i1> %m, i32 %evl) + ret <8 x float> %v +} diff --git a/llvm/test/CodeGen/LoongArch/gep-imm.ll b/llvm/test/CodeGen/LoongArch/gep-imm.ll new file mode 100644 index 0000000..0eef7e4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/gep-imm.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s + +define void @test(ptr %sp, ptr %t, i32 %n) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: move $a3, $zero +; CHECK-NEXT: addi.w $a2, $a2, 0 +; CHECK-NEXT: addi.w $a4, $a3, 0 +; CHECK-NEXT: bge $a4, $a2, .LBB0_2 +; CHECK-NEXT: .p2align 4, , 16 +; CHECK-NEXT: .LBB0_1: # %while_body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: addi.d $a4, $a3, 1 +; CHECK-NEXT: stptr.w $a4, $a0, 8000 +; CHECK-NEXT: stptr.w $a3, $a0, 8004 +; CHECK-NEXT: stptr.w $a4, $a1, 8000 +; CHECK-NEXT: stptr.w $a3, $a1, 8004 +; CHECK-NEXT: move $a3, $a4 +; CHECK-NEXT: addi.w $a4, $a3, 0 +; CHECK-NEXT: blt $a4, $a2, .LBB0_1 +; CHECK-NEXT: .LBB0_2: # %while_end +; CHECK-NEXT: ret +entry: + %s = load ptr, ptr %sp + br label %while_cond + +while_cond: + %phi = phi i32 [ 0, %entry ], [ %i, %while_body ] + %gep0 = getelementptr [65536 x i32], ptr %s, i64 0, i64 2000 + %gep1 = getelementptr [65536 x i32], ptr %s, i64 0, i64 2001 + %gep2 = getelementptr [65536 x i32], ptr %t, i64 0, i64 2000 + %gep3 = getelementptr [65536 x i32], ptr %t, i64 0, i64 2001 + %cmp = icmp slt i32 %phi, %n + br i1 %cmp, label %while_body, label %while_end + +while_body: + %i = add i32 %phi, 1 + store i32 %i, ptr %gep0 + store i32 %phi, ptr %gep1 + store i32 %i, ptr %gep2 + store i32 %phi, ptr %gep3 + br label %while_cond + +while_end: + ret void +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir index df0d48a..0677232 100644 --- 
a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir @@ -13,13 +13,13 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv1i8 ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV32I-NEXT: [[PseudoVMSLTU_VV_MF8_:%[0-9]+]]:vr = PseudoVMSLTU_VV_MF8 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: [[PseudoVMSLTU_VV_MF8_:%[0-9]+]]:vmm1 = PseudoVMSLTU_VV_MF8 [[DEF]], [[DEF]], -1, 3 /* e8 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_MF8_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv1i8 ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV64I-NEXT: [[PseudoVMSLTU_VV_MF8_:%[0-9]+]]:vr = PseudoVMSLTU_VV_MF8 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: [[PseudoVMSLTU_VV_MF8_:%[0-9]+]]:vmm1 = PseudoVMSLTU_VV_MF8 [[DEF]], [[DEF]], -1, 3 /* e8 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_MF8_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 1 x s8>) = G_IMPLICIT_DEF @@ -37,13 +37,13 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv2i8 ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV32I-NEXT: [[PseudoVMSLT_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLT_VV_MF4 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: [[PseudoVMSLT_VV_MF4_:%[0-9]+]]:vmm1 = PseudoVMSLT_VV_MF4 [[DEF]], [[DEF]], -1, 3 /* e8 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_MF4_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv2i8 ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV64I-NEXT: [[PseudoVMSLT_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLT_VV_MF4 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: [[PseudoVMSLT_VV_MF4_:%[0-9]+]]:vmm1 = PseudoVMSLT_VV_MF4 [[DEF]], [[DEF]], -1, 3 /* e8 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_MF4_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 2 x s8>) = G_IMPLICIT_DEF @@ -61,13 +61,13 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv4i8 ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV32I-NEXT: [[PseudoVMSLEU_VV_MF2_:%[0-9]+]]:vr = 
PseudoVMSLEU_VV_MF2 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: [[PseudoVMSLEU_VV_MF2_:%[0-9]+]]:vmm1 = PseudoVMSLEU_VV_MF2 [[DEF]], [[DEF]], -1, 3 /* e8 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_MF2_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv4i8 ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV64I-NEXT: [[PseudoVMSLEU_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLEU_VV_MF2 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: [[PseudoVMSLEU_VV_MF2_:%[0-9]+]]:vmm1 = PseudoVMSLEU_VV_MF2 [[DEF]], [[DEF]], -1, 3 /* e8 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_MF2_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 4 x s8>) = G_IMPLICIT_DEF @@ -85,13 +85,13 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv8i8 ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV32I-NEXT: [[PseudoVMSLE_VV_M1_:%[0-9]+]]:vr = PseudoVMSLE_VV_M1 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: [[PseudoVMSLE_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSLE_VV_M1 [[DEF]], [[DEF]], -1, 3 /* e8 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_M1_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv8i8 ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV64I-NEXT: [[PseudoVMSLE_VV_M1_:%[0-9]+]]:vr = PseudoVMSLE_VV_M1 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: [[PseudoVMSLE_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSLE_VV_M1 [[DEF]], [[DEF]], -1, 3 /* e8 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_M1_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 8 x s8>) = G_IMPLICIT_DEF @@ -109,14 +109,14 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv16i8 ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF - ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 3 /* e8 */ - ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: [[PseudoVMSLTU_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M2_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv16i8 ; RV64I: [[DEF:%[0-9]+]]:vrm2 = 
IMPLICIT_DEF - ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 3 /* e8 */ - ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: [[PseudoVMSLTU_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M2_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 16 x s8>) = G_IMPLICIT_DEF %1:vrb(<vscale x 16 x s1>) = G_ICMP intpred(ugt), %0(<vscale x 16 x s8>), %0 @@ -133,14 +133,14 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv32i8 ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF - ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 3 /* e8 */ - ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: [[PseudoVMSLT_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_M4_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv32i8 ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF - ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 3 /* e8 */ - ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: [[PseudoVMSLT_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_M4_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 32 x s8>) = G_IMPLICIT_DEF %1:vrb(<vscale x 32 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 32 x s8>), %0 @@ -157,14 +157,14 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv64i8 ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF - ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 3 /* e8 */ - ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: [[PseudoVMSLEU_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_M8_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv64i8 ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF - ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 3 /* e8 
*/ - ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: [[PseudoVMSLEU_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_M8_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 64 x s8>) = G_IMPLICIT_DEF %1:vrb(<vscale x 64 x s1>) = G_ICMP intpred(ule), %0(<vscale x 64 x s8>), %0 @@ -181,13 +181,13 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv1i16 ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV32I-NEXT: [[PseudoVMSLE_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF4 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: [[PseudoVMSLE_VV_MF4_:%[0-9]+]]:vmm1 = PseudoVMSLE_VV_MF4 [[DEF]], [[DEF]], -1, 4 /* e16 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF4_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv1i16 ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV64I-NEXT: [[PseudoVMSLE_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF4 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: [[PseudoVMSLE_VV_MF4_:%[0-9]+]]:vmm1 = PseudoVMSLE_VV_MF4 [[DEF]], [[DEF]], -1, 4 /* e16 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF4_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 1 x s16>) = G_IMPLICIT_DEF @@ -205,13 +205,13 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv2i16 ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV32I-NEXT: [[PseudoVMSNE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSNE_VV_MF2 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: [[PseudoVMSNE_VV_MF2_:%[0-9]+]]:vmm1 = PseudoVMSNE_VV_MF2 [[DEF]], [[DEF]], -1, 4 /* e16 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSNE_VV_MF2_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv2i16 ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV64I-NEXT: [[PseudoVMSNE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSNE_VV_MF2 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: [[PseudoVMSNE_VV_MF2_:%[0-9]+]]:vmm1 = PseudoVMSNE_VV_MF2 [[DEF]], [[DEF]], -1, 4 /* e16 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSNE_VV_MF2_]] ; RV64I-NEXT: PseudoRET implicit $v8 
%0:vrb(<vscale x 2 x s16>) = G_IMPLICIT_DEF @@ -229,13 +229,13 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv4i16 ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV32I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 4 /* e16 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv4i16 ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV64I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 4 /* e16 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 4 x s16>) = G_IMPLICIT_DEF @@ -253,14 +253,14 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv8i16 ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF - ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 4 /* e16 */ - ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: [[PseudoVMSLTU_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M2_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv8i16 ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF - ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 4 /* e16 */ - ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: [[PseudoVMSLTU_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M2_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 8 x s16>) = G_IMPLICIT_DEF %1:vrb(<vscale x 8 x s1>) = G_ICMP intpred(ult), %0(<vscale x 8 x s16>), %0 @@ -277,14 +277,14 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv16i16 ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF - ; RV32I-NEXT: early-clobber %1:vr = 
PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 4 /* e16 */ - ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: [[PseudoVMSLT_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_M4_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv16i16 ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF - ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 4 /* e16 */ - ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: [[PseudoVMSLT_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_M4_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 16 x s16>) = G_IMPLICIT_DEF %1:vrb(<vscale x 16 x s1>) = G_ICMP intpred(slt), %0(<vscale x 16 x s16>), %0 @@ -301,14 +301,14 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv32i16 ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF - ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 4 /* e16 */ - ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: [[PseudoVMSLEU_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_M8_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv32i16 ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF - ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 4 /* e16 */ - ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: [[PseudoVMSLEU_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_M8_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 32 x s16>) = G_IMPLICIT_DEF %1:vrb(<vscale x 32 x s1>) = G_ICMP intpred(uge), %0(<vscale x 32 x s16>), %0 @@ -325,13 +325,13 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv1i32 ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV32I-NEXT: [[PseudoVMSLE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF2 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV32I-NEXT: 
[[PseudoVMSLE_VV_MF2_:%[0-9]+]]:vmm1 = PseudoVMSLE_VV_MF2 [[DEF]], [[DEF]], -1, 5 /* e32 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF2_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv1i32 ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV64I-NEXT: [[PseudoVMSLE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF2 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV64I-NEXT: [[PseudoVMSLE_VV_MF2_:%[0-9]+]]:vmm1 = PseudoVMSLE_VV_MF2 [[DEF]], [[DEF]], -1, 5 /* e32 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF2_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 1 x s32>) = G_IMPLICIT_DEF @@ -349,13 +349,13 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv2i32 ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV32I-NEXT: [[PseudoVMSLTU_VV_M1_:%[0-9]+]]:vr = PseudoVMSLTU_VV_M1 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV32I-NEXT: [[PseudoVMSLTU_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSLTU_VV_M1 [[DEF]], [[DEF]], -1, 5 /* e32 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M1_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv2i32 ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV64I-NEXT: [[PseudoVMSLTU_VV_M1_:%[0-9]+]]:vr = PseudoVMSLTU_VV_M1 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV64I-NEXT: [[PseudoVMSLTU_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSLTU_VV_M1 [[DEF]], [[DEF]], -1, 5 /* e32 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M1_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 2 x s32>) = G_IMPLICIT_DEF @@ -373,14 +373,14 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv4i32 ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF - ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M2 [[DEF]], [[DEF]], -1, 5 /* e32 */ - ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: [[PseudoVMSLT_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSLT_VV_M2 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_M2_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv4i32 ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF - ; RV64I-NEXT: early-clobber %1:vr = 
PseudoVMSLT_VV_M2 [[DEF]], [[DEF]], -1, 5 /* e32 */ - ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: [[PseudoVMSLT_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSLT_VV_M2 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_M2_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 4 x s32>) = G_IMPLICIT_DEF %1:vrb(<vscale x 4 x s1>) = G_ICMP intpred(sgt), %0(<vscale x 4 x s32>), %0 @@ -397,14 +397,14 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv8i32 ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF - ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M4 [[DEF]], [[DEF]], -1, 5 /* e32 */ - ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: [[PseudoVMSLEU_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLEU_VV_M4 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_M4_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv8i32 ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF - ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M4 [[DEF]], [[DEF]], -1, 5 /* e32 */ - ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: [[PseudoVMSLEU_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLEU_VV_M4 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_M4_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 8 x s32>) = G_IMPLICIT_DEF %1:vrb(<vscale x 8 x s1>) = G_ICMP intpred(ule), %0(<vscale x 8 x s32>), %0 @@ -421,14 +421,14 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv16i32 ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF - ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLE_VV_M8 [[DEF]], [[DEF]], -1, 5 /* e32 */ - ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: [[PseudoVMSLE_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLE_VV_M8 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_M8_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv16i32 ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF - ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLE_VV_M8 [[DEF]], [[DEF]], -1, 5 /* e32 */ - ; RV64I-NEXT: $v8 = COPY %1 + ; 
RV64I-NEXT: [[PseudoVMSLE_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLE_VV_M8 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_M8_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 16 x s32>) = G_IMPLICIT_DEF %1:vrb(<vscale x 16 x s1>) = G_ICMP intpred(sle), %0(<vscale x 16 x s32>), %0 @@ -445,13 +445,13 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv1i64 ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV32I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV32I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 6 /* e64 */ ; RV32I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv1i64 ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF - ; RV64I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV64I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vmm1 = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 6 /* e64 */ ; RV64I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 1 x s64>) = G_IMPLICIT_DEF @@ -469,14 +469,14 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv2i64 ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF - ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSNE_VV_M2 [[DEF]], [[DEF]], -1, 6 /* e64 */ - ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: [[PseudoVMSNE_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSNE_VV_M2 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSNE_VV_M2_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv2i64 ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF - ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSNE_VV_M2 [[DEF]], [[DEF]], -1, 6 /* e64 */ - ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: [[PseudoVMSNE_VV_M2_:%[0-9]+]]:vmm2 = PseudoVMSNE_VV_M2 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSNE_VV_M2_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 2 x s64>) 
= G_IMPLICIT_DEF %1:vrb(<vscale x 2 x s1>) = G_ICMP intpred(ne), %0(<vscale x 2 x s64>), %0 @@ -493,14 +493,14 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv4i64 ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF - ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M4 [[DEF]], [[DEF]], -1, 6 /* e64 */ - ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: [[PseudoVMSLTU_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLTU_VV_M4 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M4_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv4i64 ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF - ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M4 [[DEF]], [[DEF]], -1, 6 /* e64 */ - ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: [[PseudoVMSLTU_VV_M4_:%[0-9]+]]:vmm4 = PseudoVMSLTU_VV_M4 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M4_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 4 x s64>) = G_IMPLICIT_DEF %1:vrb(<vscale x 4 x s1>) = G_ICMP intpred(ult), %0(<vscale x 4 x s64>), %0 @@ -517,14 +517,14 @@ body: | bb.0.entry: ; RV32I-LABEL: name: icmp_nxv8i64 ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF - ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M8 [[DEF]], [[DEF]], -1, 6 /* e64 */ - ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: [[PseudoVMSLTU_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLTU_VV_M8 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M8_]] ; RV32I-NEXT: PseudoRET implicit $v8 ; ; RV64I-LABEL: name: icmp_nxv8i64 ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF - ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M8 [[DEF]], [[DEF]], -1, 6 /* e64 */ - ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: [[PseudoVMSLTU_VV_M8_:%[0-9]+]]:vmm8 = PseudoVMSLTU_VV_M8 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M8_]] ; RV64I-NEXT: PseudoRET implicit $v8 %0:vrb(<vscale x 8 x s64>) = G_IMPLICIT_DEF %1:vrb(<vscale x 8 x s1>) = G_ICMP intpred(ult), %0(<vscale x 8 x 
s64>), %0 diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll index 2326599..080783f 100644 --- a/llvm/test/CodeGen/RISCV/attributes.ll +++ b/llvm/test/CodeGen/RISCV/attributes.ll @@ -115,6 +115,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+zacas %s -o - | FileCheck --check-prefix=RV32ZACAS %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zalasr %s -o - | FileCheck --check-prefix=RV32ZALASR %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zalrsc %s -o - | FileCheck --check-prefix=RV32ZALRSC %s +; RUN: llc -mtriple=riscv32 -mattr=+zama16b %s -o - | FileCheck --check-prefixes=CHECK,RV32ZAMA16B %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zicfilp %s -o - | FileCheck --check-prefix=RV32ZICFILP %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zabha %s -o - | FileCheck --check-prefix=RV32ZABHA %s ; RUN: llc -mtriple=riscv32 -mattr=+experimental-ssnpm %s -o - | FileCheck --check-prefix=RV32SSNPM %s @@ -199,6 +200,7 @@ ; RUN: llc -mtriple=riscv64 -mattr=+xtheadvdot %s -o - | FileCheck --check-prefixes=CHECK,RV64XTHEADVDOT %s ; RUN: llc -mtriple=riscv64 -mattr=+za64rs %s -o - | FileCheck --check-prefixes=CHECK,RV64ZA64RS %s ; RUN: llc -mtriple=riscv64 -mattr=+za128rs %s -o - | FileCheck --check-prefixes=CHECK,RV64ZA128RS %s +; RUN: llc -mtriple=riscv64 -mattr=+zama16b %s -o - | FileCheck --check-prefixes=CHECK,RV64ZAMA16B %s ; RUN: llc -mtriple=riscv64 -mattr=+zawrs %s -o - | FileCheck --check-prefixes=CHECK,RV64ZAWRS %s ; RUN: llc -mtriple=riscv64 -mattr=+experimental-ztso %s -o - | FileCheck --check-prefixes=CHECK,RV64ZTSO %s ; RUN: llc -mtriple=riscv64 -mattr=+zca %s -o - | FileCheck --check-prefixes=CHECK,RV64ZCA %s @@ -370,6 +372,7 @@ ; RV32ZACAS: .attribute 5, "rv32i2p1_a2p1_zacas1p0" ; RV32ZALASR: .attribute 5, "rv32i2p1_zalasr0p1" ; RV32ZALRSC: .attribute 5, "rv32i2p1_zalrsc0p2" +; RV32ZAMA16B: .attribute 5, "rv32i2p1_zama16b1p0" ; RV32ZICFILP: .attribute 5, "rv32i2p1_zicfilp0p4" ; RV32ZABHA: .attribute 5, 
"rv32i2p1_a2p1_zabha1p0" ; RV32SSNPM: .attribute 5, "rv32i2p1_ssnpm0p8" @@ -418,6 +421,7 @@ ; RV64ZICBOZ: .attribute 5, "rv64i2p1_zicboz1p0" ; RV64ZA64RS: .attribute 5, "rv64i2p1_za64rs1p0" ; RV64ZA128RS: .attribute 5, "rv64i2p1_za128rs1p0" +; RV64ZAMA16B: .attribute 5, "rv64i2p1_zama16b1p0" ; RV64ZAWRS: .attribute 5, "rv64i2p1_zawrs1p0" ; RV64ZICBOP: .attribute 5, "rv64i2p1_zicbop1p0" ; RV64SHCOUNTERENW: .attribute 5, "rv64i2p1_shcounterenw1p0" diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll index c3f91d2..e006e74 100644 --- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll +++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll @@ -128,43 +128,113 @@ define i64 @ctz_nxv8i1_no_range(<vscale x 8 x i16> %a) { define i32 @ctz_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) { ; RV32-LABEL: ctz_nxv16i1: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; RV32-NEXT: vfirst.m a0, v8 +; RV32-NEXT: bgez a0, .LBB2_2 +; RV32-NEXT: # %bb.1: ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 -; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v8, a0 -; RV32-NEXT: vid.v v16 -; RV32-NEXT: li a1, -1 -; RV32-NEXT: vmadd.vx v16, a1, v8 -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 -; RV32-NEXT: vredmaxu.vs v8, v8, v8 -; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: sub a0, a0, a1 +; RV32-NEXT: .LBB2_2: ; RV32-NEXT: ret ; ; RV64-LABEL: ctz_nxv16i1: ; RV64: # %bb.0: -; RV64-NEXT: vmv1r.v v0, v8 +; RV64-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; RV64-NEXT: vfirst.m a0, v8 +; RV64-NEXT: bgez a0, .LBB2_2 +; RV64-NEXT: # %bb.1: ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64-NEXT: vmv.v.x v8, a0 -; RV64-NEXT: vid.v v16 -; RV64-NEXT: li a1, -1 -; RV64-NEXT: vmadd.vx v16, a1, v8 -; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 -; 
RV64-NEXT: vredmaxu.vs v8, v8, v8 -; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: subw a0, a0, a1 +; RV64-NEXT: .LBB2_2: ; RV64-NEXT: ret %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 0) ret i32 %res } +define i32 @ctz_nxv16i1_poison(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) { +; RV32-LABEL: ctz_nxv16i1_poison: +; RV32: # %bb.0: +; RV32-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; RV32-NEXT: vfirst.m a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: ctz_nxv16i1_poison: +; RV64: # %bb.0: +; RV64-NEXT: vsetvli a0, zero, e8, m2, ta, ma +; RV64-NEXT: vfirst.m a0, v8 +; RV64-NEXT: ret + %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %a, i1 1) + ret i32 %res +} + +define i32 @ctz_v16i1(<16 x i1> %pg, <16 x i1> %a) { +; RV32-LABEL: ctz_v16i1: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-NEXT: vfirst.m a0, v8 +; RV32-NEXT: bgez a0, .LBB4_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a0, 16 +; RV32-NEXT: .LBB4_2: +; RV32-NEXT: ret +; +; RV64-LABEL: ctz_v16i1: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-NEXT: vfirst.m a0, v8 +; RV64-NEXT: bgez a0, .LBB4_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a0, 16 +; RV64-NEXT: .LBB4_2: +; RV64-NEXT: ret + %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 0) + ret i32 %res +} + +define i32 @ctz_v16i1_poison(<16 x i1> %pg, <16 x i1> %a) { +; RV32-LABEL: ctz_v16i1_poison: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-NEXT: vfirst.m a0, v8 +; RV32-NEXT: ret +; +; RV64-LABEL: ctz_v16i1_poison: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-NEXT: vfirst.m a0, v8 +; RV64-NEXT: ret + %res = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 1) + ret i32 %res +} + +define i16 @ctz_v8i1_i16_ret(<8 x i1> %a) { +; RV32-LABEL: ctz_v8i1_i16_ret: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vfirst.m a0, v0 +; 
RV32-NEXT: bgez a0, .LBB6_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a0, 8 +; RV32-NEXT: .LBB6_2: +; RV32-NEXT: ret +; +; RV64-LABEL: ctz_v8i1_i16_ret: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vfirst.m a0, v0 +; RV64-NEXT: bgez a0, .LBB6_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a0, 8 +; RV64-NEXT: .LBB6_2: +; RV64-NEXT: ret + %res = call i16 @llvm.experimental.cttz.elts.i16.v8i1(<8 x i1> %a, i1 0) + ret i16 %res +} + declare i64 @llvm.experimental.cttz.elts.i64.nxv8i16(<vscale x 8 x i16>, i1) declare i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1>, i1) declare i32 @llvm.experimental.cttz.elts.i32.nxv4i32(<vscale x 4 x i32>, i1) +declare i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1>, i1) +declare i16 @llvm.experimental.cttz.elts.i16.v16i1(<8 x i1>, i1) attributes #0 = { vscale_range(2,1024) } diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll index 15abc9b..94b717b 100644 --- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll +++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts.ll @@ -1,38 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -mtriple=riscv32 < %s | FileCheck %s -check-prefix=RV32 -; RUN: llc -mtriple=riscv64 < %s | FileCheck %s -check-prefix=RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v < %s | FileCheck %s -check-prefix=RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v < %s | FileCheck %s -check-prefix=RV64 ; FIXED WIDTH define i16 @ctz_v4i32(<4 x i32> %a) { ; RV32-LABEL: ctz_v4i32: ; RV32: # %bb.0: -; RV32-NEXT: lw a3, 0(a0) -; RV32-NEXT: lw a1, 4(a0) -; RV32-NEXT: lw a2, 12(a0) -; RV32-NEXT: lw a4, 8(a0) -; RV32-NEXT: seqz a0, a3 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: andi a0, a0, 4 -; RV32-NEXT: seqz a3, a4 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: andi a3, a3, 2 -; RV32-NEXT: bltu a3, a0, .LBB0_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a3 -; RV32-NEXT: .LBB0_2: 
-; RV32-NEXT: snez a2, a2 -; RV32-NEXT: seqz a1, a1 -; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: andi a1, a1, 3 -; RV32-NEXT: bltu a2, a1, .LBB0_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: mv a1, a2 -; RV32-NEXT: .LBB0_4: -; RV32-NEXT: bltu a1, a0, .LBB0_6 -; RV32-NEXT: # %bb.5: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB0_6: +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmsne.vi v0, v8, 0 +; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; RV32-NEXT: vmv.v.i v8, 0 +; RV32-NEXT: vmerge.vim v8, v8, -1, v0 +; RV32-NEXT: vid.v v9 +; RV32-NEXT: vrsub.vi v9, v9, 4 +; RV32-NEXT: vand.vv v8, v8, v9 +; RV32-NEXT: vredmaxu.vs v8, v8, v8 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 4 ; RV32-NEXT: sub a1, a1, a0 ; RV32-NEXT: andi a0, a1, 255 @@ -40,32 +24,16 @@ define i16 @ctz_v4i32(<4 x i32> %a) { ; ; RV64-LABEL: ctz_v4i32: ; RV64: # %bb.0: -; RV64-NEXT: lw a3, 0(a0) -; RV64-NEXT: lw a1, 8(a0) -; RV64-NEXT: lw a2, 24(a0) -; RV64-NEXT: lw a4, 16(a0) -; RV64-NEXT: seqz a0, a3 -; RV64-NEXT: addi a0, a0, -1 -; RV64-NEXT: andi a0, a0, 4 -; RV64-NEXT: seqz a3, a4 -; RV64-NEXT: addi a3, a3, -1 -; RV64-NEXT: andi a3, a3, 2 -; RV64-NEXT: bltu a3, a0, .LBB0_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a3 -; RV64-NEXT: .LBB0_2: -; RV64-NEXT: snez a2, a2 -; RV64-NEXT: seqz a1, a1 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: andi a1, a1, 3 -; RV64-NEXT: bltu a2, a1, .LBB0_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: mv a1, a2 -; RV64-NEXT: .LBB0_4: -; RV64-NEXT: bltu a1, a0, .LBB0_6 -; RV64-NEXT: # %bb.5: -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB0_6: +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vmsne.vi v0, v8, 0 +; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; RV64-NEXT: vmv.v.i v8, 0 +; RV64-NEXT: vmerge.vim v8, v8, -1, v0 +; RV64-NEXT: vid.v v9 +; RV64-NEXT: vrsub.vi v9, v9, 4 +; RV64-NEXT: vand.vv v8, v8, v9 +; RV64-NEXT: vredmaxu.vs v8, v8, v8 +; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: li a1, 4 ; RV64-NEXT: subw a1, a1, a0 ; RV64-NEXT: andi a0, a1, 255 
@@ -79,32 +47,14 @@ define i16 @ctz_v4i32(<4 x i32> %a) { define i32 @ctz_v2i1_poison(<2 x i1> %a) { ; RV32-LABEL: ctz_v2i1_poison: ; RV32: # %bb.0: -; RV32-NEXT: andi a1, a1, 1 -; RV32-NEXT: slli a0, a0, 31 -; RV32-NEXT: srai a0, a0, 31 -; RV32-NEXT: andi a0, a0, 2 -; RV32-NEXT: bltu a1, a0, .LBB1_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB1_2: -; RV32-NEXT: li a1, 2 -; RV32-NEXT: sub a1, a1, a0 -; RV32-NEXT: andi a0, a1, 255 +; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RV32-NEXT: vfirst.m a0, v0 ; RV32-NEXT: ret ; ; RV64-LABEL: ctz_v2i1_poison: ; RV64: # %bb.0: -; RV64-NEXT: andi a1, a1, 1 -; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: srai a0, a0, 63 -; RV64-NEXT: andi a0, a0, 2 -; RV64-NEXT: bltu a1, a0, .LBB1_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB1_2: -; RV64-NEXT: li a1, 2 -; RV64-NEXT: subw a1, a1, a0 -; RV64-NEXT: andi a0, a1, 255 +; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RV64-NEXT: vfirst.m a0, v0 ; RV64-NEXT: ret %res = call i32 @llvm.experimental.cttz.elts.i32.v2i1(<2 x i1> %a, i1 1) ret i32 %res diff --git a/llvm/test/CodeGen/RISCV/memcpy-inline.ll b/llvm/test/CodeGen/RISCV/memcpy-inline.ll index 343695e..833e073 100644 --- a/llvm/test/CodeGen/RISCV/memcpy-inline.ll +++ b/llvm/test/CodeGen/RISCV/memcpy-inline.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32 ; RUN: llc < %s -mtriple=riscv64 \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64 -; RUN: llc < %s -mtriple=riscv32 -mattr=+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem \ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST -; RUN: llc < %s -mtriple=riscv64 -mattr=+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST ; ---------------------------------------------------------------------- diff --git a/llvm/test/CodeGen/RISCV/memcpy.ll 
b/llvm/test/CodeGen/RISCV/memcpy.ll index 12ec088..02f5823 100644 --- a/llvm/test/CodeGen/RISCV/memcpy.ll +++ b/llvm/test/CodeGen/RISCV/memcpy.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32 ; RUN: llc < %s -mtriple=riscv64 \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64 -; RUN: llc < %s -mtriple=riscv32 -mattr=+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem \ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST -; RUN: llc < %s -mtriple=riscv64 -mattr=+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+unaligned-scalar-mem \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } diff --git a/llvm/test/CodeGen/RISCV/memset-inline.ll b/llvm/test/CodeGen/RISCV/memset-inline.ll index cc22b77..55fe81a 100644 --- a/llvm/test/CodeGen/RISCV/memset-inline.ll +++ b/llvm/test/CodeGen/RISCV/memset-inline.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64 -; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+unaligned-scalar-mem \ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+unaligned-scalar-mem \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll index af341db..364e8c7 100644 --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -465,6 +465,192 @@ define i32 @mulhu_constant(i32 %a) nounwind { ret i32 %4 } +define i32 @muli32_p14(i32 %a) nounwind { +; RV32I-LABEL: muli32_p14: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 14 +; RV32I-NEXT: tail 
__mulsi3 +; +; RV32IM-LABEL: muli32_p14: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a1, 14 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: muli32_p14: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 14 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: muli32_p14: +; RV64IM: # %bb.0: +; RV64IM-NEXT: li a1, 14 +; RV64IM-NEXT: mulw a0, a0, a1 +; RV64IM-NEXT: ret + %1 = mul i32 %a, 14 + ret i32 %1 +} + +define i32 @muli32_p28(i32 %a) nounwind { +; RV32I-LABEL: muli32_p28: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 28 +; RV32I-NEXT: tail __mulsi3 +; +; RV32IM-LABEL: muli32_p28: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a1, 28 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: muli32_p28: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 28 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: muli32_p28: +; RV64IM: # %bb.0: +; RV64IM-NEXT: li a1, 28 +; RV64IM-NEXT: mulw a0, a0, a1 +; RV64IM-NEXT: ret + %1 = mul i32 %a, 28 + ret i32 %1 +} + +define i32 @muli32_p30(i32 %a) nounwind { +; RV32I-LABEL: muli32_p30: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 30 +; RV32I-NEXT: tail __mulsi3 +; +; RV32IM-LABEL: muli32_p30: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a1, 30 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: muli32_p30: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 30 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: muli32_p30: +; RV64IM: # %bb.0: +; RV64IM-NEXT: li a1, 30 +; RV64IM-NEXT: mulw a0, a0, a1 +; 
RV64IM-NEXT: ret + %1 = mul i32 %a, 30 + ret i32 %1 +} + +define i32 @muli32_p56(i32 %a) nounwind { +; RV32I-LABEL: muli32_p56: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 56 +; RV32I-NEXT: tail __mulsi3 +; +; RV32IM-LABEL: muli32_p56: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a1, 56 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: muli32_p56: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 56 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: muli32_p56: +; RV64IM: # %bb.0: +; RV64IM-NEXT: li a1, 56 +; RV64IM-NEXT: mulw a0, a0, a1 +; RV64IM-NEXT: ret + %1 = mul i32 %a, 56 + ret i32 %1 +} + +define i32 @muli32_p60(i32 %a) nounwind { +; RV32I-LABEL: muli32_p60: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 60 +; RV32I-NEXT: tail __mulsi3 +; +; RV32IM-LABEL: muli32_p60: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a1, 60 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: muli32_p60: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 60 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: muli32_p60: +; RV64IM: # %bb.0: +; RV64IM-NEXT: li a1, 60 +; RV64IM-NEXT: mulw a0, a0, a1 +; RV64IM-NEXT: ret + %1 = mul i32 %a, 60 + ret i32 %1 +} + +define i32 @muli32_p62(i32 %a) nounwind { +; RV32I-LABEL: muli32_p62: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 62 +; RV32I-NEXT: tail __mulsi3 +; +; RV32IM-LABEL: muli32_p62: +; RV32IM: # %bb.0: +; RV32IM-NEXT: li a1, 62 +; RV32IM-NEXT: mul a0, a0, a1 +; RV32IM-NEXT: ret +; +; RV64I-LABEL: muli32_p62: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: li a1, 62 +; RV64I-NEXT: call __muldi3 +; RV64I-NEXT: ld ra, 
8(sp) # 8-byte Folded Reload +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret +; +; RV64IM-LABEL: muli32_p62: +; RV64IM: # %bb.0: +; RV64IM-NEXT: li a1, 62 +; RV64IM-NEXT: mulw a0, a0, a1 +; RV64IM-NEXT: ret + %1 = mul i32 %a, 62 + ret i32 %1 +} + define i32 @muli32_p65(i32 %a) nounwind { ; RV32I-LABEL: muli32_p65: ; RV32I: # %bb.0: @@ -600,6 +786,8 @@ define i64 @muli64_p63(i64 %a) nounwind { ret i64 %1 } + + define i32 @muli32_m63(i32 %a) nounwind { ; RV32I-LABEL: muli32_m63: ; RV32I: # %bb.0: @@ -1145,10 +1333,10 @@ define i128 @muli128_m3840(i128 %a) nounwind { ; RV32I-NEXT: sltu a7, a6, a4 ; RV32I-NEXT: sub t0, t1, t0 ; RV32I-NEXT: mv t1, a7 -; RV32I-NEXT: beq a5, a3, .LBB30_2 +; RV32I-NEXT: beq a5, a3, .LBB36_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu t1, a5, a3 -; RV32I-NEXT: .LBB30_2: +; RV32I-NEXT: .LBB36_2: ; RV32I-NEXT: sub a2, a2, a1 ; RV32I-NEXT: sltu a1, a2, t1 ; RV32I-NEXT: sub a1, t0, a1 @@ -1261,10 +1449,10 @@ define i128 @muli128_m63(i128 %a) nounwind { ; RV32I-NEXT: slli t0, a1, 6 ; RV32I-NEXT: or a7, t0, a7 ; RV32I-NEXT: mv t0, a5 -; RV32I-NEXT: beq a1, a7, .LBB31_2 +; RV32I-NEXT: beq a1, a7, .LBB37_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu t0, a1, a7 -; RV32I-NEXT: .LBB31_2: +; RV32I-NEXT: .LBB37_2: ; RV32I-NEXT: srli t1, a1, 26 ; RV32I-NEXT: slli t2, a6, 6 ; RV32I-NEXT: or t1, t2, t1 diff --git a/llvm/test/CodeGen/RISCV/pr56110.ll b/llvm/test/CodeGen/RISCV/pr56110.ll index c795b17..fa441f5 100644 --- a/llvm/test/CodeGen/RISCV/pr56110.ll +++ b/llvm/test/CodeGen/RISCV/pr56110.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=riscv32 | FileCheck %s -; RUN: llc < %s -mtriple=riscv32 -mattr=+fast-unaligned-access | FileCheck %s +; RUN: llc < %s -mtriple=riscv32 -mattr=+unaligned-scalar-mem | FileCheck %s define void @foo_set(ptr nocapture noundef %a, i32 noundef %v) { ; CHECK-LABEL: foo_set: diff --git a/llvm/test/CodeGen/RISCV/prefer-w-inst.ll 
b/llvm/test/CodeGen/RISCV/prefer-w-inst.ll new file mode 100644 index 0000000..34ab74d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/prefer-w-inst.ll @@ -0,0 +1,105 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=NO-PREFER-W-INST %s +; RUN: llc -mtriple=riscv64 -mattr=+m -riscv-disable-strip-w-suffix -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=NO-STRIP %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+prefer-w-inst -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=PREFER-W-INST %s + +define i32 @addiw(i32 %a) { +; NO-PREFER-W-INST-LABEL: addiw: +; NO-PREFER-W-INST: # %bb.0: +; NO-PREFER-W-INST-NEXT: lui a1, 1 +; NO-PREFER-W-INST-NEXT: addi a1, a1, -1 +; NO-PREFER-W-INST-NEXT: addw a0, a0, a1 +; NO-PREFER-W-INST-NEXT: ret +; +; NO-STRIP-LABEL: addiw: +; NO-STRIP: # %bb.0: +; NO-STRIP-NEXT: lui a1, 1 +; NO-STRIP-NEXT: addiw a1, a1, -1 +; NO-STRIP-NEXT: addw a0, a0, a1 +; NO-STRIP-NEXT: ret +; +; PREFER-W-INST-LABEL: addiw: +; PREFER-W-INST: # %bb.0: +; PREFER-W-INST-NEXT: lui a1, 1 +; PREFER-W-INST-NEXT: addiw a1, a1, -1 +; PREFER-W-INST-NEXT: addw a0, a0, a1 +; PREFER-W-INST-NEXT: ret + %ret = add i32 %a, 4095 + ret i32 %ret +} + +define i32 @addw(i32 %a, i32 %b) { +; NO-PREFER-W-INST-LABEL: addw: +; NO-PREFER-W-INST: # %bb.0: +; NO-PREFER-W-INST-NEXT: add a0, a0, a1 +; NO-PREFER-W-INST-NEXT: addiw a0, a0, 1024 +; NO-PREFER-W-INST-NEXT: ret +; +; NO-STRIP-LABEL: addw: +; NO-STRIP: # %bb.0: +; NO-STRIP-NEXT: addw a0, a0, a1 +; NO-STRIP-NEXT: addiw a0, a0, 1024 +; NO-STRIP-NEXT: ret +; +; PREFER-W-INST-LABEL: addw: +; PREFER-W-INST: # %bb.0: +; PREFER-W-INST-NEXT: addw a0, a0, a1 +; PREFER-W-INST-NEXT: addiw a0, a0, 1024 +; PREFER-W-INST-NEXT: ret + %add = add i32 %a, %b + %ret = add i32 %add, 1024 + ret i32 %ret +} + +define i32 @mulw(i32 %a, i32 %b) { +; NO-PREFER-W-INST-LABEL: mulw: +; NO-PREFER-W-INST: 
# %bb.0: +; NO-PREFER-W-INST-NEXT: mul a1, a0, a1 +; NO-PREFER-W-INST-NEXT: mul a0, a0, a1 +; NO-PREFER-W-INST-NEXT: addiw a0, a0, 1024 +; NO-PREFER-W-INST-NEXT: ret +; +; NO-STRIP-LABEL: mulw: +; NO-STRIP: # %bb.0: +; NO-STRIP-NEXT: mulw a1, a0, a1 +; NO-STRIP-NEXT: mulw a0, a0, a1 +; NO-STRIP-NEXT: addiw a0, a0, 1024 +; NO-STRIP-NEXT: ret +; +; PREFER-W-INST-LABEL: mulw: +; PREFER-W-INST: # %bb.0: +; PREFER-W-INST-NEXT: mulw a1, a0, a1 +; PREFER-W-INST-NEXT: mulw a0, a0, a1 +; PREFER-W-INST-NEXT: addiw a0, a0, 1024 +; PREFER-W-INST-NEXT: ret + %mul1 = mul i32 %a, %b + %mul = mul i32 %a, %mul1 + %ret = add i32 %mul, 1024 + ret i32 %ret +} + +define i32 @slliw(i32 %a) { +; NO-PREFER-W-INST-LABEL: slliw: +; NO-PREFER-W-INST: # %bb.0: +; NO-PREFER-W-INST-NEXT: slli a0, a0, 1 +; NO-PREFER-W-INST-NEXT: addiw a0, a0, 1024 +; NO-PREFER-W-INST-NEXT: ret +; +; NO-STRIP-LABEL: slliw: +; NO-STRIP: # %bb.0: +; NO-STRIP-NEXT: slliw a0, a0, 1 +; NO-STRIP-NEXT: addiw a0, a0, 1024 +; NO-STRIP-NEXT: ret +; +; PREFER-W-INST-LABEL: slliw: +; PREFER-W-INST: # %bb.0: +; PREFER-W-INST-NEXT: slliw a0, a0, 1 +; PREFER-W-INST-NEXT: addiw a0, a0, 1024 +; PREFER-W-INST-NEXT: ret + %shl = shl i32 %a, 1 + %ret = add i32 %shl, 1024 + ret i32 %ret +} diff --git a/llvm/test/CodeGen/RISCV/prefer-w-inst.mir b/llvm/test/CodeGen/RISCV/prefer-w-inst.mir new file mode 100644 index 0000000..e05e27a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/prefer-w-inst.mir @@ -0,0 +1,262 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc %s -mtriple=riscv64 -run-pass=riscv-opt-w-instrs -verify-machineinstrs \ +# RUN: -mattr=+m -o - | FileCheck %s -check-prefixes=NO-PREFER-W-INST +# RUN: llc %s -mtriple=riscv64 -run-pass=riscv-opt-w-instrs -verify-machineinstrs \ +# RUN: -mattr=+m,+prefer-w-inst -o - | FileCheck %s -check-prefixes=PREFER-W-INST + +--- +name: addi +body: | + bb.0.entry: + liveins: $x10, $x11 + ; NO-PREFER-W-INST-LABEL: name: addi + ; 
NO-PREFER-W-INST: liveins: $x10, $x11 + ; NO-PREFER-W-INST-NEXT: {{ $}} + ; NO-PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; NO-PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; NO-PREFER-W-INST-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[COPY]], 1 + ; NO-PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[ADDI]], 1 + ; NO-PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]] + ; NO-PREFER-W-INST-NEXT: PseudoRET + ; + ; PREFER-W-INST-LABEL: name: addi + ; PREFER-W-INST: liveins: $x10, $x11 + ; PREFER-W-INST-NEXT: {{ $}} + ; PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[COPY]], 1 + ; PREFER-W-INST-NEXT: [[ADDIW1:%[0-9]+]]:gpr = ADDIW [[ADDIW]], 1 + ; PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW1]] + ; PREFER-W-INST-NEXT: PseudoRET + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADDI %1, 1 + %4:gpr = ADDIW %3, 1 + $x10 = COPY %4 + PseudoRET +... + +--- +name: add +body: | + bb.0.entry: + liveins: $x10, $x11 + ; NO-PREFER-W-INST-LABEL: name: add + ; NO-PREFER-W-INST: liveins: $x10, $x11 + ; NO-PREFER-W-INST-NEXT: {{ $}} + ; NO-PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; NO-PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; NO-PREFER-W-INST-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[COPY]], [[COPY1]] + ; NO-PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[ADD]], 1 + ; NO-PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]] + ; NO-PREFER-W-INST-NEXT: PseudoRET + ; + ; PREFER-W-INST-LABEL: name: add + ; PREFER-W-INST: liveins: $x10, $x11 + ; PREFER-W-INST-NEXT: {{ $}} + ; PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; PREFER-W-INST-NEXT: [[ADDW:%[0-9]+]]:gpr = ADDW [[COPY]], [[COPY1]] + ; PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[ADDW]], 1 + ; PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]] + ; PREFER-W-INST-NEXT: PseudoRET + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD 
%1, %2 + %4:gpr = ADDIW %3, 1 + $x10 = COPY %4 + PseudoRET +... + +--- +name: sub +body: | + bb.0.entry: + liveins: $x10, $x11 + ; NO-PREFER-W-INST-LABEL: name: sub + ; NO-PREFER-W-INST: liveins: $x10, $x11 + ; NO-PREFER-W-INST-NEXT: {{ $}} + ; NO-PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; NO-PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; NO-PREFER-W-INST-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY]], [[COPY1]] + ; NO-PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[SUB]], 1 + ; NO-PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]] + ; NO-PREFER-W-INST-NEXT: PseudoRET + ; + ; PREFER-W-INST-LABEL: name: sub + ; PREFER-W-INST: liveins: $x10, $x11 + ; PREFER-W-INST-NEXT: {{ $}} + ; PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; PREFER-W-INST-NEXT: [[SUBW:%[0-9]+]]:gpr = SUBW [[COPY]], [[COPY1]] + ; PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[SUBW]], 1 + ; PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]] + ; PREFER-W-INST-NEXT: PseudoRET + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SUB %1, %2 + %4:gpr = ADDIW %3, 1 + $x10 = COPY %4 + PseudoRET +... 
+ +--- +name: mul +body: | + bb.0.entry: + liveins: $x10, $x11 + ; NO-PREFER-W-INST-LABEL: name: mul + ; NO-PREFER-W-INST: liveins: $x10, $x11 + ; NO-PREFER-W-INST-NEXT: {{ $}} + ; NO-PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; NO-PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; NO-PREFER-W-INST-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[COPY]], [[COPY1]] + ; NO-PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[MUL]], 1 + ; NO-PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]] + ; NO-PREFER-W-INST-NEXT: PseudoRET + ; + ; PREFER-W-INST-LABEL: name: mul + ; PREFER-W-INST: liveins: $x10, $x11 + ; PREFER-W-INST-NEXT: {{ $}} + ; PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; PREFER-W-INST-NEXT: [[MULW:%[0-9]+]]:gpr = MULW [[COPY]], [[COPY1]] + ; PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[MULW]], 1 + ; PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]] + ; PREFER-W-INST-NEXT: PseudoRET + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = MUL %1, %2 + %4:gpr = ADDIW %3, 1 + $x10 = COPY %4 + PseudoRET +... 
+ + +--- +name: slli_31 +body: | + bb.0.entry: + liveins: $x10, $x11 + ; NO-PREFER-W-INST-LABEL: name: slli_31 + ; NO-PREFER-W-INST: liveins: $x10, $x11 + ; NO-PREFER-W-INST-NEXT: {{ $}} + ; NO-PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; NO-PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; NO-PREFER-W-INST-NEXT: [[SLLI:%[0-9]+]]:gpr = SLLI [[COPY]], 31 + ; NO-PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[SLLI]], 1 + ; NO-PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]] + ; NO-PREFER-W-INST-NEXT: PseudoRET + ; + ; PREFER-W-INST-LABEL: name: slli_31 + ; PREFER-W-INST: liveins: $x10, $x11 + ; PREFER-W-INST-NEXT: {{ $}} + ; PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; PREFER-W-INST-NEXT: [[SLLIW:%[0-9]+]]:gpr = SLLIW [[COPY]], 31 + ; PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[SLLIW]], 1 + ; PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]] + ; PREFER-W-INST-NEXT: PseudoRET + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SLLI %1, 31 + %4:gpr = ADDIW %3, 1 + $x10 = COPY %4 + PseudoRET +... 
+ +--- +name: slli_32 +body: | + bb.0.entry: + liveins: $x10, $x11 + ; NO-PREFER-W-INST-LABEL: name: slli_32 + ; NO-PREFER-W-INST: liveins: $x10, $x11 + ; NO-PREFER-W-INST-NEXT: {{ $}} + ; NO-PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; NO-PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; NO-PREFER-W-INST-NEXT: [[SLLI:%[0-9]+]]:gpr = SLLI [[COPY]], 32 + ; NO-PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[SLLI]], 1 + ; NO-PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]] + ; NO-PREFER-W-INST-NEXT: PseudoRET + ; + ; PREFER-W-INST-LABEL: name: slli_32 + ; PREFER-W-INST: liveins: $x10, $x11 + ; PREFER-W-INST-NEXT: {{ $}} + ; PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; PREFER-W-INST-NEXT: [[SLLI:%[0-9]+]]:gpr = SLLI [[COPY]], 32 + ; PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[SLLI]], 1 + ; PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]] + ; PREFER-W-INST-NEXT: PseudoRET + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SLLI %1, 32 + %4:gpr = ADDIW %3, 1 + $x10 = COPY %4 + PseudoRET +... 
+ +--- +name: ld +body: | + bb.0.entry: + liveins: $x10, $x11 + ; NO-PREFER-W-INST-LABEL: name: ld + ; NO-PREFER-W-INST: liveins: $x10, $x11 + ; NO-PREFER-W-INST-NEXT: {{ $}} + ; NO-PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; NO-PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; NO-PREFER-W-INST-NEXT: [[LD:%[0-9]+]]:gpr = LD [[COPY]], 0 + ; NO-PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[LD]], 1 + ; NO-PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]] + ; NO-PREFER-W-INST-NEXT: PseudoRET + ; + ; PREFER-W-INST-LABEL: name: ld + ; PREFER-W-INST: liveins: $x10, $x11 + ; PREFER-W-INST-NEXT: {{ $}} + ; PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; PREFER-W-INST-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 + ; PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[LW]], 1 + ; PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]] + ; PREFER-W-INST-NEXT: PseudoRET + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = LD %1, 0 + %4:gpr = ADDIW %3, 1 + $x10 = COPY %4 + PseudoRET +... 
+ +--- +name: lwu +body: | + bb.0.entry: + liveins: $x10, $x11 + ; NO-PREFER-W-INST-LABEL: name: lwu + ; NO-PREFER-W-INST: liveins: $x10, $x11 + ; NO-PREFER-W-INST-NEXT: {{ $}} + ; NO-PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; NO-PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; NO-PREFER-W-INST-NEXT: [[LWU:%[0-9]+]]:gpr = LWU [[COPY]], 0 + ; NO-PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[LWU]], 1 + ; NO-PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]] + ; NO-PREFER-W-INST-NEXT: PseudoRET + ; + ; PREFER-W-INST-LABEL: name: lwu + ; PREFER-W-INST: liveins: $x10, $x11 + ; PREFER-W-INST-NEXT: {{ $}} + ; PREFER-W-INST-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; PREFER-W-INST-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; PREFER-W-INST-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 + ; PREFER-W-INST-NEXT: [[ADDIW:%[0-9]+]]:gpr = ADDIW [[LW]], 1 + ; PREFER-W-INST-NEXT: $x10 = COPY [[ADDIW]] + ; PREFER-W-INST-NEXT: PseudoRET + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = LWU %1, 0 + %4:gpr = ADDIW %3, 1 + $x10 = COPY %4 + PseudoRET +... 
diff --git a/llvm/test/CodeGen/RISCV/riscv-func-target-feature.ll b/llvm/test/CodeGen/RISCV/riscv-func-target-feature.ll index a03dadb..d627ae9 100644 --- a/llvm/test/CodeGen/RISCV/riscv-func-target-feature.ll +++ b/llvm/test/CodeGen/RISCV/riscv-func-target-feature.ll @@ -36,7 +36,7 @@ entry: } ; CHECK-NOT: .option push -define void @test5() "target-features"="+fast-unaligned-access" { +define void @test5() "target-features"="+unaligned-scalar-mem" { ; CHECK-LABEL: test5 ; CHECK-NOT: .option pop entry: diff --git a/llvm/test/CodeGen/RISCV/rv32zba.ll b/llvm/test/CodeGen/RISCV/rv32zba.ll index 0908a39..cc632a0 100644 --- a/llvm/test/CodeGen/RISCV/rv32zba.ll +++ b/llvm/test/CodeGen/RISCV/rv32zba.ll @@ -271,31 +271,49 @@ define i32 @mul288(i32 %a) { } define i32 @mul258(i32 %a) { -; CHECK-LABEL: mul258: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 258 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: ret +; RV32I-LABEL: mul258: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 258 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBA-LABEL: mul258: +; RV32ZBA: # %bb.0: +; RV32ZBA-NEXT: slli a1, a0, 8 +; RV32ZBA-NEXT: sh1add a0, a0, a1 +; RV32ZBA-NEXT: ret %c = mul i32 %a, 258 ret i32 %c } define i32 @mul260(i32 %a) { -; CHECK-LABEL: mul260: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 260 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: ret +; RV32I-LABEL: mul260: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 260 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBA-LABEL: mul260: +; RV32ZBA: # %bb.0: +; RV32ZBA-NEXT: slli a1, a0, 8 +; RV32ZBA-NEXT: sh2add a0, a0, a1 +; RV32ZBA-NEXT: ret %c = mul i32 %a, 260 ret i32 %c } define i32 @mul264(i32 %a) { -; CHECK-LABEL: mul264: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 264 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: ret +; RV32I-LABEL: mul264: +; RV32I: # %bb.0: +; RV32I-NEXT: li a1, 264 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBA-LABEL: mul264: +; RV32ZBA: # %bb.0: +; RV32ZBA-NEXT: slli a1, a0, 8 +; RV32ZBA-NEXT: 
sh3add a0, a0, a1 +; RV32ZBA-NEXT: ret %c = mul i32 %a, 264 ret i32 %c } diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll index 90cfb1f..ee9b73c 100644 --- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zba.ll @@ -811,31 +811,49 @@ define i64 @adduw_imm(i32 signext %0) nounwind { } define i64 @mul258(i64 %a) { -; CHECK-LABEL: mul258: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 258 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: mul258: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 258 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: mul258: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a1, a0, 8 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret %c = mul i64 %a, 258 ret i64 %c } define i64 @mul260(i64 %a) { -; CHECK-LABEL: mul260: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 260 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: mul260: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 260 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: mul260: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a1, a0, 8 +; RV64ZBA-NEXT: sh2add a0, a0, a1 +; RV64ZBA-NEXT: ret %c = mul i64 %a, 260 ret i64 %c } define i64 @mul264(i64 %a) { -; CHECK-LABEL: mul264: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 264 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: mul264: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 264 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: mul264: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a1, a0, 8 +; RV64ZBA-NEXT: sh3add a0, a0, a1 +; RV64ZBA-NEXT: ret %c = mul i64 %a, 264 ret i64 %c } diff --git a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll index 6f56bab..1450c86 100644 --- a/llvm/test/CodeGen/RISCV/rv64xtheadba.ll +++ b/llvm/test/CodeGen/RISCV/rv64xtheadba.ll @@ -268,6 +268,23 @@ define i64 @mul96(i64 %a) { ret i64 %c } 
+define i64 @mul137(i64 %a) { +; RV64I-LABEL: mul137: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 137 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64XTHEADBA-LABEL: mul137: +; RV64XTHEADBA: # %bb.0: +; RV64XTHEADBA-NEXT: th.addsl a1, a0, a0, 3 +; RV64XTHEADBA-NEXT: slli a0, a0, 7 +; RV64XTHEADBA-NEXT: add a0, a0, a1 +; RV64XTHEADBA-NEXT: ret + %c = mul i64 %a, 137 + ret i64 %c +} + define i64 @mul160(i64 %a) { ; RV64I-LABEL: mul160: ; RV64I: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 067addc8..b4c80b6 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -567,6 +567,87 @@ define i64 @mul96(i64 %a) { ret i64 %c } +define i64 @mul119(i64 %a) { +; CHECK-LABEL: mul119: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 119 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: ret + %c = mul i64 %a, 119 + ret i64 %c +} + +define i64 @mul123(i64 %a) { +; CHECK-LABEL: mul123: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 123 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: ret + %c = mul i64 %a, 123 + ret i64 %c +} + +define i64 @mul125(i64 %a) { +; CHECK-LABEL: mul125: +; CHECK: # %bb.0: +; CHECK-NEXT: li a1, 125 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: ret + %c = mul i64 %a, 125 + ret i64 %c +} + +define i64 @mul131(i64 %a) { +; RV64I-LABEL: mul131: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 131 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: mul131: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh1add a1, a0, a0 +; RV64ZBA-NEXT: slli a0, a0, 7 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: ret + %c = mul i64 %a, 131 + ret i64 %c +} + +define i64 @mul133(i64 %a) { +; RV64I-LABEL: mul133: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 133 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: mul133: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh2add a1, a0, a0 +; RV64ZBA-NEXT: slli a0, a0, 7 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: ret + %c = mul i64 %a, 133 + ret 
i64 %c +} + +define i64 @mul137(i64 %a) { +; RV64I-LABEL: mul137: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 137 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: mul137: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: sh3add a1, a0, a0 +; RV64ZBA-NEXT: slli a0, a0, 7 +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: ret + %c = mul i64 %a, 137 + ret i64 %c +} + define i64 @mul160(i64 %a) { ; RV64I-LABEL: mul160: ; RV64I: # %bb.0: @@ -834,31 +915,49 @@ define i64 @adduw_imm(i32 signext %0) nounwind { } define i64 @mul258(i64 %a) { -; CHECK-LABEL: mul258: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 258 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: mul258: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 258 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: mul258: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a1, a0, 8 +; RV64ZBA-NEXT: sh1add a0, a0, a1 +; RV64ZBA-NEXT: ret %c = mul i64 %a, 258 ret i64 %c } define i64 @mul260(i64 %a) { -; CHECK-LABEL: mul260: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 260 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: mul260: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 260 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: mul260: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a1, a0, 8 +; RV64ZBA-NEXT: sh2add a0, a0, a1 +; RV64ZBA-NEXT: ret %c = mul i64 %a, 260 ret i64 %c } define i64 @mul264(i64 %a) { -; CHECK-LABEL: mul264: -; CHECK: # %bb.0: -; CHECK-NEXT: li a1, 264 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: ret +; RV64I-LABEL: mul264: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, 264 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: mul264: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: slli a1, a0, 8 +; RV64ZBA-NEXT: sh3add a0, a0, a1 +; RV64ZBA-NEXT: ret %c = mul i64 %a, 264 ret i64 %c } @@ -2389,3 +2488,48 @@ define i64 @array_index_sh4_sh3(ptr %p, i64 %idx1, i64 %idx2) { %b = load i64, ptr %a, align 8 ret i64 %b } + +define ptr @test_gep_gep_dont_crash(ptr 
%p, i64 %a1, i64 %a2) { +; RV64I-LABEL: test_gep_gep_dont_crash: +; RV64I: # %bb.0: +; RV64I-NEXT: srliw a2, a2, 6 +; RV64I-NEXT: slli a2, a2, 3 +; RV64I-NEXT: slli a1, a1, 3 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: test_gep_gep_dont_crash: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: srliw a2, a2, 6 +; RV64ZBA-NEXT: add a1, a2, a1 +; RV64ZBA-NEXT: sh3add a0, a1, a0 +; RV64ZBA-NEXT: ret + %lshr = lshr i64 %a2, 6 + %and = and i64 %lshr, 67108863 + %gep1 = getelementptr i64, ptr %p, i64 %and + %gep2 = getelementptr i64, ptr %gep1, i64 %a1 + ret ptr %gep2 +} + +define i64 @regression(i32 signext %x, i32 signext %y) { +; RV64I-LABEL: regression: +; RV64I: # %bb.0: +; RV64I-NEXT: subw a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: li a1, 3 +; RV64I-NEXT: slli a1, a1, 35 +; RV64I-NEXT: mulhu a0, a0, a1 +; RV64I-NEXT: ret +; +; RV64ZBA-LABEL: regression: +; RV64ZBA: # %bb.0: +; RV64ZBA-NEXT: subw a0, a0, a1 +; RV64ZBA-NEXT: slli.uw a0, a0, 3 +; RV64ZBA-NEXT: sh1add a0, a0, a0 +; RV64ZBA-NEXT: ret + %sub = sub i32 %x, %y + %ext = zext i32 %sub to i64 + %res = mul nuw nsw i64 %ext, 24 + ret i64 %res +} diff --git a/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll b/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll new file mode 100644 index 0000000..c99388c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=OMIT-FP %s +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs -frame-pointer=all < %s \ +; RUN: | FileCheck -check-prefix=NO-OMIT-FP %s + +define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee_cfi(<vscale x 1 x i32> %va) { +; OMIT-FP-LABEL: test_vector_callee_cfi: +; OMIT-FP: # %bb.0: # %entry +; OMIT-FP-NEXT: addi sp, sp, -16 +; OMIT-FP-NEXT: .cfi_def_cfa_offset 16 +; OMIT-FP-NEXT: csrr a0, vlenb +; 
OMIT-FP-NEXT: slli a0, a0, 3 +; OMIT-FP-NEXT: sub sp, sp, a0 +; OMIT-FP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; OMIT-FP-NEXT: csrr a0, vlenb +; OMIT-FP-NEXT: li a1, 6 +; OMIT-FP-NEXT: mul a0, a0, a1 +; OMIT-FP-NEXT: add a0, sp, a0 +; OMIT-FP-NEXT: addi a0, a0, 16 +; OMIT-FP-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; OMIT-FP-NEXT: csrr a0, vlenb +; OMIT-FP-NEXT: slli a0, a0, 2 +; OMIT-FP-NEXT: add a0, sp, a0 +; OMIT-FP-NEXT: addi a0, a0, 16 +; OMIT-FP-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill +; OMIT-FP-NEXT: addi a0, sp, 16 +; OMIT-FP-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill +; OMIT-FP-NEXT: .cfi_escape 0x10, 0x61, 0x08, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 2 * vlenb +; OMIT-FP-NEXT: .cfi_escape 0x10, 0x62, 0x08, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2m2 @ cfa - 4 * vlenb +; OMIT-FP-NEXT: .cfi_escape 0x10, 0x64, 0x08, 0x11, 0x78, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4m4 @ cfa - 8 * vlenb +; OMIT-FP-NEXT: #APP +; OMIT-FP-NEXT: #NO_APP +; OMIT-FP-NEXT: csrr a0, vlenb +; OMIT-FP-NEXT: li a1, 6 +; OMIT-FP-NEXT: mul a0, a0, a1 +; OMIT-FP-NEXT: add a0, sp, a0 +; OMIT-FP-NEXT: addi a0, a0, 16 +; OMIT-FP-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; OMIT-FP-NEXT: csrr a0, vlenb +; OMIT-FP-NEXT: slli a0, a0, 2 +; OMIT-FP-NEXT: add a0, sp, a0 +; OMIT-FP-NEXT: addi a0, a0, 16 +; OMIT-FP-NEXT: vl2r.v v2, (a0) # Unknown-size Folded Reload +; OMIT-FP-NEXT: addi a0, sp, 16 +; OMIT-FP-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload +; OMIT-FP-NEXT: csrr a0, vlenb +; OMIT-FP-NEXT: slli a0, a0, 3 +; OMIT-FP-NEXT: add sp, sp, a0 +; OMIT-FP-NEXT: addi sp, sp, 16 +; OMIT-FP-NEXT: ret +; +; NO-OMIT-FP-LABEL: test_vector_callee_cfi: +; NO-OMIT-FP: # %bb.0: # %entry +; NO-OMIT-FP-NEXT: addi sp, sp, -32 +; NO-OMIT-FP-NEXT: .cfi_def_cfa_offset 32 +; NO-OMIT-FP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; NO-OMIT-FP-NEXT: sd 
s0, 16(sp) # 8-byte Folded Spill +; NO-OMIT-FP-NEXT: .cfi_offset ra, -8 +; NO-OMIT-FP-NEXT: .cfi_offset s0, -16 +; NO-OMIT-FP-NEXT: addi s0, sp, 32 +; NO-OMIT-FP-NEXT: .cfi_def_cfa s0, 0 +; NO-OMIT-FP-NEXT: csrr a0, vlenb +; NO-OMIT-FP-NEXT: slli a0, a0, 3 +; NO-OMIT-FP-NEXT: sub sp, sp, a0 +; NO-OMIT-FP-NEXT: csrr a0, vlenb +; NO-OMIT-FP-NEXT: slli a0, a0, 1 +; NO-OMIT-FP-NEXT: sub a0, s0, a0 +; NO-OMIT-FP-NEXT: addi a0, a0, -32 +; NO-OMIT-FP-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; NO-OMIT-FP-NEXT: csrr a0, vlenb +; NO-OMIT-FP-NEXT: slli a0, a0, 2 +; NO-OMIT-FP-NEXT: sub a0, s0, a0 +; NO-OMIT-FP-NEXT: addi a0, a0, -32 +; NO-OMIT-FP-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill +; NO-OMIT-FP-NEXT: csrr a0, vlenb +; NO-OMIT-FP-NEXT: slli a0, a0, 3 +; NO-OMIT-FP-NEXT: sub a0, s0, a0 +; NO-OMIT-FP-NEXT: addi a0, a0, -32 +; NO-OMIT-FP-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill +; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x61, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 32 - 2 * vlenb +; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x62, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2m2 @ cfa - 32 - 4 * vlenb +; NO-OMIT-FP-NEXT: .cfi_escape 0x10, 0x64, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4m4 @ cfa - 32 - 8 * vlenb +; NO-OMIT-FP-NEXT: #APP +; NO-OMIT-FP-NEXT: #NO_APP +; NO-OMIT-FP-NEXT: csrr a0, vlenb +; NO-OMIT-FP-NEXT: slli a0, a0, 1 +; NO-OMIT-FP-NEXT: sub a0, s0, a0 +; NO-OMIT-FP-NEXT: addi a0, a0, -32 +; NO-OMIT-FP-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; NO-OMIT-FP-NEXT: csrr a0, vlenb +; NO-OMIT-FP-NEXT: slli a0, a0, 2 +; NO-OMIT-FP-NEXT: sub a0, s0, a0 +; NO-OMIT-FP-NEXT: addi a0, a0, -32 +; NO-OMIT-FP-NEXT: vl2r.v v2, (a0) # Unknown-size Folded Reload +; NO-OMIT-FP-NEXT: csrr a0, vlenb +; NO-OMIT-FP-NEXT: slli a0, a0, 3 +; NO-OMIT-FP-NEXT: sub a0, s0, a0 +; NO-OMIT-FP-NEXT: addi a0, a0, -32 +; NO-OMIT-FP-NEXT: vl4r.v v4, (a0) # 
Unknown-size Folded Reload +; NO-OMIT-FP-NEXT: addi sp, s0, -32 +; NO-OMIT-FP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; NO-OMIT-FP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; NO-OMIT-FP-NEXT: addi sp, sp, 32 +; NO-OMIT-FP-NEXT: ret +entry: + call void asm sideeffect "", + "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() + + ret <vscale x 1 x i32> %va +} diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll index 139579b..9822e0d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll @@ -112,9 +112,9 @@ define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 +; CHECK-NEXT: vmsltu.vx v8, v8, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vi v0, v16, 2 +; CHECK-NEXT: vslideup.vi v0, v8, 2 ; CHECK-NEXT: ret %mask = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 %index, i64 %tc) ret <32 x i1> %mask @@ -132,27 +132,27 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 +; CHECK-NEXT: vmsltu.vx v8, v8, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 2 +; CHECK-NEXT: vslideup.vi v0, v8, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI9_1) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_1) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 +; CHECK-NEXT: vmsltu.vx v8, v8, a2 ; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v16, 4 +; CHECK-NEXT: vslideup.vi v0, v8, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI9_2) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_2) ; CHECK-NEXT: vsetivli 
zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v8, v16, a1 -; CHECK-NEXT: vmsltu.vx v16, v8, a2 +; CHECK-NEXT: vmsltu.vx v8, v8, a2 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v0, v16, 6 +; CHECK-NEXT: vslideup.vi v0, v8, 6 ; CHECK-NEXT: ret %mask = call <64 x i1> @llvm.get.active.lane.mask.v64i1.i64(i64 %index, i64 %tc) ret <64 x i1> %mask @@ -170,30 +170,30 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v10, v16, a2 +; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsext.vf8 v16, v9 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v8, v16, a2 +; CHECK-NEXT: vsaddu.vx v8, v16, a1 +; CHECK-NEXT: vmsltu.vx v8, v8, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: vslideup.vi v8, v0, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_2) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_2) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 4 +; CHECK-NEXT: vslideup.vi v8, v0, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI10_3) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_3) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 6 +; CHECK-NEXT: vslideup.vi v8, v0, 6 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI10_4) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4) @@ -203,27 +203,27 @@ define 
<128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vmsltu.vx v16, v16, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v9, 2 +; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_5) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vmsltu.vx v16, v16, a2 ; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v0, v9, 4 +; CHECK-NEXT: vslideup.vi v0, v16, 4 ; CHECK-NEXT: lui a0, %hi(.LCPI10_6) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_6) ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vsext.vf8 v16, v9 ; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v9, v16, a2 +; CHECK-NEXT: vmsltu.vx v16, v16, a2 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v0, v9, 6 +; CHECK-NEXT: vslideup.vi v0, v16, 6 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vslideup.vi v0, v8, 8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll index 6875925..53d1ae5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll +++ b/llvm/test/CodeGen/RISCV/rvv/binop-splats.ll @@ -88,11 +88,11 @@ define <vscale x 16 x i1> @nxv16i1(i1 %x, i1 %y) { ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vmsne.vi v10, v8, 0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v11, v8, 0 -; CHECK-NEXT: vmxor.mm v0, v10, v11 +; CHECK-NEXT: vmsne.vi v2, v8, 0 +; CHECK-NEXT: vmxor.mm v0, v0, v2 ; CHECK-NEXT: ret 
%head.x = insertelement <vscale x 16 x i1> poison, i1 %x, i32 0 %splat.x = shufflevector <vscale x 16 x i1> %head.x, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer @@ -108,11 +108,11 @@ define <vscale x 32 x i1> @nxv32i1(i1 %x, i1 %y) { ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vmsne.vi v12, v8, 0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v13, v8, 0 -; CHECK-NEXT: vmxor.mm v0, v12, v13 +; CHECK-NEXT: vmsne.vi v4, v8, 0 +; CHECK-NEXT: vmxor.mm v0, v0, v4 ; CHECK-NEXT: ret %head.x = insertelement <vscale x 32 x i1> poison, i1 %x, i32 0 %splat.x = shufflevector <vscale x 32 x i1> %head.x, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer @@ -128,11 +128,11 @@ define <vscale x 64 x i1> @nxv64i1(i1 %x, i1 %y) { ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vmsne.vi v16, v8, 0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v17, v8, 0 -; CHECK-NEXT: vmxor.mm v0, v16, v17 +; CHECK-NEXT: vmsne.vi v8, v8, 0 +; CHECK-NEXT: vmxor.mm v0, v0, v8 ; CHECK-NEXT: ret %head.x = insertelement <vscale x 64 x i1> poison, i1 %x, i32 0 %splat.x = shufflevector <vscale x 64 x i1> %head.x, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll index b446175..28de935 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -2335,28 +2335,28 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, 
v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -2423,21 +2423,21 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size 
Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t @@ -2697,28 +2697,28 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 ; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: 
vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -2785,21 +2785,21 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll index 7fbdfb3..7234601 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -1069,28 +1069,28 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; 
RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1127,21 +1127,21 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size 
Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t @@ -1316,28 +1316,28 @@ define <vscale x 8 x i64> @vp_bswap_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size 
Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1374,21 +1374,21 @@ define <vscale x 8 x i64> @vp_bswap_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll index 78e8700..647d315 100644 --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll @@ -162,3 +162,206 @@ define void @caller_tuple_argument({<vscale x 4 x i32>, <vscale x 4 x i32>} %x) } declare void @callee_tuple_argument({<vscale x 4 x i32>, <vscale x 4 x i32>}) + +; %0 -> v8 +; %1 -> v9 +define <vscale x 1 x i64> @case1(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1) { +; CHECK-LABEL: case1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %a = add <vscale x 1 x i64> %0, %1 
+ ret <vscale x 1 x i64> %a +} + +; %0 -> v8 +; %1 -> v10-v11 +; %2 -> v9 +define <vscale x 1 x i64> @case2_1(<vscale x 1 x i64> %0, <vscale x 2 x i64> %1, <vscale x 1 x i64> %2) { +; CHECK-LABEL: case2_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %a = add <vscale x 1 x i64> %0, %2 + ret <vscale x 1 x i64> %a +} +define <vscale x 2 x i64> @case2_2(<vscale x 1 x i64> %0, <vscale x 2 x i64> %1, <vscale x 1 x i64> %2) { +; CHECK-LABEL: case2_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v10, v10 +; CHECK-NEXT: ret + %a = add <vscale x 2 x i64> %1, %1 + ret <vscale x 2 x i64> %a +} + +; %0 -> v8 +; %1 -> {v10-v11, v12-v13} +; %2 -> v9 +define <vscale x 1 x i64> @case3_1(<vscale x 1 x i64> %0, {<vscale x 2 x i64>, <vscale x 2 x i64>} %1, <vscale x 1 x i64> %2) { +; CHECK-LABEL: case3_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %add = add <vscale x 1 x i64> %0, %2 + ret <vscale x 1 x i64> %add +} +define <vscale x 2 x i64> @case3_2(<vscale x 1 x i64> %0, {<vscale x 2 x i64>, <vscale x 2 x i64>} %1, <vscale x 1 x i64> %2) { +; CHECK-LABEL: case3_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vadd.vv v8, v10, v12 +; CHECK-NEXT: ret + %a = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %1, 0 + %b = extractvalue { <vscale x 2 x i64>, <vscale x 2 x i64> } %1, 1 + %add = add <vscale x 2 x i64> %a, %b + ret <vscale x 2 x i64> %add +} + +; %0 -> v8 +; %1 -> {by-ref, by-ref} +; %2 -> v9 +define <vscale x 8 x i64> @case4_1(<vscale x 1 x i64> %0, {<vscale x 8 x i64>, <vscale x 8 x i64>} %1, <vscale x 1 x i64> %2) { +; CHECK-LABEL: case4_1: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: vl8re64.v v8, (a1) +; CHECK-NEXT: vl8re64.v v16, (a0) +; 
CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vadd.vv v8, v16, v8 +; CHECK-NEXT: ret + %a = extractvalue { <vscale x 8 x i64>, <vscale x 8 x i64> } %1, 0 + %b = extractvalue { <vscale x 8 x i64>, <vscale x 8 x i64> } %1, 1 + %add = add <vscale x 8 x i64> %a, %b + ret <vscale x 8 x i64> %add +} +define <vscale x 1 x i64> @case4_2(<vscale x 1 x i64> %0, {<vscale x 8 x i64>, <vscale x 8 x i64>} %1, <vscale x 1 x i64> %2) { +; CHECK-LABEL: case4_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: ret + %add = add <vscale x 1 x i64> %0, %2 + ret <vscale x 1 x i64> %add +} + +declare <vscale x 1 x i64> @callee1() +declare void @callee2(<vscale x 1 x i64>) +declare void @callee3(<vscale x 4 x i32>) +define void @caller() { +; RV32-LABEL: caller: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: call callee1 +; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: call callee2 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: caller: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: call callee1 +; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: call callee2 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = call <vscale x 1 x i64> @callee1() + %add = add <vscale x 1 x i64> %a, %a + call void @callee2(<vscale x 1 x i64> %add) + ret void +} + +declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @callee_tuple() +define void @caller_tuple() { +; RV32-LABEL: caller_tuple: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: 
.cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: call callee_tuple +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vadd.vv v8, v8, v10 +; RV32-NEXT: call callee3 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: caller_tuple: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: call callee_tuple +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: call callee3 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @callee_tuple() + %b = extractvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %a, 0 + %c = extractvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %a, 1 + %add = add <vscale x 4 x i32> %b, %c + call void @callee3(<vscale x 4 x i32> %add) + ret void +} + +declare {<vscale x 4 x i32>, {<vscale x 4 x i32>, <vscale x 4 x i32>}} @callee_nested() +define void @caller_nested() { +; RV32-LABEL: caller_nested: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: call callee_nested +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV32-NEXT: vadd.vv v8, v8, v10 +; RV32-NEXT: vadd.vv v8, v8, v12 +; RV32-NEXT: call callee3 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: caller_nested: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: call callee_nested +; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; RV64-NEXT: vadd.vv v8, v8, v10 
+; RV64-NEXT: vadd.vv v8, v8, v12 +; RV64-NEXT: call callee3 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %a = call {<vscale x 4 x i32>, {<vscale x 4 x i32>, <vscale x 4 x i32>}} @callee_nested() + %b = extractvalue {<vscale x 4 x i32>, {<vscale x 4 x i32>, <vscale x 4 x i32>}} %a, 0 + %c = extractvalue {<vscale x 4 x i32>, {<vscale x 4 x i32>, <vscale x 4 x i32>}} %a, 1 + %c0 = extractvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %c, 0 + %c1 = extractvalue {<vscale x 4 x i32>, <vscale x 4 x i32>} %c, 1 + %add0 = add <vscale x 4 x i32> %b, %c0 + %add1 = add <vscale x 4 x i32> %add0, %c1 + call void @callee3(<vscale x 4 x i32> %add1) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll index ddd5028..d396d6a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -140,11 +140,9 @@ define <vscale x 8 x half> @vp_ceil_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -184,11 +182,9 @@ define <vscale x 16 x half> @vp_ceil_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; 
CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -228,11 +224,9 @@ define <vscale x 32 x half> @vp_ceil_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -356,11 +350,9 @@ define <vscale x 4 x float> @vp_ceil_vv_nxv4f32(<vscale x 4 x float> %va, <vscal ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -400,11 +392,9 @@ define <vscale x 8 x float> @vp_ceil_vv_nxv8f32(<vscale x 8 x float> %va, <vscal ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -444,11 +434,9 @@ define <vscale x 16 x float> @vp_ceil_vv_nxv16f32(<vscale x 16 x float> %va, <vs ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; 
CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -530,11 +518,9 @@ define <vscale x 2 x double> @vp_ceil_vv_nxv2f64(<vscale x 2 x double> %va, <vsc ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -574,11 +560,9 @@ define <vscale x 4 x double> @vp_ceil_vv_nxv4f64(<vscale x 4 x double> %va, <vsc ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -618,11 +602,9 @@ define <vscale x 7 x double> @vp_ceil_vv_nxv7f64(<vscale x 7 x double> %va, <vsc ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -662,11 +644,9 @@ define <vscale x 8 x double> 
@vp_ceil_vv_nxv8f64(<vscale x 8 x double> %va, <vsc ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -716,11 +696,9 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, < ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -734,11 +712,9 @@ define <vscale x 16 x double> @vp_ceil_vv_nxv16f64(<vscale x 16 x double> %va, < ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll index f244810..ff35043 100644 --- a/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll +++ b/llvm/test/CodeGen/RISCV/rvv/concat-vectors-constant-stride.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+fast-unaligned-access -target-abi=ilp32 \ +; RUN: llc -mtriple=riscv32 -mattr=+v,+unaligned-vector-mem -target-abi=ilp32 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v,+fast-unaligned-access -target-abi=lp64 \ +; RUN: llc -mtriple=riscv64 -mattr=+v,+unaligned-vector-mem -target-abi=lp64 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define void @constant_forward_stride(ptr %s, ptr %d) { diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll index 3972f52..d74ec69 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll @@ -275,8 +275,8 @@ define i1 @extractelt_nxv16i1_idx0(ptr %x) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vl2r.v v8, (a0) ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmseq.vi v10, v8, 0 -; CHECK-NEXT: vfirst.m a0, v10 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vfirst.m a0, v0 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %a = load <vscale x 16 x i8>, ptr %x @@ -290,8 +290,8 @@ define i1 @extractelt_nxv32i1_idx0(ptr %x) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vl4r.v v8, (a0) ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vmseq.vi v12, v8, 0 -; CHECK-NEXT: vfirst.m a0, v12 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vfirst.m a0, v0 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %a = load <vscale x 32 x i8>, ptr %x @@ -305,8 +305,8 @@ define i1 @extractelt_nxv64i1_idx0(ptr %x) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vl8r.v v8, (a0) ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vi v16, v8, 0 -; CHECK-NEXT: vfirst.m a0, v16 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vfirst.m a0, v0 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %a = load <vscale x 64 x i8>, ptr %x diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll index ee8c322..1d07ac8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-binop-splats.ll @@ -125,11 +125,11 @@ define <32 x i1> @v32i1(i1 %x, i1 %y) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vmsne.vi v10, v8, 0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v11, v8, 0 -; CHECK-NEXT: vmxor.mm v0, v10, v11 +; CHECK-NEXT: vmsne.vi v2, v8, 0 +; CHECK-NEXT: vmxor.mm v0, v0, v2 ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vrgather.vi v10, v8, 0 @@ -150,11 +150,11 @@ define <64 x i1> @v64i1(i1 %x, i1 %y) { ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vmsne.vi v12, v8, 0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v13, v8, 0 -; CHECK-NEXT: vmxor.mm v0, v12, v13 +; CHECK-NEXT: vmsne.vi v4, v8, 0 +; CHECK-NEXT: vmxor.mm v0, v0, v4 ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vrgather.vi v12, v8, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll index 33e8869..dc2e011 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -1709,28 +1709,28 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; 
RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1794,21 +1794,21 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: 
vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t @@ -2077,28 +2077,28 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: 
vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -2162,21 +2162,21 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index ab212bc..095d2fe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -803,28 +803,28 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: 
addi a0, a0, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -861,21 +861,21 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill 
-; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t @@ -1038,28 +1038,28 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v24, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a4, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 24, v0.t -; RV32-NEXT: vand.vx v8, v8, a4, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload 
; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1096,21 +1096,21 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2, v0.t ; RV64-NEXT: lui a3, 16 ; RV64-NEXT: addiw a3, a3, -256 -; RV64-NEXT: vand.vx v24, v8, a3, v0.t +; RV64-NEXT: vand.vx v16, v8, a3, v0.t ; RV64-NEXT: li a4, 40 -; RV64-NEXT: vsll.vx v24, v24, a4, v0.t -; RV64-NEXT: vsll.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v16, v24, v0.t +; RV64-NEXT: vsll.vx v16, v16, a4, v0.t +; RV64-NEXT: vor.vv v16, v24, v16, v0.t ; RV64-NEXT: addi a5, sp, 16 ; RV64-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV64-NEXT: vsrl.vx v24, v8, a4, v0.t -; RV64-NEXT: vand.vx v24, v24, a3, v0.t -; RV64-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV64-NEXT: vor.vv v16, v24, v16, v0.t +; RV64-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV64-NEXT: vsrl.vx v16, v8, a4, v0.t +; RV64-NEXT: vand.vx v16, v16, a3, v0.t +; RV64-NEXT: vor.vv v16, v16, v24, v0.t ; RV64-NEXT: vsrl.vi v24, v8, 24, v0.t ; RV64-NEXT: vand.vx v24, v24, a1, v0.t ; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll index 5348e7d..2af0292 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll @@ -201,11 +201,9 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 3 -; ZVFHMIN-NEXT: vmv1r.v v0, 
v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 3 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -287,11 +283,9 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 3 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -436,11 +430,9 @@ define <8 x float> @vp_ceil_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -480,11 +472,9 @@ define <16 x float> @vp_ceil_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, 
v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -566,11 +556,9 @@ define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -610,11 +598,9 @@ define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -654,11 +640,9 @@ define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ 
-698,11 +682,9 @@ define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -750,11 +732,9 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a1, 3 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -768,11 +748,9 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll index 232d453..108bd85 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll @@ -154,9 +154,9 @@ define i1 @extractelt_v32i1(ptr 
%x, i64 %idx) nounwind { ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: vmseq.vi v10, v8, 0 +; RV32-NEXT: vmseq.vi v0, v8, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; RV32-NEXT: vmv.x.s a0, v10 +; RV32-NEXT: vmv.x.s a0, v0 ; RV32-NEXT: srl a0, a0, a1 ; RV32-NEXT: andi a0, a0, 1 ; RV32-NEXT: ret @@ -166,9 +166,9 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: li a2, 32 ; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; RV64-NEXT: vle8.v v8, (a0) -; RV64-NEXT: vmseq.vi v10, v8, 0 +; RV64-NEXT: vmseq.vi v0, v8, 0 ; RV64-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; RV64-NEXT: vmv.x.s a0, v10 +; RV64-NEXT: vmv.x.s a0, v0 ; RV64-NEXT: srl a0, a0, a1 ; RV64-NEXT: andi a0, a0, 1 ; RV64-NEXT: ret @@ -178,9 +178,9 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind { ; RV32ZBS-NEXT: li a2, 32 ; RV32ZBS-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; RV32ZBS-NEXT: vle8.v v8, (a0) -; RV32ZBS-NEXT: vmseq.vi v10, v8, 0 +; RV32ZBS-NEXT: vmseq.vi v0, v8, 0 ; RV32ZBS-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; RV32ZBS-NEXT: vmv.x.s a0, v10 +; RV32ZBS-NEXT: vmv.x.s a0, v0 ; RV32ZBS-NEXT: bext a0, a0, a1 ; RV32ZBS-NEXT: ret ; @@ -189,9 +189,9 @@ define i1 @extractelt_v32i1(ptr %x, i64 %idx) nounwind { ; RV64ZBS-NEXT: li a2, 32 ; RV64ZBS-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; RV64ZBS-NEXT: vle8.v v8, (a0) -; RV64ZBS-NEXT: vmseq.vi v10, v8, 0 +; RV64ZBS-NEXT: vmseq.vi v0, v8, 0 ; RV64ZBS-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; RV64ZBS-NEXT: vmv.x.s a0, v10 +; RV64ZBS-NEXT: vmv.x.s a0, v0 ; RV64ZBS-NEXT: bext a0, a0, a1 ; RV64ZBS-NEXT: ret %a = load <32 x i8>, ptr %x @@ -206,10 +206,10 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: li a2, 64 ; RV32-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: vmseq.vi v12, v8, 0 +; RV32-NEXT: vmseq.vi v0, v8, 0 ; RV32-NEXT: srli a0, a1, 5 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; 
RV32-NEXT: vslidedown.vx v8, v12, a0 +; RV32-NEXT: vslidedown.vx v8, v0, a0 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: srl a0, a0, a1 ; RV32-NEXT: andi a0, a0, 1 @@ -220,9 +220,9 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: li a2, 64 ; RV64-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; RV64-NEXT: vle8.v v8, (a0) -; RV64-NEXT: vmseq.vi v12, v8, 0 +; RV64-NEXT: vmseq.vi v0, v8, 0 ; RV64-NEXT: vsetvli zero, a2, e64, m1, ta, ma -; RV64-NEXT: vmv.x.s a0, v12 +; RV64-NEXT: vmv.x.s a0, v0 ; RV64-NEXT: srl a0, a0, a1 ; RV64-NEXT: andi a0, a0, 1 ; RV64-NEXT: ret @@ -232,10 +232,10 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind { ; RV32ZBS-NEXT: li a2, 64 ; RV32ZBS-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; RV32ZBS-NEXT: vle8.v v8, (a0) -; RV32ZBS-NEXT: vmseq.vi v12, v8, 0 +; RV32ZBS-NEXT: vmseq.vi v0, v8, 0 ; RV32ZBS-NEXT: srli a0, a1, 5 ; RV32ZBS-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32ZBS-NEXT: vslidedown.vx v8, v12, a0 +; RV32ZBS-NEXT: vslidedown.vx v8, v0, a0 ; RV32ZBS-NEXT: vmv.x.s a0, v8 ; RV32ZBS-NEXT: bext a0, a0, a1 ; RV32ZBS-NEXT: ret @@ -245,9 +245,9 @@ define i1 @extractelt_v64i1(ptr %x, i64 %idx) nounwind { ; RV64ZBS-NEXT: li a2, 64 ; RV64ZBS-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; RV64ZBS-NEXT: vle8.v v8, (a0) -; RV64ZBS-NEXT: vmseq.vi v12, v8, 0 +; RV64ZBS-NEXT: vmseq.vi v0, v8, 0 ; RV64ZBS-NEXT: vsetvli zero, a2, e64, m1, ta, ma -; RV64ZBS-NEXT: vmv.x.s a0, v12 +; RV64ZBS-NEXT: vmv.x.s a0, v0 ; RV64ZBS-NEXT: bext a0, a0, a1 ; RV64ZBS-NEXT: ret %a = load <64 x i8>, ptr %x @@ -262,10 +262,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: li a2, 128 ; RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: vmseq.vi v16, v8, 0 +; RV32-NEXT: vmseq.vi v0, v8, 0 ; RV32-NEXT: srli a0, a1, 5 ; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vx v8, v16, a0 +; RV32-NEXT: vslidedown.vx v8, v0, a0 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: srl a0, a0, a1 ; 
RV32-NEXT: andi a0, a0, 1 @@ -276,10 +276,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: li a2, 128 ; RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; RV64-NEXT: vle8.v v8, (a0) -; RV64-NEXT: vmseq.vi v16, v8, 0 +; RV64-NEXT: vmseq.vi v0, v8, 0 ; RV64-NEXT: srli a0, a1, 6 ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vslidedown.vx v8, v16, a0 +; RV64-NEXT: vslidedown.vx v8, v0, a0 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: srl a0, a0, a1 ; RV64-NEXT: andi a0, a0, 1 @@ -290,10 +290,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind { ; RV32ZBS-NEXT: li a2, 128 ; RV32ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; RV32ZBS-NEXT: vle8.v v8, (a0) -; RV32ZBS-NEXT: vmseq.vi v16, v8, 0 +; RV32ZBS-NEXT: vmseq.vi v0, v8, 0 ; RV32ZBS-NEXT: srli a0, a1, 5 ; RV32ZBS-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32ZBS-NEXT: vslidedown.vx v8, v16, a0 +; RV32ZBS-NEXT: vslidedown.vx v8, v0, a0 ; RV32ZBS-NEXT: vmv.x.s a0, v8 ; RV32ZBS-NEXT: bext a0, a0, a1 ; RV32ZBS-NEXT: ret @@ -303,10 +303,10 @@ define i1 @extractelt_v128i1(ptr %x, i64 %idx) nounwind { ; RV64ZBS-NEXT: li a2, 128 ; RV64ZBS-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; RV64ZBS-NEXT: vle8.v v8, (a0) -; RV64ZBS-NEXT: vmseq.vi v16, v8, 0 +; RV64ZBS-NEXT: vmseq.vi v0, v8, 0 ; RV64ZBS-NEXT: srli a0, a1, 6 ; RV64ZBS-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64ZBS-NEXT: vslidedown.vx v8, v16, a0 +; RV64ZBS-NEXT: vslidedown.vx v8, v0, a0 ; RV64ZBS-NEXT: vmv.x.s a0, v8 ; RV64ZBS-NEXT: bext a0, a0, a1 ; RV64ZBS-NEXT: ret @@ -527,8 +527,8 @@ define i1 @extractelt_v32i1_idx0(ptr %x) nounwind { ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmseq.vi v10, v8, 0 -; CHECK-NEXT: vfirst.m a0, v10 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vfirst.m a0, v0 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x @@ -543,8 +543,8 @@ define i1 @extractelt_v64i1_idx0(ptr %x) nounwind { ; CHECK-NEXT: li a1, 
64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmseq.vi v12, v8, 0 -; CHECK-NEXT: vfirst.m a0, v12 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vfirst.m a0, v0 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x @@ -559,8 +559,8 @@ define i1 @extractelt_v128i1_idx0(ptr %x) nounwind { ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmseq.vi v16, v8, 0 -; CHECK-NEXT: vfirst.m a0, v16 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vfirst.m a0, v0 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x @@ -575,8 +575,8 @@ define i1 @extractelt_v256i1_idx0(ptr %x) nounwind { ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmseq.vi v16, v8, 0 -; CHECK-NEXT: vfirst.m a0, v16 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vfirst.m a0, v0 ; CHECK-NEXT: seqz a0, a0 ; CHECK-NEXT: ret %a = load <256 x i8>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll index 6915722..25b9805 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll @@ -201,11 +201,9 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 2 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFH-NEXT: vsetvli 
zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 2 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -287,11 +283,9 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 2 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -436,11 +430,9 @@ define <8 x float> @vp_floor_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -480,11 +472,9 @@ define <16 x float> @vp_floor_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, 
v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -566,11 +556,9 @@ define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -610,11 +598,9 @@ define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -654,11 +640,9 @@ define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -698,11 +682,9 @@ define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; 
CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -750,11 +732,9 @@ define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a1, 2 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -768,11 +748,9 @@ define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll index edb3315..8d378fa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll @@ -13,32 +13,33 @@ declare <2 x half> @llvm.vp.maximum.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) define <2 x half> @vfmax_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; 
ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -52,11 +53,10 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; 
ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v2f16_unmasked: @@ -66,12 +66,11 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -85,32 +84,33 @@ declare <4 x half> @llvm.vp.maximum.v4f16(<4 x half>, <4 x half>, <4 x i1>, i32) define <4 x half> @vfmax_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t 
+; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -124,11 +124,10 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v4f16_unmasked: @@ -138,12 +137,11 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: 
vfncvt.f.f.w v8, v9 @@ -157,35 +155,34 @@ declare <8 x half> @llvm.vp.maximum.v8f16(<8 x half>, <8 x half>, <8 x i1>, i32) define <8 x half> @vfmax_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v14, v0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v14, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmax.vv v10, v8, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, 
v10 ; ZVFHMIN-NEXT: ret @@ -198,11 +195,10 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v8f16_unmasked: @@ -214,11 +210,10 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v14, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -231,37 +226,34 @@ declare <16 x half> @llvm.vp.maximum.v16f16(<16 x half>, <16 x half>, <16 x i1>, define <16 x half> @vfmax_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: vmv1r.v v14, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v13, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vmfeq.vv v13, v10, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: 
vmfeq.vv v0, v10, v10, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vfmax.vv v8, v8, v14, v0.t +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vfmax.vv v8, v8, v12, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmv1r.v v20, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v20, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vfmax.vv v12, v8, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -274,11 +266,10 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v14 +; ZVFH-NEXT: vfmax.vv v8, v8, v12 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: 
vfmax_vv_v16f16_unmasked: @@ -290,11 +281,10 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v20, v12, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 -; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -307,15 +297,15 @@ declare <2 x float> @llvm.vp.maximum.v2f32(<2 x float>, <2 x float>, <2 x i1>, i define <2 x float> @vfmax_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.maximum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) ret <2 x float> %v @@ -326,11 +316,10 @@ define <2 x float> @vfmax_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: 
vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.maximum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v @@ -341,15 +330,15 @@ declare <4 x float> @llvm.vp.maximum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i define <4 x float> @vfmax_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) ret <4 x float> %v @@ -360,11 +349,10 @@ define <4 x float> @vfmax_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.maximum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v @@ -375,17 +363,15 @@ declare <8 x float> @llvm.vp.maximum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i 
define <8 x float> @vfmax_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) ret <8 x float> %v @@ -396,11 +382,10 @@ define <8 x float> @vfmax_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.maximum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v @@ -411,17 +396,15 @@ declare <16 x float> @llvm.vp.maximum.v16f32(<16 x float>, <16 x float>, <16 x i define <16 x float> @vfmax_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; 
CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.maximum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) ret <16 x float> %v @@ -432,11 +415,10 @@ define <16 x float> @vfmax_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.maximum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v @@ -447,15 +429,15 @@ declare <2 x double> @llvm.vp.maximum.v2f64(<2 x double>, <2 x double>, <2 x i1> define <2 x double> @vfmax_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, 
v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.maximum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) ret <2 x double> %v @@ -466,11 +448,10 @@ define <2 x double> @vfmax_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.maximum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v @@ -481,17 +462,15 @@ declare <4 x double> @llvm.vp.maximum.v4f64(<4 x double>, <4 x double>, <4 x i1> define <4 x double> @vfmax_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.maximum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> 
%m, i32 %evl) ret <4 x double> %v @@ -502,11 +481,10 @@ define <4 x double> @vfmax_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.maximum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v @@ -517,17 +495,15 @@ declare <8 x double> @llvm.vp.maximum.v8f64(<8 x double>, <8 x double>, <8 x i1> define <8 x double> @vfmax_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.maximum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) ret <8 x double> %v @@ -538,11 +514,10 @@ define <8 x double> @vfmax_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: 
vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.maximum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v @@ -553,30 +528,15 @@ declare <16 x double> @llvm.vp.maximum.v16f64(<16 x double>, <16 x double>, <16 define <16 x double> @vfmax_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.maximum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) ret <16 x double> %v @@ -587,9 +547,8 @@ define <16 x 
double> @vfmax_vv_v16f64_unmasked(<16 x double> %va, <16 x double> ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: ret @@ -605,16 +564,15 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -622,16 +580,8 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size 
Folded Spill ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB24_2 @@ -639,26 +589,27 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB24_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmv8r.v v16, v24 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill @@ -668,36 +619,28 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, 
(a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v16, v16, v8, v0.t +; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -734,7 +677,6 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb @@ -742,7 +684,7 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, 
a0, 3 @@ -768,9 +710,8 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v8, v8 ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 ; CHECK-NEXT: vfmax.vv v16, v16, v24 ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll index 48649c4..3831261 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll @@ -13,32 +13,33 @@ declare <2 x half> @llvm.vp.minimum.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32) define <2 x half> @vfmin_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -52,11 +53,10 @@ define <2 x half> @vfmin_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v2f16_unmasked: @@ -66,12 +66,11 @@ define <2 x half> @vfmin_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -85,32 +84,33 @@ declare <4 x half> @llvm.vp.minimum.v4f16(<4 x half>, <4 x half>, <4 x 
i1>, i32) define <4 x half> @vfmin_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -124,11 +124,10 @@ define <4 x half> @vfmin_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; 
ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v4f16_unmasked: @@ -138,12 +137,11 @@ define <4 x half> @vfmin_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -157,35 +155,34 @@ declare <8 x half> @llvm.vp.minimum.v8f16(<8 x half>, <8 x half>, <8 x i1>, i32) define <8 x half> @vfmin_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v14, v0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v14, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmin.vv v10, v8, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -198,11 +195,10 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v8f16_unmasked: @@ -214,11 +210,10 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v14, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, 
v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v10, v8, v14 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -231,37 +226,34 @@ declare <16 x half> @llvm.vp.minimum.v16f16(<16 x half>, <16 x half>, <16 x i1>, define <16 x half> @vfmin_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: vmv1r.v v14, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v13, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vmfeq.vv v13, v10, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vfmin.vv v8, v8, v14, v0.t +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vfmin.vv v8, v8, v12, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmv1r.v v20, v0 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t -; ZVFHMIN-NEXT: 
vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v20, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vfmin.vv v12, v8, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -274,11 +266,10 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v14 +; ZVFH-NEXT: vfmin.vv v8, v8, v12 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_v16f16_unmasked: @@ -290,11 +281,10 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v20, v12, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 -; ZVFHMIN-NEXT: vfmin.vv v12, v8, v20 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -307,15 +297,15 @@ declare <2 x float> @llvm.vp.minimum.v2f32(<2 x float>, <2 x float>, <2 x i1>, i define <2 x float> @vfmin_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: 
vfmin_vv_v2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.minimum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 %evl) ret <2 x float> %v @@ -326,11 +316,10 @@ define <2 x float> @vfmin_vv_v2f32_unmasked(<2 x float> %va, <2 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.minimum.v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v @@ -341,15 +330,15 @@ declare <4 x float> @llvm.vp.minimum.v4f32(<4 x float>, <4 x float>, <4 x i1>, i define <4 x float> @vfmin_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: 
vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.minimum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 %evl) ret <4 x float> %v @@ -360,11 +349,10 @@ define <4 x float> @vfmin_vv_v4f32_unmasked(<4 x float> %va, <4 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.minimum.v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v @@ -375,17 +363,15 @@ declare <8 x float> @llvm.vp.minimum.v8f32(<8 x float>, <8 x float>, <8 x i1>, i define <8 x float> @vfmin_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmin.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.minimum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 %evl) ret <8 x float> %v @@ 
-396,11 +382,10 @@ define <8 x float> @vfmin_vv_v8f32_unmasked(<8 x float> %va, <8 x float> %vb, i3 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.minimum.v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v @@ -411,17 +396,15 @@ declare <16 x float> @llvm.vp.minimum.v16f32(<16 x float>, <16 x float>, <16 x i define <16 x float> @vfmin_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v16f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmin.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.minimum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 %evl) ret <16 x float> %v @@ -432,11 +415,10 @@ define <16 x float> @vfmin_vv_v16f32_unmasked(<16 x float> %va, <16 x float> %vb ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: 
vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.minimum.v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v @@ -447,15 +429,15 @@ declare <2 x double> @llvm.vp.minimum.v2f64(<2 x double>, <2 x double>, <2 x i1> define <2 x double> @vfmin_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.minimum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 %evl) ret <2 x double> %v @@ -466,11 +448,10 @@ define <2 x double> @vfmin_vv_v2f64_unmasked(<2 x double> %va, <2 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.minimum.v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v @@ -481,17 
+462,15 @@ declare <4 x double> @llvm.vp.minimum.v4f64(<4 x double>, <4 x double>, <4 x i1> define <4 x double> @vfmin_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmin.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.minimum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 %evl) ret <4 x double> %v @@ -502,11 +481,10 @@ define <4 x double> @vfmin_vv_v4f64_unmasked(<4 x double> %va, <4 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.minimum.v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v @@ -517,17 +495,15 @@ declare <8 x double> @llvm.vp.minimum.v8f64(<8 x double>, <8 x double>, <8 x i1> define <8 x double> @vfmin_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v8f64: ; CHECK: # %bb.0: -; 
CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmin.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.minimum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 %evl) ret <8 x double> %v @@ -538,11 +514,10 @@ define <8 x double> @vfmin_vv_v8f64_unmasked(<8 x double> %va, <8 x double> %vb, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.minimum.v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v @@ -553,30 +528,15 @@ declare <16 x double> @llvm.vp.minimum.v16f64(<16 x double>, <16 x double>, <16 define <16 x double> @vfmin_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_v16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 
0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.minimum.v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 %evl) ret <16 x double> %v @@ -587,9 +547,8 @@ define <16 x double> @vfmin_vv_v16f64_unmasked(<16 x double> %va, <16 x double> ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: ret @@ -605,16 +564,15 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 
0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill @@ -622,16 +580,8 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB24_2 @@ -639,26 +589,27 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB24_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: vmv1r.v v6, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmv8r.v v16, v24 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v26 +; 
CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill @@ -668,36 +619,28 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v16, v16, v8, v0.t +; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; 
CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -734,7 +677,6 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: .LBB25_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmv8r.v v16, v24 ; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb @@ -742,7 +684,7 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 @@ -768,9 +710,8 @@ define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v8, v8 ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 ; CHECK-NEXT: vfmin.vv v16, v16, v24 ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll index e201d5d..3b85328 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -262,9 +262,9 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) { ; V128-NEXT: vwmaccu.vx v8, a0, v16 ; V128-NEXT: lui a1, 699051 ; V128-NEXT: addi a1, a1, -1366 -; 
V128-NEXT: li a2, 32 ; V128-NEXT: vmv.s.x v0, a1 -; V128-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; V128-NEXT: vmerge.vvm v24, v8, v24, v0 ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: addi a1, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll index a566fab..9a4c8af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll @@ -22,8 +22,8 @@ define void @fcmp_oeq_vv_v8f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsm.v v8, (a2) +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10 +; ZVFHMIN-NEXT: vsm.v v0, (a2) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y @@ -50,8 +50,8 @@ define void @fcmp_oeq_vv_v8f16_nonans(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsm.v v8, (a2) +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10 +; ZVFHMIN-NEXT: vsm.v v0, (a2) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y @@ -166,8 +166,8 @@ define void @fcmp_olt_vv_v16f16(ptr %x, ptr %y, ptr %z) { ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v10, (a1) -; ZVFH-NEXT: vmflt.vv v12, v8, v10 -; ZVFH-NEXT: vsm.v v12, (a2) +; ZVFH-NEXT: vmflt.vv v0, v8, v10 +; ZVFH-NEXT: vsm.v v0, (a2) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_vv_v16f16: @@ -178,8 +178,8 @@ define void @fcmp_olt_vv_v16f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; 
ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vsm.v v8, (a2) +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vsm.v v0, (a2) ; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y @@ -194,8 +194,8 @@ define void @fcmp_olt_vv_v16f16_nonans(ptr %x, ptr %y, ptr %z) { ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v10, (a1) -; ZVFH-NEXT: vmflt.vv v12, v8, v10 -; ZVFH-NEXT: vsm.v v12, (a2) +; ZVFH-NEXT: vmflt.vv v0, v8, v10 +; ZVFH-NEXT: vsm.v v0, (a2) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_vv_v16f16_nonans: @@ -206,8 +206,8 @@ define void @fcmp_olt_vv_v16f16_nonans(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vsm.v v8, (a2) +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vsm.v v0, (a2) ; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y @@ -222,8 +222,8 @@ define void @fcmp_oge_vv_v8f32(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v10, (a1) -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vsm.v v12, (a2) +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %b = load <8 x float>, ptr %y @@ -238,8 +238,8 @@ define void @fcmp_oge_vv_v8f32_nonans(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v10, (a1) -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vsm.v v12, (a2) +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %b = load <8 x float>, ptr %y @@ -305,8 +305,8 @@ define void @fcmp_ule_vv_v32f16(ptr %x, ptr %y, ptr %z) { ; ZVFH-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFH-NEXT: vle16.v v8, 
(a0) ; ZVFH-NEXT: vle16.v v12, (a1) -; ZVFH-NEXT: vmflt.vv v16, v12, v8 -; ZVFH-NEXT: vmnot.m v8, v16 +; ZVFH-NEXT: vmflt.vv v0, v12, v8 +; ZVFH-NEXT: vmnot.m v8, v0 ; ZVFH-NEXT: vsm.v v8, (a2) ; ZVFH-NEXT: ret ; @@ -319,8 +319,8 @@ define void @fcmp_ule_vv_v32f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v24, v16 -; ZVFHMIN-NEXT: vmnot.m v8, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v24, v16 +; ZVFHMIN-NEXT: vmnot.m v8, v0 ; ZVFHMIN-NEXT: vsm.v v8, (a2) ; ZVFHMIN-NEXT: ret %a = load <32 x half>, ptr %x @@ -337,8 +337,8 @@ define void @fcmp_ule_vv_v32f16_nonans(ptr %x, ptr %y, ptr %z) { ; ZVFH-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v12, (a1) -; ZVFH-NEXT: vmfle.vv v16, v8, v12 -; ZVFH-NEXT: vsm.v v16, (a2) +; ZVFH-NEXT: vmfle.vv v0, v8, v12 +; ZVFH-NEXT: vsm.v v0, (a2) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_vv_v32f16_nonans: @@ -350,8 +350,8 @@ define void @fcmp_ule_vv_v32f16_nonans(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v24, v16 -; ZVFHMIN-NEXT: vsm.v v8, (a2) +; ZVFHMIN-NEXT: vmfle.vv v0, v24, v16 +; ZVFHMIN-NEXT: vsm.v v0, (a2) ; ZVFHMIN-NEXT: ret %a = load <32 x half>, ptr %x %b = load <32 x half>, ptr %y @@ -366,8 +366,8 @@ define void @fcmp_uge_vv_v16f32(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v12, (a1) -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v8, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vsm.v v8, (a2) ; CHECK-NEXT: ret %a = load <16 x float>, ptr %x @@ -383,8 +383,8 @@ define void @fcmp_uge_vv_v16f32_nonans(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; 
CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v12, (a1) -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vsm.v v16, (a2) +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <16 x float>, ptr %x %b = load <16 x float>, ptr %y @@ -399,8 +399,8 @@ define void @fcmp_ult_vv_v8f64(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v12, (a1) -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v8, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vsm.v v8, (a2) ; CHECK-NEXT: ret %a = load <8 x double>, ptr %x @@ -416,8 +416,8 @@ define void @fcmp_ult_vv_v8f64_nonans(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v12, (a1) -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vsm.v v16, (a2) +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <8 x double>, ptr %x %b = load <8 x double>, ptr %y @@ -433,8 +433,8 @@ define void @fcmp_ugt_vv_v64f16(ptr %x, ptr %y, ptr %z) { ; ZVFH-NEXT: vsetvli zero, a3, e16, m8, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v16, (a1) -; ZVFH-NEXT: vmfle.vv v24, v8, v16 -; ZVFH-NEXT: vmnot.m v8, v24 +; ZVFH-NEXT: vmfle.vv v0, v8, v16 +; ZVFH-NEXT: vmnot.m v8, v0 ; ZVFH-NEXT: vsm.v v8, (a2) ; ZVFH-NEXT: ret %a = load <64 x half>, ptr %x @@ -451,8 +451,8 @@ define void @fcmp_ugt_vv_v64f16_nonans(ptr %x, ptr %y, ptr %z) { ; ZVFH-NEXT: vsetvli zero, a3, e16, m8, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: vle16.v v16, (a1) -; ZVFH-NEXT: vmflt.vv v24, v16, v8 -; ZVFH-NEXT: vsm.v v24, (a2) +; ZVFH-NEXT: vmflt.vv v0, v16, v8 +; ZVFH-NEXT: vsm.v v0, (a2) ; ZVFH-NEXT: ret %a = load <64 x half>, ptr %x %b = load <64 x half>, ptr %y @@ -468,9 +468,9 @@ define void @fcmp_ueq_vv_v32f32(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, 
ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v16, (a1) -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmnor.mm v8, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmnor.mm v8, v8, v0 ; CHECK-NEXT: vsm.v v8, (a2) ; CHECK-NEXT: ret %a = load <32 x float>, ptr %x @@ -487,8 +487,8 @@ define void @fcmp_ueq_vv_v32f32_nonans(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v16, (a1) -; CHECK-NEXT: vmfeq.vv v24, v8, v16 -; CHECK-NEXT: vsm.v v24, (a2) +; CHECK-NEXT: vmfeq.vv v0, v8, v16 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <32 x float>, ptr %x %b = load <32 x float>, ptr %y @@ -503,9 +503,9 @@ define void @fcmp_one_vv_v8f64(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmor.mm v8, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmor.mm v8, v8, v0 ; CHECK-NEXT: vsm.v v8, (a2) ; CHECK-NEXT: ret %a = load <16 x double>, ptr %x @@ -521,8 +521,8 @@ define void @fcmp_one_vv_v8f64_nonans(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: vmfne.vv v24, v8, v16 -; CHECK-NEXT: vsm.v v24, (a2) +; CHECK-NEXT: vmfne.vv v0, v8, v16 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <16 x double>, ptr %x %b = load <16 x double>, ptr %y @@ -657,8 +657,8 @@ define void @fcmp_oeq_vf_v8f16(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v12 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v12 +; ZVFHMIN-NEXT: 
vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 @@ -690,8 +690,8 @@ define void @fcmp_oeq_vf_v8f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v12 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v12 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 @@ -806,8 +806,8 @@ define void @fcmp_olt_vf_v16f16(ptr %x, half %y, ptr %z) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmflt.vf v10, v8, fa0 -; ZVFH-NEXT: vsm.v v10, (a1) +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_vf_v16f16: @@ -823,8 +823,8 @@ define void @fcmp_olt_vf_v16f16(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = insertelement <16 x half> poison, half %y, i32 0 @@ -839,8 +839,8 @@ define void @fcmp_olt_vf_v16f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmflt.vf v10, v8, fa0 -; ZVFH-NEXT: vsm.v v10, (a1) +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_vf_v16f16_nonans: @@ -856,8 +856,8 @@ define void @fcmp_olt_vf_v16f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv 
v8, v12, v16 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = insertelement <16 x half> poison, half %y, i32 0 @@ -872,8 +872,8 @@ define void @fcmp_oge_vf_v8f32(ptr %x, float %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vsm.v v10, (a1) +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %b = insertelement <8 x float> poison, float %y, i32 0 @@ -888,8 +888,8 @@ define void @fcmp_oge_vf_v8f32_nonans(ptr %x, float %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vsm.v v10, (a1) +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %b = insertelement <8 x float> poison, float %y, i32 0 @@ -955,8 +955,8 @@ define void @fcmp_ule_vf_v32f16(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 32 ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfgt.vf v12, v8, fa0 -; ZVFH-NEXT: vmnot.m v8, v12 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v8, v0 ; ZVFH-NEXT: vsm.v v8, (a1) ; ZVFH-NEXT: ret ; @@ -974,8 +974,8 @@ define void @fcmp_ule_vf_v32f16(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v24, v16 -; ZVFHMIN-NEXT: vmnot.m v8, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v24, v16 +; ZVFHMIN-NEXT: vmnot.m v8, v0 ; ZVFHMIN-NEXT: vsm.v v8, (a1) ; ZVFHMIN-NEXT: ret %a = load <32 x half>, ptr %x @@ -992,8 +992,8 @@ define void @fcmp_ule_vf_v32f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 32 ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; 
ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfle.vf v12, v8, fa0 -; ZVFH-NEXT: vsm.v v12, (a1) +; ZVFH-NEXT: vmfle.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_vf_v32f16_nonans: @@ -1010,8 +1010,8 @@ define void @fcmp_ule_vf_v32f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v24 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v24 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <32 x half>, ptr %x %b = insertelement <32 x half> poison, half %y, i32 0 @@ -1026,8 +1026,8 @@ define void @fcmp_uge_vf_v16f32(ptr %x, float %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v8, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <16 x float>, ptr %x @@ -1043,8 +1043,8 @@ define void @fcmp_uge_vf_v16f32_nonans(ptr %x, float %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vsm.v v12, (a1) +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <16 x float>, ptr %x %b = insertelement <16 x float> poison, float %y, i32 0 @@ -1059,8 +1059,8 @@ define void @fcmp_ult_vf_v8f64(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v8, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <8 x double>, ptr %x @@ -1076,8 +1076,8 @@ define void @fcmp_ult_vf_v8f64_nonans(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vsm.v v12, (a1) +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <8 x double>, ptr %x %b = insertelement <8 x double> poison, double %y, i32 0 @@ -1093,8 +1093,8 @@ define void @fcmp_ugt_vf_v64f16(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 64 ; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfle.vf v16, v8, fa0 -; ZVFH-NEXT: vmnot.m v8, v16 +; ZVFH-NEXT: vmfle.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v8, v0 ; ZVFH-NEXT: vsm.v v8, (a1) ; ZVFH-NEXT: ret %a = load <64 x half>, ptr %x @@ -1111,8 +1111,8 @@ define void @fcmp_ugt_vf_v64f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 64 ; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfgt.vf v16, v8, fa0 -; ZVFH-NEXT: vsm.v v16, (a1) +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret %a = load <64 x half>, ptr %x %b = insertelement <64 x half> poison, half %y, i32 0 @@ -1128,9 +1128,9 @@ define void @fcmp_ueq_vf_v32f32(ptr %x, float %y, ptr %z) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v8, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v8, v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <32 x float>, ptr %x @@ -1147,8 +1147,8 @@ define void @fcmp_ueq_vf_v32f32_nonans(ptr %x, float %y, ptr %z) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfeq.vf v16, v8, fa0 -; CHECK-NEXT: vsm.v v16, (a1) +; CHECK-NEXT: vmfeq.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <32 x float>, ptr %x %b = insertelement 
<32 x float> poison, float %y, i32 0 @@ -1163,9 +1163,9 @@ define void @fcmp_one_vf_v8f64(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v8, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v8, v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <16 x double>, ptr %x @@ -1181,8 +1181,8 @@ define void @fcmp_one_vf_v8f64_nonans(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vf v16, v8, fa0 -; CHECK-NEXT: vsm.v v16, (a1) +; CHECK-NEXT: vmfne.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <16 x double>, ptr %x %b = insertelement <16 x double> poison, double %y, i32 0 @@ -1330,8 +1330,8 @@ define void @fcmp_oeq_fv_v8f16(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 @@ -1363,8 +1363,8 @@ define void @fcmp_oeq_fv_v8f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = insertelement <8 x half> poison, half %y, i32 0 @@ -1479,8 +1479,8 @@ define void @fcmp_olt_fv_v16f16(ptr %x, half %y, ptr %z) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; 
ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0 -; ZVFH-NEXT: vsm.v v10, (a1) +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_fv_v16f16: @@ -1496,8 +1496,8 @@ define void @fcmp_olt_fv_v16f16(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = insertelement <16 x half> poison, half %y, i32 0 @@ -1512,8 +1512,8 @@ define void @fcmp_olt_fv_v16f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0 -; ZVFH-NEXT: vsm.v v10, (a1) +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_fv_v16f16_nonans: @@ -1529,8 +1529,8 @@ define void @fcmp_olt_fv_v16f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = insertelement <16 x half> poison, half %y, i32 0 @@ -1545,8 +1545,8 @@ define void @fcmp_oge_fv_v8f32(ptr %x, float %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vsm.v v10, (a1) +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %b = insertelement <8 x float> poison, float %y, i32 0 @@ -1561,8 +1561,8 @@ define void @fcmp_oge_fv_v8f32_nonans(ptr %x, float %y, 
ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vsm.v v10, (a1) +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <8 x float>, ptr %x %b = insertelement <8 x float> poison, float %y, i32 0 @@ -1628,8 +1628,8 @@ define void @fcmp_ule_fv_v32f16(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 32 ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmflt.vf v12, v8, fa0 -; ZVFH-NEXT: vmnot.m v8, v12 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v8, v0 ; ZVFH-NEXT: vsm.v v8, (a1) ; ZVFH-NEXT: ret ; @@ -1647,8 +1647,8 @@ define void @fcmp_ule_fv_v32f16(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v24 -; ZVFHMIN-NEXT: vmnot.m v8, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v24 +; ZVFHMIN-NEXT: vmnot.m v8, v0 ; ZVFHMIN-NEXT: vsm.v v8, (a1) ; ZVFHMIN-NEXT: ret %a = load <32 x half>, ptr %x @@ -1665,8 +1665,8 @@ define void @fcmp_ule_fv_v32f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 32 ; ZVFH-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfge.vf v12, v8, fa0 -; ZVFH-NEXT: vsm.v v12, (a1) +; ZVFH-NEXT: vmfge.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_fv_v32f16_nonans: @@ -1683,8 +1683,8 @@ define void @fcmp_ule_fv_v32f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v24, v16 -; ZVFHMIN-NEXT: vsm.v v8, (a1) +; ZVFHMIN-NEXT: vmfle.vv v0, v24, v16 +; ZVFHMIN-NEXT: vsm.v v0, (a1) ; ZVFHMIN-NEXT: ret %a = load <32 x half>, ptr %x %b = insertelement <32 x half> poison, half %y, i32 0 @@ -1699,8 +1699,8 @@ 
define void @fcmp_uge_fv_v16f32(ptr %x, float %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v8, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <16 x float>, ptr %x @@ -1716,8 +1716,8 @@ define void @fcmp_uge_fv_v16f32_nonans(ptr %x, float %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vsm.v v12, (a1) +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <16 x float>, ptr %x %b = insertelement <16 x float> poison, float %y, i32 0 @@ -1732,8 +1732,8 @@ define void @fcmp_ult_fv_v8f64(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v8, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <8 x double>, ptr %x @@ -1749,8 +1749,8 @@ define void @fcmp_ult_fv_v8f64_nonans(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vsm.v v12, (a1) +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <8 x double>, ptr %x %b = insertelement <8 x double> poison, double %y, i32 0 @@ -1766,8 +1766,8 @@ define void @fcmp_ugt_fv_v64f16(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 64 ; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmfge.vf v16, v8, fa0 -; ZVFH-NEXT: vmnot.m v8, v16 +; ZVFH-NEXT: vmfge.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v8, v0 ; ZVFH-NEXT: vsm.v v8, (a1) ; ZVFH-NEXT: ret %a = load <64 x half>, ptr %x 
@@ -1784,8 +1784,8 @@ define void @fcmp_ugt_fv_v64f16_nonans(ptr %x, half %y, ptr %z) { ; ZVFH-NEXT: li a2, 64 ; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; ZVFH-NEXT: vle16.v v8, (a0) -; ZVFH-NEXT: vmflt.vf v16, v8, fa0 -; ZVFH-NEXT: vsm.v v16, (a1) +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vsm.v v0, (a1) ; ZVFH-NEXT: ret %a = load <64 x half>, ptr %x %b = insertelement <64 x half> poison, half %y, i32 0 @@ -1801,9 +1801,9 @@ define void @fcmp_ueq_fv_v32f32(ptr %x, float %y, ptr %z) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v8, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v8, v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <32 x float>, ptr %x @@ -1820,8 +1820,8 @@ define void @fcmp_ueq_fv_v32f32_nonans(ptr %x, float %y, ptr %z) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfeq.vf v16, v8, fa0 -; CHECK-NEXT: vsm.v v16, (a1) +; CHECK-NEXT: vmfeq.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <32 x float>, ptr %x %b = insertelement <32 x float> poison, float %y, i32 0 @@ -1836,9 +1836,9 @@ define void @fcmp_one_fv_v8f64(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v8, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v8, v8, v0 ; CHECK-NEXT: vsm.v v8, (a1) ; CHECK-NEXT: ret %a = load <16 x double>, ptr %x @@ -1854,8 +1854,8 @@ define void @fcmp_one_fv_v8f64_nonans(ptr %x, double %y, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; 
CHECK-NEXT: vmfne.vf v16, v8, fa0 -; CHECK-NEXT: vsm.v v16, (a1) +; CHECK-NEXT: vmfne.vf v0, v8, fa0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <16 x double>, ptr %x %b = insertelement <16 x double> poison, double %y, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll index 602662b..24d7a87 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp-mask.ll @@ -76,9 +76,8 @@ define <4 x i1> @vfptosi_v4i1_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext % ; CHECK-LABEL: vfptosi_v4i1_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll index c5bfd41..da512e3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp-mask.ll @@ -76,9 +76,8 @@ define <4 x i1> @vfptoui_v4i1_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext % ; CHECK-LABEL: vfptoui_v4i1_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.rtz.xu.f.v v10, v8, v0.t -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %v = call <4 x i1> @llvm.vp.fptoui.v4i1.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index cc76fd5..7f03bab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -628,6 +628,7 @@ define void @insert_v2i64_nxv16i64_hi(ptr %psv, ptr %out) { ; RV32-NEXT: vs8r.v v8, (a0) ; RV32-NEXT: vs8r.v v16, (a1) ; RV32-NEXT: addi sp, s0, -80 +; RV32-NEXT: .cfi_def_cfa sp, 80 ; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 80 @@ -661,6 +662,7 @@ define void @insert_v2i64_nxv16i64_hi(ptr %psv, ptr %out) { ; RV64-NEXT: vs8r.v v8, (a0) ; RV64-NEXT: vs8r.v v16, (a1) ; RV64-NEXT: addi sp, s0, -80 +; RV64-NEXT: .cfi_def_cfa sp, 80 ; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 80 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll index 1a905e5..a67ba6b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -427,9 +427,9 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; V128-NEXT: vwmaccu.vx v8, a0, v16 ; V128-NEXT: lui a1, 699051 ; V128-NEXT: addi a1, a1, -1366 -; V128-NEXT: li a2, 32 ; V128-NEXT: vmv.s.x v0, a1 -; V128-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; V128-NEXT: li a1, 32 +; V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; V128-NEXT: vmerge.vvm v24, v8, v24, v0 ; V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; V128-NEXT: addi a1, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll index 0b08d94..1a7d440 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll @@ -52,8 +52,8 @@ define void @setgt_vv_v64i8(ptr %x, 
ptr %y, ptr %z) { ; CHECK-NEXT: vsetvli zero, a3, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v12, (a1) -; CHECK-NEXT: vmslt.vv v16, v12, v8 -; CHECK-NEXT: vsm.v v16, (a2) +; CHECK-NEXT: vmslt.vv v0, v12, v8 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %b = load <64 x i8>, ptr %y @@ -69,8 +69,8 @@ define void @setlt_vv_v128i8(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v16, (a1) -; CHECK-NEXT: vmslt.vv v24, v8, v16 -; CHECK-NEXT: vsm.v v24, (a2) +; CHECK-NEXT: vmslt.vv v0, v8, v16 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %b = load <128 x i8>, ptr %y @@ -118,8 +118,8 @@ define void @setugt_vv_v32i8(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v10, (a1) -; CHECK-NEXT: vmsltu.vv v12, v10, v8 -; CHECK-NEXT: vsm.v v12, (a2) +; CHECK-NEXT: vmsltu.vv v0, v10, v8 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = load <32 x i8>, ptr %y @@ -135,8 +135,8 @@ define void @setult_vv_v64i8(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetvli zero, a3, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v12, (a1) -; CHECK-NEXT: vmsltu.vv v16, v8, v12 -; CHECK-NEXT: vsm.v v16, (a2) +; CHECK-NEXT: vmsltu.vv v0, v8, v12 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %b = load <64 x i8>, ptr %y @@ -152,8 +152,8 @@ define void @setuge_vv_v128i8(ptr %x, ptr %y, ptr %z) { ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v16, (a1) -; CHECK-NEXT: vmsleu.vv v24, v16, v8 -; CHECK-NEXT: vsm.v v24, (a2) +; CHECK-NEXT: vmsleu.vv v0, v16, v8 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %b = load <128 x i8>, ptr %y @@ -200,8 +200,8 @@ define void @setne_vx_v32i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li 
a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsne.vx v10, v8, a1 -; CHECK-NEXT: vsm.v v10, (a2) +; CHECK-NEXT: vmsne.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = insertelement <32 x i8> poison, i8 %y, i32 0 @@ -217,8 +217,8 @@ define void @setgt_vx_v64i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 64 ; CHECK-NEXT: vsetvli zero, a3, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgt.vx v12, v8, a1 -; CHECK-NEXT: vsm.v v12, (a2) +; CHECK-NEXT: vmsgt.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %b = insertelement <64 x i8> poison, i8 %y, i32 0 @@ -234,8 +234,8 @@ define void @setlt_vx_v128i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmslt.vx v16, v8, a1 -; CHECK-NEXT: vsm.v v16, (a2) +; CHECK-NEXT: vmslt.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %b = insertelement <128 x i8> poison, i8 %y, i32 0 @@ -284,8 +284,8 @@ define void @setugt_vx_v32i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgtu.vx v10, v8, a1 -; CHECK-NEXT: vsm.v v10, (a2) +; CHECK-NEXT: vmsgtu.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = insertelement <32 x i8> poison, i8 %y, i32 0 @@ -301,8 +301,8 @@ define void @setult_vx_v64i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 64 ; CHECK-NEXT: vsetvli zero, a3, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsltu.vx v12, v8, a1 -; CHECK-NEXT: vsm.v v12, (a2) +; CHECK-NEXT: vmsltu.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %b = insertelement <64 x i8> poison, i8 %y, i32 0 @@ -319,8 +319,8 @@ define void @setuge_vx_v128i8(ptr %x, i8 %y, ptr 
%z) { ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmv.v.x v16, a1 -; CHECK-NEXT: vmsleu.vv v24, v16, v8 -; CHECK-NEXT: vsm.v v24, (a2) +; CHECK-NEXT: vmsleu.vv v0, v16, v8 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %b = insertelement <128 x i8> poison, i8 %y, i32 0 @@ -368,8 +368,8 @@ define void @setne_xv_v32i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsne.vx v10, v8, a1 -; CHECK-NEXT: vsm.v v10, (a2) +; CHECK-NEXT: vmsne.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = insertelement <32 x i8> poison, i8 %y, i32 0 @@ -385,8 +385,8 @@ define void @setgt_xv_v64i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 64 ; CHECK-NEXT: vsetvli zero, a3, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmslt.vx v12, v8, a1 -; CHECK-NEXT: vsm.v v12, (a2) +; CHECK-NEXT: vmslt.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %b = insertelement <64 x i8> poison, i8 %y, i32 0 @@ -402,8 +402,8 @@ define void @setlt_xv_v128i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgt.vx v16, v8, a1 -; CHECK-NEXT: vsm.v v16, (a2) +; CHECK-NEXT: vmsgt.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %b = insertelement <128 x i8> poison, i8 %y, i32 0 @@ -452,8 +452,8 @@ define void @setugt_xv_v32i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsltu.vx v10, v8, a1 -; CHECK-NEXT: vsm.v v10, (a2) +; CHECK-NEXT: vmsltu.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %b = insertelement <32 x i8> poison, i8 %y, i32 0 @@ -469,8 +469,8 @@ define void 
@setult_xv_v64i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 64 ; CHECK-NEXT: vsetvli zero, a3, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgtu.vx v12, v8, a1 -; CHECK-NEXT: vsm.v v12, (a2) +; CHECK-NEXT: vmsgtu.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %b = insertelement <64 x i8> poison, i8 %y, i32 0 @@ -486,8 +486,8 @@ define void @setuge_xv_v128i8(ptr %x, i8 %y, ptr %z) { ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsleu.vx v16, v8, a1 -; CHECK-NEXT: vsm.v v16, (a2) +; CHECK-NEXT: vmsleu.vx v0, v8, a1 +; CHECK-NEXT: vsm.v v0, (a2) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %b = insertelement <128 x i8> poison, i8 %y, i32 0 @@ -534,8 +534,8 @@ define void @setne_vi_v32i8(ptr %x, ptr %z) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsne.vi v10, v8, 0 -; CHECK-NEXT: vsm.v v10, (a1) +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %d = icmp ne <32 x i8> %a, splat (i8 0) @@ -549,8 +549,8 @@ define void @setgt_vi_v64i8(ptr %x, ptr %z) { ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgt.vi v12, v8, 0 -; CHECK-NEXT: vsm.v v12, (a1) +; CHECK-NEXT: vmsgt.vi v0, v8, 0 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %d = icmp sgt <64 x i8> %a, splat (i8 0) @@ -564,8 +564,8 @@ define void @setgt_vi_v64i8_nonzero(ptr %x, ptr %z) { ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgt.vi v12, v8, 5 -; CHECK-NEXT: vsm.v v12, (a1) +; CHECK-NEXT: vmsgt.vi v0, v8, 5 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %d = icmp sgt <64 x i8> %a, splat (i8 5) @@ -579,8 +579,8 @@ define void @setlt_vi_v128i8(ptr %x, 
ptr %z) { ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsle.vi v16, v8, -1 -; CHECK-NEXT: vsm.v v16, (a1) +; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %d = icmp slt <128 x i8> %a, splat (i8 0) @@ -622,8 +622,8 @@ define void @setugt_vi_v32i8(ptr %x, ptr %z) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgtu.vi v10, v8, 5 -; CHECK-NEXT: vsm.v v10, (a1) +; CHECK-NEXT: vmsgtu.vi v0, v8, 5 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %x %d = icmp ugt <32 x i8> %a, splat (i8 5) @@ -637,8 +637,8 @@ define void @setult_vi_v64i8(ptr %x, ptr %z) { ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsleu.vi v12, v8, 4 -; CHECK-NEXT: vsm.v v12, (a1) +; CHECK-NEXT: vmsleu.vi v0, v8, 4 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x %d = icmp ult <64 x i8> %a, splat (i8 5) @@ -652,8 +652,8 @@ define void @setuge_vi_v128i8(ptr %x, ptr %z) { ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmsgtu.vi v16, v8, 4 -; CHECK-NEXT: vsm.v v16, (a1) +; CHECK-NEXT: vmsgtu.vi v0, v8, 4 +; CHECK-NEXT: vsm.v v0, (a1) ; CHECK-NEXT: ret %a = load <128 x i8>, ptr %x %d = icmp uge <128 x i8> %a, splat (i8 5) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll index 9161ced..27adc7a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleave-store.ll @@ -20,8 +20,8 @@ define void @vector_interleave_store_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b, ptr ; CHECK-NEXT: li a2, -1 ; CHECK-NEXT: vwmaccu.vx v12, a2, v10 ; CHECK-NEXT: vsetvli zero, a1, e8, 
m2, ta, ma -; CHECK-NEXT: vmsne.vi v8, v12, 0 -; CHECK-NEXT: vsm.v v8, (a0) +; CHECK-NEXT: vmsne.vi v0, v12, 0 +; CHECK-NEXT: vsm.v v0, (a0) ; CHECK-NEXT: ret %res = call <32 x i1> @llvm.experimental.vector.interleave2.v32i1(<16 x i1> %a, <16 x i1> %b) store <32 x i1> %res, ptr %p diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll index 7fc442c..63c6dae5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll @@ -167,8 +167,8 @@ define void @splat_v32i1(ptr %x, i1 %y) { ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v10, v8, 0 -; CHECK-NEXT: vsm.v v10, (a0) +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vsm.v v0, (a0) ; CHECK-NEXT: ret %a = insertelement <32 x i1> poison, i1 %y, i32 0 %b = shufflevector <32 x i1> %a, <32 x i1> poison, <32 x i32> zeroinitializer @@ -201,8 +201,8 @@ define void @splat_v64i1(ptr %x, i1 %y) { ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v12, v8, 0 -; CHECK-NEXT: vsm.v v12, (a0) +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vsm.v v0, (a0) ; CHECK-NEXT: ret %a = insertelement <64 x i1> poison, i1 %y, i32 0 %b = shufflevector <64 x i1> %a, <64 x i1> poison, <64 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll index c6665c4..bbff66e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll @@ -369,10 +369,10 @@ define void @masked_load_v32f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; RV32-NEXT: vle64.v v16, (a3) ; RV32-NEXT: fcvt.d.w fa5, zero ; RV32-NEXT: vmfeq.vf v0, v8, fa5 -; RV32-NEXT: vmfeq.vf v24, v16, fa5 +; RV32-NEXT: vmfeq.vf v16, 
v16, fa5 ; RV32-NEXT: vle64.v v8, (a0), v0.t ; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: vmv1r.v v0, v16 ; RV32-NEXT: vle64.v v16, (a0), v0.t ; RV32-NEXT: vse64.v v8, (a2) ; RV32-NEXT: addi a0, a2, 128 @@ -387,10 +387,10 @@ define void @masked_load_v32f64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; RV64-NEXT: vle64.v v16, (a3) ; RV64-NEXT: fmv.d.x fa5, zero ; RV64-NEXT: vmfeq.vf v0, v8, fa5 -; RV64-NEXT: vmfeq.vf v24, v16, fa5 +; RV64-NEXT: vmfeq.vf v16, v16, fa5 ; RV64-NEXT: vle64.v v8, (a0), v0.t ; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vmv1r.v v0, v16 ; RV64-NEXT: vle64.v v16, (a0), v0.t ; RV64-NEXT: vse64.v v8, (a2) ; RV64-NEXT: addi a0, a2, 128 @@ -433,10 +433,10 @@ define void @masked_load_v64f32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; CHECK-NEXT: vle32.v v16, (a3) ; CHECK-NEXT: fmv.w.x fa5, zero ; CHECK-NEXT: vmfeq.vf v0, v8, fa5 -; CHECK-NEXT: vmfeq.vf v24, v16, fa5 +; CHECK-NEXT: vmfeq.vf v16, v16, fa5 ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vle32.v v16, (a0), v0.t ; CHECK-NEXT: vse32.v v8, (a2) ; CHECK-NEXT: addi a0, a2, 128 @@ -460,10 +460,10 @@ define void @masked_load_v128f16(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; CHECK-NEXT: vle16.v v16, (a3) ; CHECK-NEXT: fmv.h.x fa5, zero ; CHECK-NEXT: vmfeq.vf v0, v8, fa5 -; CHECK-NEXT: vmfeq.vf v24, v16, fa5 +; CHECK-NEXT: vmfeq.vf v16, v16, fa5 ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vle16.v v16, (a0), v0.t ; CHECK-NEXT: vse16.v v8, (a2) ; CHECK-NEXT: addi a0, a2, 128 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll index b6568fb..7e825b5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll @@ -425,10 +425,10 @@ define void @masked_load_v32i64(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; RV64-NEXT: vle64.v v8, (a1) ; RV64-NEXT: vle64.v v16, (a3) ; RV64-NEXT: vmseq.vi v0, v8, 0 -; RV64-NEXT: vmseq.vi v24, v16, 0 +; RV64-NEXT: vmseq.vi v16, v16, 0 ; RV64-NEXT: vle64.v v8, (a0), v0.t ; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vmv1r.v v0, v16 ; RV64-NEXT: vle64.v v16, (a0), v0.t ; RV64-NEXT: vse64.v v8, (a2) ; RV64-NEXT: addi a0, a2, 128 @@ -487,10 +487,10 @@ define void @masked_load_v64i32(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; CHECK-NEXT: vle32.v v8, (a1) ; CHECK-NEXT: vle32.v v16, (a3) ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vmseq.vi v24, v16, 0 +; CHECK-NEXT: vmseq.vi v16, v16, 0 ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vle32.v v16, (a0), v0.t ; CHECK-NEXT: vse32.v v8, (a2) ; CHECK-NEXT: addi a0, a2, 128 @@ -531,10 +531,10 @@ define void @masked_load_v256i8(ptr %a, ptr %m_ptr, ptr %res_ptr) nounwind { ; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vle8.v v16, (a3) ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vmseq.vi v24, v16, 0 +; CHECK-NEXT: vmseq.vi v16, v16, 0 ; CHECK-NEXT: vle8.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vle8.v v16, (a0), v0.t ; CHECK-NEXT: vse8.v v8, (a2) ; CHECK-NEXT: addi a0, a2, 128 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll index 38cd831..7b9a1d8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll @@ -368,7 +368,8 @@ define void @masked_store_v32f64(<32 x double>* %val_ptr, <32 x double>* %a, <32 ; RV32-NEXT: vle64.v v16, (a3) ; RV32-NEXT: vle64.v v8, (a2) 
; RV32-NEXT: fcvt.d.w fa5, zero -; RV32-NEXT: vmfeq.vf v7, v16, fa5 +; RV32-NEXT: vmfeq.vf v0, v16, fa5 +; RV32-NEXT: vmv1r.v v7, v0 ; RV32-NEXT: vle64.v v16, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v24, (a0) @@ -386,7 +387,8 @@ define void @masked_store_v32f64(<32 x double>* %val_ptr, <32 x double>* %a, <32 ; RV64-NEXT: vle64.v v16, (a3) ; RV64-NEXT: vle64.v v8, (a2) ; RV64-NEXT: fmv.d.x fa5, zero -; RV64-NEXT: vmfeq.vf v7, v16, fa5 +; RV64-NEXT: vmfeq.vf v0, v16, fa5 +; RV64-NEXT: vmv1r.v v7, v0 ; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) @@ -432,7 +434,8 @@ define void @masked_store_v64f32(<64 x float>* %val_ptr, <64 x float>* %a, <64 x ; CHECK-NEXT: vle32.v v16, (a3) ; CHECK-NEXT: vle32.v v8, (a2) ; CHECK-NEXT: fmv.w.x fa5, zero -; CHECK-NEXT: vmfeq.vf v7, v16, fa5 +; CHECK-NEXT: vmfeq.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v24, (a0) @@ -459,7 +462,8 @@ define void @masked_store_v128f16(<128 x half>* %val_ptr, <128 x half>* %a, <128 ; CHECK-NEXT: vle16.v v16, (a3) ; CHECK-NEXT: vle16.v v8, (a2) ; CHECK-NEXT: fmv.h.x fa5, zero -; CHECK-NEXT: vmfeq.vf v7, v16, fa5 +; CHECK-NEXT: vmfeq.vf v0, v16, fa5 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v24, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll index d3676d8..9db2a6b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll @@ -401,13 +401,13 @@ define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 10 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: sub sp, sp, a3 ; 
RV32-NEXT: addi a3, a2, 128 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a3) ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -416,26 +416,25 @@ define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vmseq.vv v7, v24, v16 +; RV32-NEXT: vmseq.vv v0, v24, v16 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill ; RV32-NEXT: addi a2, a0, 128 ; RV32-NEXT: vle64.v v24, (a2) ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmseq.vv v0, v8, v16 +; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmseq.vv v0, v0, v16 ; RV32-NEXT: addi a0, a1, 128 ; RV32-NEXT: vse64.v v24, (a0), v0.t -; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vse64.v v8, (a1), v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -446,7 +445,8 @@ define void @masked_store_v32i64(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; RV64-NEXT: addi a3, a2, 128 ; RV64-NEXT: vle64.v v8, (a3) ; RV64-NEXT: vle64.v v16, (a2) -; RV64-NEXT: vmseq.vi v7, v8, 0 +; RV64-NEXT: vmseq.vi v0, v8, 0 +; RV64-NEXT: vmv1r.v v7, v0 ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v24, (a0) @@ -508,7 +508,8 @@ 
define void @masked_store_v64i32(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; CHECK-NEXT: addi a3, a2, 128 ; CHECK-NEXT: vle32.v v8, (a3) ; CHECK-NEXT: vle32.v v16, (a2) -; CHECK-NEXT: vmseq.vi v7, v8, 0 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v24, (a0) @@ -552,7 +553,8 @@ define void @masked_store_v128i16(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; CHECK-NEXT: addi a3, a2, 128 ; CHECK-NEXT: vle16.v v8, (a3) ; CHECK-NEXT: vle16.v v16, (a2) -; CHECK-NEXT: vmseq.vi v7, v8, 0 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v24, (a0) @@ -578,7 +580,8 @@ define void @masked_store_v256i8(ptr %val_ptr, ptr %a, ptr %m_ptr) nounwind { ; CHECK-NEXT: addi a3, a2, 128 ; CHECK-NEXT: vle8.v v8, (a3) ; CHECK-NEXT: vle8.v v16, (a2) -; CHECK-NEXT: vmseq.vi v7, v8, 0 +; CHECK-NEXT: vmseq.vi v0, v8, 0 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v24, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll index cc8d230..f84f79e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll @@ -140,11 +140,9 @@ define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 @@ -268,11 
+266,9 @@ define <8 x float> @vp_nearbyint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 @@ -312,11 +308,9 @@ define <16 x float> @vp_nearbyint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 @@ -398,11 +392,9 @@ define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 @@ -442,11 +434,9 @@ define <8 x double> @vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; 
CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 @@ -486,11 +476,9 @@ define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 @@ -530,11 +518,9 @@ define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 @@ -582,11 +568,9 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a1 @@ -600,11 +584,9 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> 
%m, i32 z ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index 46c7f31..b957f67 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -1672,8 +1672,8 @@ define float @vreduce_fminimum_v8f32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vcpop.m a0, v10 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB103_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -1708,8 +1708,8 @@ define float @vreduce_fminimum_v16f32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vcpop.m a0, v12 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB105_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -1745,8 +1745,8 @@ define float @vreduce_fminimum_v32f32(ptr %x) { ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB107_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -1790,8 +1790,8 @@ define float @vreduce_fminimum_v64f32(ptr %x) { ; CHECK-NEXT: vmfeq.vv v0, 
v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB109_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -1831,72 +1831,122 @@ define float @vreduce_fminimum_v128f32(ptr %x) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: addi a2, a0, 128 -; CHECK-NEXT: vle32.v v16, (a2) -; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vle32.v v8, (a2) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a3, a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: 
vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vle32.v v24, (a1) -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v16, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, 
a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB111_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -1907,9 +1957,9 @@ define float @vreduce_fminimum_v128f32(ptr %x) { ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: .LBB111_3: ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli 
a0, a0, 3 -; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -1985,8 +2035,8 @@ define double @vreduce_fminimum_v4f64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vcpop.m a0, v10 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB115_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI115_0) @@ -2021,8 +2071,8 @@ define double @vreduce_fminimum_v8f64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vcpop.m a0, v12 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB117_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI117_0) @@ -2057,8 +2107,8 @@ define double @vreduce_fminimum_v16f64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB119_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI119_0) @@ -2100,8 +2150,8 @@ define double @vreduce_fminimum_v32f64(ptr %x) { ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB121_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI121_0) @@ -2140,71 +2190,121 @@ define double @vreduce_fminimum_v64f64(ptr %x) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 
-; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a3, a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, 
sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v16, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: 
slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB123_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI123_0) @@ -2215,9 +2315,9 @@ define double @vreduce_fminimum_v64f64(ptr %x) { ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: .LBB123_3: ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -2328,8 +2428,8 @@ define float @vreduce_fmaximum_v8f32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vcpop.m a0, v10 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: 
beqz a0, .LBB129_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -2364,8 +2464,8 @@ define float @vreduce_fmaximum_v16f32(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vcpop.m a0, v12 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB131_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -2401,8 +2501,8 @@ define float @vreduce_fmaximum_v32f32(ptr %x) { ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB133_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -2446,8 +2546,8 @@ define float @vreduce_fmaximum_v64f32(ptr %x) { ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB135_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -2487,72 +2587,122 @@ define float @vreduce_fmaximum_v128f32(ptr %x) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: addi a1, a0, 384 ; 
CHECK-NEXT: addi a2, a0, 128 -; CHECK-NEXT: vle32.v v16, (a2) -; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vle32.v v8, (a2) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a3, a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, 
a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vle32.v v24, (a1) -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v16, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; 
CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB137_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, 523264 @@ -2563,9 +2713,9 @@ define float @vreduce_fmaximum_v128f32(ptr %x) { ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: .LBB137_3: ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -2641,8 +2791,8 @@ define double @vreduce_fmaximum_v4f64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vcpop.m a0, v10 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB141_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI141_0) @@ -2677,8 +2827,8 @@ define double @vreduce_fmaximum_v8f64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vcpop.m a0, v12 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB143_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI143_0) @@ -2713,8 +2863,8 @@ define double 
@vreduce_fmaximum_v16f64(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB145_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI145_0) @@ -2756,8 +2906,8 @@ define double @vreduce_fmaximum_v32f64(ptr %x) { ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB147_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI147_0) @@ -2796,71 +2946,121 @@ define double @vreduce_fmaximum_v64f64(ptr %x) { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; 
CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a3, a3, a2 +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; 
CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v16, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a1, a1, a0 +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; 
CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vcpop.m a0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v8 +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB149_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI149_0) @@ -2871,9 +3071,9 @@ define double @vreduce_fmaximum_v64f64(ptr %x) { ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: .LBB149_3: ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll index 8f7a564..baff2e9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -1586,8 +1586,8 @@ define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i3 ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vid.v v10 -; RV32-NEXT: vmsltu.vx v9, v10, a1 -; RV32-NEXT: vmand.mm v0, v9, v0 +; RV32-NEXT: vmsltu.vx v2, v10, a1 +; RV32-NEXT: vmand.mm v0, v2, v0 ; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.i v9, 1 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0 @@ -1615,8 +1615,8 @@ define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i3 ; RV64-NEXT: mv a2, a0 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vid.v v10 -; RV64-NEXT: vmsltu.vx v9, v10, a1 -; RV64-NEXT: vmand.mm v0, v9, v0 +; RV64-NEXT: vmsltu.vx v2, v10, a1 +; RV64-NEXT: vmand.mm v0, v2, v0 ; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV64-NEXT: vmv.v.i v9, 1 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0 @@ -1650,8 +1650,8 @@ define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m, ; 
RV32-NEXT: mv a2, a0 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vid.v v12 -; RV32-NEXT: vmsltu.vx v9, v12, a1 -; RV32-NEXT: vmand.mm v0, v9, v0 +; RV32-NEXT: vmsltu.vx v4, v12, a1 +; RV32-NEXT: vmand.mm v0, v4, v0 ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV32-NEXT: vmv.v.i v9, 1 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0 @@ -1681,8 +1681,8 @@ define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m, ; RV64-NEXT: mv a2, a0 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vid.v v12 -; RV64-NEXT: vmsltu.vx v9, v12, a1 -; RV64-NEXT: vmand.mm v0, v9, v0 +; RV64-NEXT: vmsltu.vx v4, v12, a1 +; RV64-NEXT: vmand.mm v0, v4, v0 ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64-NEXT: vmv.v.i v9, 1 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0 @@ -1719,8 +1719,8 @@ define signext i8 @vpreduce_mul_v32i8(i8 signext %s, <32 x i8> %v, <32 x i1> %m, ; RV32-NEXT: li a0, 32 ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV32-NEXT: vid.v v16 -; RV32-NEXT: vmsltu.vx v10, v16, a1 -; RV32-NEXT: vmand.mm v0, v10, v0 +; RV32-NEXT: vmsltu.vx v16, v16, a1 +; RV32-NEXT: vmand.mm v0, v16, v0 ; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RV32-NEXT: vmv.v.i v10, 1 ; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 @@ -1753,8 +1753,8 @@ define signext i8 @vpreduce_mul_v32i8(i8 signext %s, <32 x i8> %v, <32 x i1> %m, ; RV64-NEXT: li a0, 32 ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV64-NEXT: vid.v v16 -; RV64-NEXT: vmsltu.vx v10, v16, a1 -; RV64-NEXT: vmand.mm v0, v10, v0 +; RV64-NEXT: vmsltu.vx v16, v16, a1 +; RV64-NEXT: vmand.mm v0, v16, v0 ; RV64-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RV64-NEXT: vmv.v.i v10, 1 ; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 @@ -1796,14 +1796,14 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, ; RV32-NEXT: vle8.v v12, (a2) ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: vid.v v16 -; RV32-NEXT: vmsltu.vx v14, v16, a1 -; RV32-NEXT: vsext.vf4 v16, v12 -; RV32-NEXT: vmsltu.vx v12, v16, a1 +; 
RV32-NEXT: vmsltu.vx v16, v16, a1 +; RV32-NEXT: vsext.vf4 v24, v12 +; RV32-NEXT: vmsltu.vx v24, v24, a1 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vi v14, v12, 4 +; RV32-NEXT: vslideup.vi v16, v24, 4 ; RV32-NEXT: li a0, 64 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV32-NEXT: vmand.mm v0, v14, v0 +; RV32-NEXT: vmand.mm v0, v16, v0 ; RV32-NEXT: vmv.v.i v12, 1 ; RV32-NEXT: vmerge.vvm v8, v12, v8, v0 ; RV32-NEXT: vslidedown.vx v12, v8, a3 @@ -1840,14 +1840,14 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, ; RV64-NEXT: vle8.v v12, (a2) ; RV64-NEXT: mv a2, a0 ; RV64-NEXT: vid.v v16 -; RV64-NEXT: vmsltu.vx v14, v16, a1 -; RV64-NEXT: vsext.vf4 v16, v12 -; RV64-NEXT: vmsltu.vx v12, v16, a1 +; RV64-NEXT: vmsltu.vx v16, v16, a1 +; RV64-NEXT: vsext.vf4 v24, v12 +; RV64-NEXT: vmsltu.vx v24, v24, a1 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vi v14, v12, 4 +; RV64-NEXT: vslideup.vi v16, v24, 4 ; RV64-NEXT: li a0, 64 ; RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV64-NEXT: vmand.mm v0, v14, v0 +; RV64-NEXT: vmand.mm v0, v16, v0 ; RV64-NEXT: vmv.v.i v12, 1 ; RV64-NEXT: vmerge.vvm v8, v12, v8, v0 ; RV64-NEXT: vslidedown.vx v12, v8, a3 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll index 1e4f344..257cffa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll @@ -128,10 +128,8 @@ define <16 x half> @vp_rint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: 
vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -244,10 +242,8 @@ define <8 x float> @vp_rint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -284,10 +280,8 @@ define <16 x float> @vp_rint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -362,10 +356,8 @@ define <4 x double> @vp_rint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -402,10 +394,8 @@ define <8 x double> @vp_rint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; 
CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu @@ -442,10 +432,8 @@ define <15 x double> @vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -482,10 +470,8 @@ define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -530,10 +516,8 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ 
-546,10 +530,8 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll index 91feb05..66bc452 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll @@ -201,11 +201,9 @@ define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 4 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -287,11 +283,9 @@ define <16 x half> @vp_round_v16f16(<16 x half> 
%va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -436,11 +430,9 @@ define <8 x float> @vp_round_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -480,11 +472,9 @@ define <16 x float> @vp_round_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -566,11 +556,9 @@ define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 4 
-; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -610,11 +598,9 @@ define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext % ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -654,11 +640,9 @@ define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -698,11 +682,9 @@ define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -750,11 +732,9 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v 
v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a1, 4 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -768,11 +748,9 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll index 89ba2d7..59923dd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll @@ -201,11 +201,9 @@ define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, 
e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -287,11 +283,9 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -436,11 +430,9 @@ define <8 x float> @vp_roundeven_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -480,11 +472,9 @@ define <16 x float> @vp_roundeven_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -566,11 +556,9 @@ define 
<4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -610,11 +598,9 @@ define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroe ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -654,11 +640,9 @@ define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -698,11 +682,9 @@ define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf 
v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -750,11 +732,9 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 z ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a1, 0 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -768,11 +748,9 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 z ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll index 4faee56..0439d0b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -201,11 +201,9 @@ define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, 
zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -287,11 +283,9 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -436,11 +430,9 @@ define <8 x float> @vp_roundtozero_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroe ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -480,11 +472,9 @@ define <16 x float> @vp_roundtozero_v16f32(<16 x float> %va, <16 x i1> %m, i32 z ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, 
a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -566,11 +556,9 @@ define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -610,11 +598,9 @@ define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zer ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -654,11 +640,9 @@ define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v 
v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -698,11 +682,9 @@ define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -750,11 +732,9 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a1, 1 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -768,11 +748,9 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll index 72f86dd..ec33a70 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -26,8 +26,7 @@ define <7 x i1> @fcmp_oeq_vv_v7f16(<7 x half> %va, <7 x half> %vb, <7 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <7 x i1> @llvm.vp.fcmp.v7f16(<7 x half> %va, <7 x half> %vb, metadata !"oeq", <7 x i1> %m, i32 %evl) ret <7 x i1> %v @@ -48,8 +47,7 @@ define <8 x i1> @fcmp_oeq_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"oeq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -73,8 +71,7 @@ define <8 x i1> @fcmp_oeq_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -100,8 +97,7 @@ define <8 x i1> @fcmp_oeq_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector 
<8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -122,8 +118,7 @@ define <8 x i1> @fcmp_ogt_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ogt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -147,8 +142,7 @@ define <8 x i1> @fcmp_ogt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -174,8 +168,7 @@ define <8 x i1> @fcmp_ogt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -196,8 +189,7 @@ define <8 x i1> @fcmp_oge_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x 
half> %va, <8 x half> %vb, metadata !"oge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -221,8 +213,7 @@ define <8 x i1> @fcmp_oge_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -248,8 +239,7 @@ define <8 x i1> @fcmp_oge_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v10, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -270,8 +260,7 @@ define <8 x i1> @fcmp_olt_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"olt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -295,8 +284,7 @@ define <8 x i1> @fcmp_olt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 
x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -322,8 +310,7 @@ define <8 x i1> @fcmp_olt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -344,8 +331,7 @@ define <8 x i1> @fcmp_ole_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ole", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -369,8 +355,7 @@ define <8 x i1> @fcmp_ole_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v10, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -396,8 +381,7 @@ define <8 x i1> @fcmp_ole_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t ; 
ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -420,9 +404,9 @@ define <8 x i1> @fcmp_one_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v10, v12, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v2, v12, v10, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"one", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -448,9 +432,9 @@ define <8 x i1> @fcmp_one_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v10, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v2, v10, v12, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -478,9 +462,9 @@ define <8 x i1> @fcmp_one_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v10, v12, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v2, v12, v10, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = 
shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -502,12 +486,12 @@ define <8 x i1> @fcmp_ord_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v10, v10, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v2, v10, v10, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t -; ZVFHMIN-NEXT: vmand.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10, v0.t +; ZVFHMIN-NEXT: vmand.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ord", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -534,12 +518,12 @@ define <8 x i1> @fcmp_ord_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v2, v10, v10, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v10, v10, v0.t -; ZVFHMIN-NEXT: vmand.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10, v0.t +; ZVFHMIN-NEXT: vmand.mm v0, v2, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -568,12 +552,12 @@ define <8 x i1> @fcmp_ord_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v2, v10, v10, v0.t ; 
ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v10, v10, v0.t -; ZVFHMIN-NEXT: vmand.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10, v0.t +; ZVFHMIN-NEXT: vmand.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -596,9 +580,9 @@ define <8 x i1> @fcmp_ueq_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v10, v12, v0.t -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v2, v12, v10, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmnor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ueq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -624,9 +608,9 @@ define <8 x i1> @fcmp_ueq_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v2, v10, v12, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -654,9 +638,9 @@ define <8 x i1> @fcmp_ueq_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, 
v10, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v10, v12, v0.t -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v2, v12, v10, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmnor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -678,8 +662,8 @@ define <8 x i1> @fcmp_ugt_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -704,8 +688,8 @@ define <8 x i1> @fcmp_ugt_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -732,8 +716,8 @@ define <8 x i1> @fcmp_ugt_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> 
zeroinitializer @@ -755,8 +739,8 @@ define <8 x i1> @fcmp_uge_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -781,8 +765,8 @@ define <8 x i1> @fcmp_uge_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -809,8 +793,8 @@ define <8 x i1> @fcmp_uge_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -832,8 +816,8 @@ define <8 x i1> @fcmp_ult_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: 
vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -858,8 +842,8 @@ define <8 x i1> @fcmp_ult_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -886,8 +870,8 @@ define <8 x i1> @fcmp_ult_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -909,8 +893,8 @@ define <8 x i1> @fcmp_ule_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"ule", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -935,8 +919,8 @@ define <8 x i1> @fcmp_ule_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, 
ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v10, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -963,8 +947,8 @@ define <8 x i1> @fcmp_ule_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v10, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -985,8 +969,7 @@ define <8 x i1> @fcmp_une_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"une", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1010,8 +993,7 @@ define <8 x i1> @fcmp_une_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v10, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v10, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -1037,8 +1019,7 @@ define <8 x i1> @fcmp_une_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -1060,12 +1041,12 @@ define <8 x i1> @fcmp_uno_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v10, v10, v0.t +; ZVFHMIN-NEXT: vmfne.vv v2, v10, v10, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfne.vv v0, v10, v10, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f16(<8 x half> %va, <8 x half> %vb, metadata !"uno", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1092,12 +1073,12 @@ define <8 x i1> @fcmp_uno_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmfne.vv v2, v10, v10, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v10, v10, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfne.vv v0, v10, v10, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v2, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -1126,12 
+1107,12 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmfne.vv v2, v10, v10, v0.t ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v10, v10, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v10, v10, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v2 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <8 x half> poison, half %b, i32 0 %vb = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer @@ -1142,125 +1123,47 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i3 declare <128 x i1> @llvm.vp.fcmp.v128f16(<128 x half>, <128 x half>, metadata, <128 x i1>, i32) define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128 x i1> %m, i32 zeroext %evl) { -; ZVFH32-LABEL: fcmp_oeq_vv_v128f16: -; ZVFH32: # %bb.0: -; ZVFH32-NEXT: addi sp, sp, -16 -; ZVFH32-NEXT: .cfi_def_cfa_offset 16 -; ZVFH32-NEXT: csrr a1, vlenb -; ZVFH32-NEXT: slli a1, a1, 4 -; ZVFH32-NEXT: sub sp, sp, a1 -; ZVFH32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; ZVFH32-NEXT: addi a1, a0, 128 -; ZVFH32-NEXT: li a3, 64 -; ZVFH32-NEXT: vsetvli zero, a3, e16, m8, ta, ma -; ZVFH32-NEXT: vle16.v v24, (a1) -; ZVFH32-NEXT: csrr a1, vlenb -; ZVFH32-NEXT: slli a1, a1, 3 -; ZVFH32-NEXT: add a1, sp, a1 -; ZVFH32-NEXT: addi a1, a1, 16 -; ZVFH32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; ZVFH32-NEXT: vle16.v v24, (a0) -; ZVFH32-NEXT: addi a0, sp, 16 -; ZVFH32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; ZVFH32-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; ZVFH32-NEXT: vmv1r.v v6, v0 -; 
ZVFH32-NEXT: vslidedown.vi v25, v0, 8 -; ZVFH32-NEXT: vmv.v.v v7, v25 -; ZVFH32-NEXT: mv a0, a2 -; ZVFH32-NEXT: bltu a2, a3, .LBB43_2 -; ZVFH32-NEXT: # %bb.1: -; ZVFH32-NEXT: li a0, 64 -; ZVFH32-NEXT: .LBB43_2: -; ZVFH32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH32-NEXT: addi a0, sp, 16 -; ZVFH32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFH32-NEXT: vmv1r.v v0, v6 -; ZVFH32-NEXT: vmfeq.vv v6, v8, v24, v0.t -; ZVFH32-NEXT: addi a0, a2, -64 -; ZVFH32-NEXT: sltu a1, a2, a0 -; ZVFH32-NEXT: addi a1, a1, -1 -; ZVFH32-NEXT: and a0, a1, a0 -; ZVFH32-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH32-NEXT: csrr a0, vlenb -; ZVFH32-NEXT: slli a0, a0, 3 -; ZVFH32-NEXT: add a0, sp, a0 -; ZVFH32-NEXT: addi a0, a0, 16 -; ZVFH32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFH32-NEXT: vmv1r.v v25, v7 -; ZVFH32-NEXT: vmv1r.v v0, v7 -; ZVFH32-NEXT: vmfeq.vv v25, v16, v8, v0.t -; ZVFH32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; ZVFH32-NEXT: vslideup.vi v6, v25, 8 -; ZVFH32-NEXT: vmv.v.v v0, v6 -; ZVFH32-NEXT: csrr a0, vlenb -; ZVFH32-NEXT: slli a0, a0, 4 -; ZVFH32-NEXT: add sp, sp, a0 -; ZVFH32-NEXT: addi sp, sp, 16 -; ZVFH32-NEXT: ret -; -; ZVFH64-LABEL: fcmp_oeq_vv_v128f16: -; ZVFH64: # %bb.0: -; ZVFH64-NEXT: addi sp, sp, -16 -; ZVFH64-NEXT: .cfi_def_cfa_offset 16 -; ZVFH64-NEXT: csrr a1, vlenb -; ZVFH64-NEXT: li a3, 24 -; ZVFH64-NEXT: mul a1, a1, a3 -; ZVFH64-NEXT: sub sp, sp, a1 -; ZVFH64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; ZVFH64-NEXT: addi a1, a0, 128 -; ZVFH64-NEXT: li a3, 64 -; ZVFH64-NEXT: vsetvli zero, a3, e16, m8, ta, ma -; ZVFH64-NEXT: vle16.v v24, (a1) -; ZVFH64-NEXT: csrr a1, vlenb -; ZVFH64-NEXT: slli a1, a1, 4 -; ZVFH64-NEXT: add a1, sp, a1 -; ZVFH64-NEXT: addi a1, a1, 16 -; ZVFH64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; ZVFH64-NEXT: vle16.v v24, (a0) -; ZVFH64-NEXT: csrr a0, vlenb -; ZVFH64-NEXT: slli a0, a0, 3 -; ZVFH64-NEXT: 
add a0, sp, a0 -; ZVFH64-NEXT: addi a0, a0, 16 -; ZVFH64-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; ZVFH64-NEXT: addi a0, sp, 16 -; ZVFH64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; ZVFH64-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; ZVFH64-NEXT: mv a0, a2 -; ZVFH64-NEXT: vslidedown.vi v17, v0, 8 -; ZVFH64-NEXT: bltu a2, a3, .LBB43_2 -; ZVFH64-NEXT: # %bb.1: -; ZVFH64-NEXT: li a0, 64 -; ZVFH64-NEXT: .LBB43_2: -; ZVFH64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH64-NEXT: csrr a0, vlenb -; ZVFH64-NEXT: slli a0, a0, 3 -; ZVFH64-NEXT: add a0, sp, a0 -; ZVFH64-NEXT: addi a0, a0, 16 -; ZVFH64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFH64-NEXT: vmfeq.vv v16, v8, v24, v0.t -; ZVFH64-NEXT: addi a0, a2, -64 -; ZVFH64-NEXT: sltu a1, a2, a0 -; ZVFH64-NEXT: addi a1, a1, -1 -; ZVFH64-NEXT: and a0, a1, a0 -; ZVFH64-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH64-NEXT: csrr a0, vlenb -; ZVFH64-NEXT: slli a0, a0, 4 -; ZVFH64-NEXT: add a0, sp, a0 -; ZVFH64-NEXT: addi a0, a0, 16 -; ZVFH64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFH64-NEXT: addi a0, sp, 16 -; ZVFH64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFH64-NEXT: vmv1r.v v0, v17 -; ZVFH64-NEXT: vmfeq.vv v17, v24, v8, v0.t -; ZVFH64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; ZVFH64-NEXT: vslideup.vi v16, v17, 8 -; ZVFH64-NEXT: vmv.v.v v0, v16 -; ZVFH64-NEXT: csrr a0, vlenb -; ZVFH64-NEXT: li a1, 24 -; ZVFH64-NEXT: mul a0, a0, a1 -; ZVFH64-NEXT: add sp, sp, a0 -; ZVFH64-NEXT: addi sp, sp, 16 -; ZVFH64-NEXT: ret +; ZVFH-LABEL: fcmp_oeq_vv_v128f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: addi sp, sp, -16 +; ZVFH-NEXT: .cfi_def_cfa_offset 16 +; ZVFH-NEXT: csrr a1, vlenb +; ZVFH-NEXT: slli a1, a1, 3 +; ZVFH-NEXT: sub sp, sp, a1 +; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; ZVFH-NEXT: addi a1, a0, 128 +; ZVFH-NEXT: li a3, 64 +; ZVFH-NEXT: vsetvli zero, a3, e16, m8, ta, ma +; 
ZVFH-NEXT: vle16.v v24, (a1) +; ZVFH-NEXT: addi a1, sp, 16 +; ZVFH-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFH-NEXT: vle16.v v24, (a0) +; ZVFH-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; ZVFH-NEXT: vslidedown.vi v7, v0, 8 +; ZVFH-NEXT: mv a0, a2 +; ZVFH-NEXT: bltu a2, a3, .LBB43_2 +; ZVFH-NEXT: # %bb.1: +; ZVFH-NEXT: li a0, 64 +; ZVFH-NEXT: .LBB43_2: +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vmfeq.vv v24, v8, v24, v0.t +; ZVFH-NEXT: addi a0, a2, -64 +; ZVFH-NEXT: sltu a1, a2, a0 +; ZVFH-NEXT: addi a1, a1, -1 +; ZVFH-NEXT: and a0, a1, a0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: addi a0, sp, 16 +; ZVFH-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vmv1r.v v0, v7 +; ZVFH-NEXT: vmfeq.vv v8, v16, v8, v0.t +; ZVFH-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; ZVFH-NEXT: vslideup.vi v24, v8, 8 +; ZVFH-NEXT: vmv.v.v v0, v24 +; ZVFH-NEXT: csrr a0, vlenb +; ZVFH-NEXT: slli a0, a0, 3 +; ZVFH-NEXT: add sp, sp, a0 +; ZVFH-NEXT: addi sp, sp, 16 +; ZVFH-NEXT: ret ; ; ZVFHMIN32-LABEL: fcmp_oeq_vv_v128f16: ; ZVFHMIN32: # %bb.0: @@ -2367,8 +2270,7 @@ define <7 x i1> @fcmp_oeq_vv_v7f64(<7 x double> %va, <7 x double> %vb, <7 x i1> ; CHECK-LABEL: fcmp_oeq_vv_v7f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <7 x i1> @llvm.vp.fcmp.v7f64(<7 x double> %va, <7 x double> %vb, metadata !"oeq", <7 x i1> %m, i32 %evl) ret <7 x i1> %v @@ -2380,8 +2282,7 @@ define <8 x i1> @fcmp_oeq_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_oeq_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oeq", <8 x 
i1> %m, i32 %evl) ret <8 x i1> %v @@ -2391,8 +2292,7 @@ define <8 x i1> @fcmp_oeq_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_oeq_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2404,8 +2304,7 @@ define <8 x i1> @fcmp_oeq_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_oeq_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2417,8 +2316,7 @@ define <8 x i1> @fcmp_ogt_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_ogt_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ogt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2428,8 +2326,7 @@ define <8 x i1> @fcmp_ogt_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_ogt_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2441,8 
+2338,7 @@ define <8 x i1> @fcmp_ogt_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_ogt_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2454,8 +2350,7 @@ define <8 x i1> @fcmp_oge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_oge_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2465,8 +2360,7 @@ define <8 x i1> @fcmp_oge_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_oge_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2478,8 +2372,7 @@ define <8 x i1> @fcmp_oge_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_oge_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2491,8 +2384,7 @@ define <8 x i1> 
@fcmp_olt_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_olt_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"olt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2502,8 +2394,7 @@ define <8 x i1> @fcmp_olt_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_olt_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2515,8 +2406,7 @@ define <8 x i1> @fcmp_olt_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_olt_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2528,8 +2418,7 @@ define <8 x i1> @fcmp_ole_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_ole_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ole", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2539,8 +2428,7 @@ define <8 x i1> @fcmp_ole_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; 
CHECK-LABEL: fcmp_ole_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2552,8 +2440,7 @@ define <8 x i1> @fcmp_ole_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_ole_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2565,9 +2452,9 @@ define <8 x i1> @fcmp_one_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_one_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"one", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2577,9 +2464,9 @@ define <8 x i1> @fcmp_one_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_one_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 
%vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2591,9 +2478,9 @@ define <8 x i1> @fcmp_one_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_one_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2605,9 +2492,9 @@ define <8 x i1> @fcmp_ord_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_ord_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12, v0.t -; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v4, v12, v12, v0.t +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ord", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2619,9 +2506,9 @@ define <8 x i1> @fcmp_ord_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vf v16, v12, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v4, v12, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2635,9 +2522,9 @@ define <8 x i1> 
@fcmp_ord_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vf v16, v12, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v4, v12, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2649,9 +2536,9 @@ define <8 x i1> @fcmp_ueq_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_ueq_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ueq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2661,9 +2548,9 @@ define <8 x i1> @fcmp_ueq_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_ueq_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2675,9 +2562,9 @@ define <8 x i1> @fcmp_ueq_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_ueq_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2689,8 +2576,8 @@ define <8 x i1> @fcmp_ugt_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_ugt_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2700,8 +2587,8 @@ define <8 x i1> @fcmp_ugt_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_ugt_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2713,8 +2600,8 @@ define <8 x i1> @fcmp_ugt_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_ugt_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ 
-2726,8 +2613,8 @@ define <8 x i1> @fcmp_uge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_uge_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2737,8 +2624,8 @@ define <8 x i1> @fcmp_uge_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_uge_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2750,8 +2637,8 @@ define <8 x i1> @fcmp_uge_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_uge_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2763,8 +2650,8 @@ define <8 x i1> @fcmp_ult_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_ult_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata 
!"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2774,8 +2661,8 @@ define <8 x i1> @fcmp_ult_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_ult_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2787,8 +2674,8 @@ define <8 x i1> @fcmp_ult_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_ult_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2800,8 +2687,8 @@ define <8 x i1> @fcmp_ule_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_ule_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ule", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2811,8 +2698,8 @@ define <8 x i1> @fcmp_ule_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_ule_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> 
poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2824,8 +2711,8 @@ define <8 x i1> @fcmp_ule_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_ule_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2837,8 +2724,7 @@ define <8 x i1> @fcmp_une_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_une_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfne.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"une", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2848,8 +2734,7 @@ define <8 x i1> @fcmp_une_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-LABEL: fcmp_une_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfne.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2861,8 +2746,7 @@ define <8 x i1> @fcmp_une_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-LABEL: fcmp_une_vf_swap_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfne.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double 
%b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2874,9 +2758,9 @@ define <8 x i1> @fcmp_uno_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> ; CHECK-LABEL: fcmp_uno_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12, v0.t -; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v4, v12, v12, v0.t +; CHECK-NEXT: vmfne.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uno", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2888,9 +2772,9 @@ define <8 x i1> @fcmp_uno_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vf v16, v12, fa0, v0.t -; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v4, v12, fa0, v0.t +; CHECK-NEXT: vmfne.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2904,9 +2788,9 @@ define <8 x i1> @fcmp_uno_vf_swap_v8f64(<8 x double> %va, double %b, <8 x i1> %m ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vf v16, v12, fa0, v0.t -; CHECK-NEXT: vmfne.vv v12, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v4, v12, fa0, v0.t +; CHECK-NEXT: vmfne.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %elt.head = insertelement <8 x double> poison, double %b, i32 0 %vb = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer @@ -2922,78 
+2806,46 @@ define <32 x i1> @fcmp_oeq_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 26 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a3, a1, 4 -; CHECK-NEXT: add a1, a3, a1 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 2 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vslidedown.vi v7, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a1, .LBB87_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: .LBB87_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: 
vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t +; CHECK-NEXT: vmfeq.vv v24, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v17, v8, v24, v0.t +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v8, v16, v8, v0.t ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vi v16, v17, 2 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vslideup.vi v24, v8, 2 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 26 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x i1> @llvm.vp.fcmp.v32f64(<32 x double> %va, <32 x double> %vb, metadata !"oeq", <32 x i1> %m, i32 %evl) ret <32 x i1> %v } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; ZVFH32: {{.*}} +; ZVFH64: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll index 97b5181..65d9dd0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll @@ -595,47 +595,36 @@ define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v6, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: addi a2, a0, 128 -; CHECK-NEXT: vle8.v v8, (a2) +; CHECK-NEXT: vle8.v v24, (a2) ; CHECK-NEXT: addi a2, a3, -128 ; CHECK-NEXT: sltu a4, a3, a2 ; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: vle8.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: and a2, a4, a2 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vv v7, v16, v8, v0.t +; CHECK-NEXT: vmseq.vv v16, v16, v24, v0.t ; CHECK-NEXT: bltu a3, a1, .LBB51_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: .LBB51_2: ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli 
a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: vmseq.vv v0, v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -655,16 +644,15 @@ define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 z ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a1, a4, a1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v16, a0, v0.t +; CHECK-NEXT: vmseq.vx v16, v16, a0, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB52_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB52_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmv1r.v v8, v24 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer @@ -684,16 +672,15 @@ define <256 x i1> @icmp_eq_vx_swap_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a1, a4, a1 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v16, a0, v0.t +; CHECK-NEXT: vmseq.vx v16, v16, a0, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB53_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: .LBB53_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmv1r.v v8, v24 +; CHECK-NEXT: vmseq.vx v0, v8, 
a0, v0.t +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer @@ -707,8 +694,7 @@ define <8 x i1> @icmp_eq_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_eq_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmseq.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"eq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -718,8 +704,7 @@ define <8 x i1> @icmp_eq_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroex ; CHECK-LABEL: icmp_eq_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -731,8 +716,7 @@ define <8 x i1> @icmp_eq_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 z ; CHECK-LABEL: icmp_eq_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -744,8 +728,7 @@ define <8 x i1> @icmp_eq_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_eq_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> 
%va, <8 x i32> splat (i32 4), metadata !"eq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -755,8 +738,7 @@ define <8 x i1> @icmp_eq_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext % ; CHECK-LABEL: icmp_eq_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"eq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -766,8 +748,7 @@ define <8 x i1> @icmp_ne_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_ne_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsne.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsne.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"ne", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -777,8 +758,7 @@ define <8 x i1> @icmp_ne_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroex ; CHECK-LABEL: icmp_ne_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsne.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -790,8 +770,7 @@ define <8 x i1> @icmp_ne_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 z ; CHECK-LABEL: icmp_ne_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsne.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsne.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -803,8 +782,7 @@ define <8 x 
i1> @icmp_ne_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_ne_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsne.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"ne", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -814,8 +792,7 @@ define <8 x i1> @icmp_ne_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext % ; CHECK-LABEL: icmp_ne_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsne.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"ne", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -825,8 +802,7 @@ define <8 x i1> @icmp_ugt_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_ugt_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsltu.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsltu.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -836,8 +812,7 @@ define <8 x i1> @icmp_ugt_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: icmp_ugt_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -849,8 +824,7 @@ define <8 x i1> @icmp_ugt_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_ugt_vx_swap_v8i32: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsltu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -862,8 +836,7 @@ define <8 x i1> @icmp_ugt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_ugt_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsgtu.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -873,8 +846,7 @@ define <8 x i1> @icmp_ugt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_ugt_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsleu.vi v10, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -884,8 +856,7 @@ define <8 x i1> @icmp_uge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_uge_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsleu.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -895,10 +866,9 @@ define <8 x i1> @icmp_uge_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: icmp_uge_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vmv.v.x v10, a0 ; 
CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsleu.vv v10, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -910,8 +880,7 @@ define <8 x i1> @icmp_uge_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_uge_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsleu.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -923,8 +892,7 @@ define <8 x i1> @icmp_uge_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_uge_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsgtu.vi v10, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgtu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -934,8 +902,7 @@ define <8 x i1> @icmp_uge_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_uge_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsleu.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -945,8 +912,7 @@ define <8 x i1> @icmp_ult_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_ult_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsltu.vv v12, v8, v10, v0.t -; 
CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsltu.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -956,8 +922,7 @@ define <8 x i1> @icmp_ult_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: icmp_ult_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsltu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -969,8 +934,7 @@ define <8 x i1> @icmp_ult_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_ult_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsgtu.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -982,8 +946,7 @@ define <8 x i1> @icmp_ult_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_ult_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsleu.vi v10, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -993,8 +956,7 @@ define <8 x i1> @icmp_ult_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_ult_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsgtu.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> 
@llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1004,8 +966,7 @@ define <8 x i1> @icmp_sgt_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_sgt_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmslt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmslt.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"sgt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1015,8 +976,7 @@ define <8 x i1> @icmp_sgt_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: icmp_sgt_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -1028,8 +988,7 @@ define <8 x i1> @icmp_sgt_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_sgt_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -1041,8 +1000,7 @@ define <8 x i1> @icmp_sgt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_sgt_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsgt.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"sgt", <8 x i1> %m, i32 %evl) ret <8 x i1> 
%v @@ -1052,8 +1010,7 @@ define <8 x i1> @icmp_sgt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_sgt_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vi v10, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"sgt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1063,8 +1020,7 @@ define <8 x i1> @icmp_sge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_sge_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"sge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1074,10 +1030,9 @@ define <8 x i1> @icmp_sge_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: icmp_sge_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vv v10, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -1089,8 +1044,7 @@ define <8 x i1> @icmp_sge_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_sge_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> 
zeroinitializer @@ -1102,8 +1056,7 @@ define <8 x i1> @icmp_sge_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_sge_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsgt.vi v10, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"sge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1113,8 +1066,7 @@ define <8 x i1> @icmp_sge_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_sge_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"sge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1124,8 +1076,7 @@ define <8 x i1> @icmp_slt_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_slt_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmslt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmslt.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"slt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1135,8 +1086,7 @@ define <8 x i1> @icmp_slt_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: icmp_slt_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -1148,8 +1098,7 @@ define <8 x i1> @icmp_slt_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 ; 
CHECK-LABEL: icmp_slt_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsgt.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -1161,8 +1110,7 @@ define <8 x i1> @icmp_slt_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_slt_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vi v10, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"slt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1172,8 +1120,7 @@ define <8 x i1> @icmp_slt_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_slt_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsgt.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"slt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1183,8 +1130,7 @@ define <8 x i1> @icmp_sle_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_sle_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> %vb, metadata !"sle", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1194,8 +1140,7 @@ define <8 x i1> @icmp_sle_vx_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 zeroe ; CHECK-LABEL: icmp_sle_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: 
vmsle.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -1207,10 +1152,9 @@ define <8 x i1> @icmp_sle_vx_swap_v8i32(<8 x i32> %va, i32 %b, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_sle_vx_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vv v10, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -1222,8 +1166,7 @@ define <8 x i1> @icmp_sle_vi_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_sle_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsle.vi v10, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> %va, <8 x i32> splat (i32 4), metadata !"sle", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1233,8 +1176,7 @@ define <8 x i1> @icmp_sle_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_sle_vi_swap_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmsgt.vi v10, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i32(<8 x i32> splat (i32 4), <8 x i32> %va, metadata !"sle", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1243,125 +1185,47 @@ define <8 x i1> @icmp_sle_vi_swap_v8i32(<8 x i32> %va, <8 x i1> %m, i32 zeroext declare <64 x i1> @llvm.vp.icmp.v64i32(<64 x i32>, <64 x i32>, metadata, <64 x i1>, i32) define <64 
x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m, i32 zeroext %evl) { -; RV32-LABEL: icmp_eq_vv_v64i32: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV32-NEXT: addi a1, a0, 128 -; RV32-NEXT: li a3, 32 -; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vle32.v v24, (a0) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; RV32-NEXT: vmv1r.v v6, v0 -; RV32-NEXT: vslidedown.vi v25, v0, 4 -; RV32-NEXT: vmv1r.v v7, v25 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bltu a2, a3, .LBB99_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a0, 32 -; RV32-NEXT: .LBB99_2: -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmv1r.v v0, v6 -; RV32-NEXT: vmseq.vv v6, v8, v24, v0.t -; RV32-NEXT: addi a0, a2, -32 -; RV32-NEXT: sltu a1, a2, a0 -; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: and a0, a1, a0 -; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmv1r.v v25, v7 -; RV32-NEXT: vmv1r.v v0, v7 -; RV32-NEXT: vmseq.vv v25, v16, v8, v0.t -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vi v6, v25, 4 -; RV32-NEXT: vmv1r.v v0, v6 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 
-; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: icmp_eq_vv_v64i32: -; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 24 -; RV64-NEXT: mul a1, a1, a3 -; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV64-NEXT: addi a1, a0, 128 -; RV64-NEXT: li a3, 32 -; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV64-NEXT: vle32.v v24, (a1) -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vle32.v v24, (a0) -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; RV64-NEXT: mv a0, a2 -; RV64-NEXT: vslidedown.vi v17, v0, 4 -; RV64-NEXT: bltu a2, a3, .LBB99_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 32 -; RV64-NEXT: .LBB99_2: -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vmseq.vv v16, v8, v24, v0.t -; RV64-NEXT: addi a0, a2, -32 -; RV64-NEXT: sltu a1, a2, a0 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a0, a1, a0 -; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 4 -; RV64-NEXT: add a0, sp, a0 -; RV64-NEXT: addi a0, a0, 16 -; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vmv1r.v v0, v17 -; RV64-NEXT: vmseq.vv 
v17, v24, v8, v0.t -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vi v16, v17, 4 -; RV64-NEXT: vmv1r.v v0, v16 -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 24 -; RV64-NEXT: mul a0, a0, a1 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; CHECK-LABEL: icmp_eq_vv_v64i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 4 +; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: bltu a2, a3, .LBB99_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: .LBB99_2: +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmseq.vv v24, v8, v24, v0.t +; CHECK-NEXT: addi a0, a2, -32 +; CHECK-NEXT: sltu a1, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmseq.vv v8, v16, v8, v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v24, v8, 4 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret %v = call <64 x i1> @llvm.vp.icmp.v64i32(<64 x i32> %va, <64 x i32> %vb, metadata !"eq", <64 x i1> %m, i32 %evl) ret <64 x i1> %v } @@ -1371,24 +1235,24 @@ define <64 x i1> 
@icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 ze ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: vslidedown.vi v25, v0, 4 +; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB100_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: .LBB100_2: ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v8, a0, v0.t +; CHECK-NEXT: vmseq.vx v8, v8, a0, v0.t ; CHECK-NEXT: addi a2, a1, -32 ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v8, v16, a0, v0.t -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vi v24, v8, 4 ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmseq.vx v0, v16, a0, v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v0, 4 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret %elt.head = insertelement <64 x i32> poison, i32 %b, i32 0 %vb = shufflevector <64 x i32> %elt.head, <64 x i32> poison, <64 x i32> zeroinitializer @@ -1401,24 +1265,24 @@ define <64 x i1> @icmp_eq_vx_swap_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: vslidedown.vi v25, v0, 4 +; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB101_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: .LBB101_2: ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v8, a0, v0.t +; CHECK-NEXT: vmseq.vx v8, v8, a0, v0.t ; CHECK-NEXT: addi a2, a1, -32 ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v8, v16, a0, v0.t -; CHECK-NEXT: vsetivli zero, 8, e8, 
mf2, ta, ma -; CHECK-NEXT: vslideup.vi v24, v8, 4 ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmseq.vx v0, v16, a0, v0.t +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v0, 4 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: ret %elt.head = insertelement <64 x i32> poison, i32 %b, i32 0 %vb = shufflevector <64 x i32> %elt.head, <64 x i32> poison, <64 x i32> zeroinitializer @@ -1432,8 +1296,7 @@ define <8 x i1> @icmp_eq_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_eq_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmseq.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmseq.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"eq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1448,18 +1311,16 @@ define <8 x i1> @icmp_eq_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmseq.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmseq.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_eq_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmseq.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmseq.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1476,18 +1337,16 @@ define <8 x i1> @icmp_eq_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, 
(a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmseq.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmseq.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_eq_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmseq.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmseq.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1499,8 +1358,7 @@ define <8 x i1> @icmp_eq_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_eq_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmseq.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"eq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1510,8 +1368,7 @@ define <8 x i1> @icmp_eq_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext % ; CHECK-LABEL: icmp_eq_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmseq.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"eq", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1521,8 +1378,7 @@ define <8 x i1> @icmp_ne_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 ; CHECK-LABEL: icmp_ne_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsne.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsne.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"ne", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1537,18 
+1393,16 @@ define <8 x i1> @icmp_ne_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroex ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsne.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsne.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ne_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsne.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsne.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1565,18 +1419,16 @@ define <8 x i1> @icmp_ne_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 z ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsne.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsne.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ne_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsne.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsne.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1588,8 +1440,7 @@ define <8 x i1> @icmp_ne_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_ne_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsne.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: 
vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"ne", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1599,8 +1450,7 @@ define <8 x i1> @icmp_ne_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext % ; CHECK-LABEL: icmp_ne_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsne.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"ne", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1610,8 +1460,7 @@ define <8 x i1> @icmp_ugt_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_ugt_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsltu.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsltu.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1626,18 +1475,16 @@ define <8 x i1> @icmp_ugt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsltu.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsltu.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ugt_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsgtu.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1654,18 +1501,16 @@ 
define <8 x i1> @icmp_ugt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsltu.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsltu.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ugt_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsltu.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsltu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1677,8 +1522,7 @@ define <8 x i1> @icmp_ugt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_ugt_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsgtu.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1688,8 +1532,7 @@ define <8 x i1> @icmp_ugt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_ugt_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsleu.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"ugt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1699,8 +1542,7 @@ define <8 x i1> @icmp_uge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_uge_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; 
CHECK-NEXT: vmsleu.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsleu.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1715,20 +1557,18 @@ define <8 x i1> @icmp_uge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsleu.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsleu.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_uge_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vmv.v.x v16, a0 +; RV64-NEXT: vmv.v.x v12, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsleu.vv v12, v16, v8, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsleu.vv v0, v12, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1745,18 +1585,16 @@ define <8 x i1> @icmp_uge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsleu.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsleu.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_uge_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsleu.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsleu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, 
i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1768,8 +1606,7 @@ define <8 x i1> @icmp_uge_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_uge_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsgtu.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgtu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1779,8 +1616,7 @@ define <8 x i1> @icmp_uge_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_uge_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsleu.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"uge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1790,8 +1626,7 @@ define <8 x i1> @icmp_ult_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_ult_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsltu.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsltu.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1806,18 +1641,16 @@ define <8 x i1> @icmp_ult_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsltu.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsltu.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret 
; ; RV64-LABEL: icmp_ult_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsltu.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsltu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1834,18 +1667,16 @@ define <8 x i1> @icmp_ult_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsltu.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsltu.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ult_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsgtu.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1857,8 +1688,7 @@ define <8 x i1> @icmp_ult_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_ult_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsleu.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1868,8 +1698,7 @@ define <8 x i1> @icmp_ult_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_ult_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsgtu.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; 
CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"ult", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1879,8 +1708,7 @@ define <8 x i1> @icmp_sgt_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_sgt_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmslt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmslt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"sgt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1895,18 +1723,16 @@ define <8 x i1> @icmp_sgt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmslt.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmslt.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sgt_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsgt.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsgt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1923,18 +1749,16 @@ define <8 x i1> @icmp_sgt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmslt.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmslt.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: 
ret ; ; RV64-LABEL: icmp_sgt_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmslt.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmslt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -1946,8 +1770,7 @@ define <8 x i1> @icmp_sgt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_sgt_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"sgt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1957,8 +1780,7 @@ define <8 x i1> @icmp_sgt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_sgt_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"sgt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1968,8 +1790,7 @@ define <8 x i1> @icmp_sge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_sge_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"sge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -1984,20 +1805,18 @@ define <8 x i1> @icmp_sge_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; 
RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsle.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsle.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sge_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vmv.v.x v16, a0 +; RV64-NEXT: vmv.v.x v12, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsle.vv v12, v16, v8, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsle.vv v0, v12, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -2014,18 +1833,16 @@ define <8 x i1> @icmp_sge_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsle.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsle.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sge_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsle.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsle.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -2037,8 +1854,7 @@ define <8 x i1> @icmp_sge_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_sge_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 
x i64> %va, <8 x i64> splat (i64 4), metadata !"sge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2048,8 +1864,7 @@ define <8 x i1> @icmp_sge_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_sge_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"sge", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2059,8 +1874,7 @@ define <8 x i1> @icmp_slt_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_slt_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmslt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmslt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"slt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2075,18 +1889,16 @@ define <8 x i1> @icmp_slt_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmslt.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmslt.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_slt_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmslt.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmslt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -2103,18 +1915,16 @@ define <8 x i1> @icmp_slt_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; 
RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmslt.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmslt.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_slt_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsgt.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsgt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -2126,8 +1936,7 @@ define <8 x i1> @icmp_slt_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_slt_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"slt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2137,8 +1946,7 @@ define <8 x i1> @icmp_slt_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_slt_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"slt", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2148,8 +1956,7 @@ define <8 x i1> @icmp_sle_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i3 ; CHECK-LABEL: icmp_sle_vv_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsle.vv 
v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> %vb, metadata !"sle", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2164,18 +1971,16 @@ define <8 x i1> @icmp_sle_vx_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 zeroe ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsle.vv v12, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsle.vv v0, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sle_vx_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsle.vx v12, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsle.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -2192,20 +1997,18 @@ define <8 x i1> @icmp_sle_vx_swap_v8i64(<8 x i64> %va, i64 %b, <8 x i1> %m, i32 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma -; RV32-NEXT: vmsle.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmsle.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sle_vx_swap_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vmv.v.x v16, a0 +; RV64-NEXT: vmv.v.x v12, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsle.vv v12, v16, v8, v0.t -; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vmsle.vv v0, v12, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement <8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer @@ 
-2217,8 +2020,7 @@ define <8 x i1> @icmp_sle_vi_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: icmp_sle_vi_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> %va, <8 x i64> splat (i64 4), metadata !"sle", <8 x i1> %m, i32 %evl) ret <8 x i1> %v @@ -2228,8 +2030,7 @@ define <8 x i1> @icmp_sle_vi_swap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext ; CHECK-LABEL: icmp_sle_vi_swap_v8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <8 x i1> @llvm.vp.icmp.v8i64(<8 x i64> splat (i64 4), <8 x i64> %va, metadata !"sle", <8 x i1> %m, i32 %evl) ret <8 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll index 657d523..f0fcc48 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,CHECK-NO-MISALIGN,RV32 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,CHECK-NO-MISALIGN,RV64 -; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+fast-unaligned-access -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64,RV64-MISALIGN +; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+unaligned-vector-mem -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64,RV64-MISALIGN ; RUN: llc -mtriple=riscv64 
-mattr=+f,+zfh,+zve64f,+zvl128b,+zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,CHECK-NO-MISALIGN,ZVE64F diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll index fffc4d6..36c36a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck %s --check-prefixes=SLOW,RV32-SLOW ; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=SLOW,RV64-SLOW -; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+fast-unaligned-access -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+unaligned-vector-mem -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=FAST,RV32-FAST -; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+fast-unaligned-access -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+unaligned-vector-mem -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=FAST,RV64-FAST define <4 x i32> @load_v4i32_align1(ptr %ptr) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll index 09b9e7c..a2fc114 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfclass-vp.ll @@ -86,10 +86,9 @@ define <8 x i1> @isnan_v8f32(<8 x float> %x, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: isnan_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vfclass.v v10, v8, v0.t +; CHECK-NEXT: vfclass.v v8, v8, v0.t ; CHECK-NEXT: li a0, 512 -; CHECK-NEXT: vmseq.vx v8, v10, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.vp.is.fpclass.v8f32(<8 x float> %x, i32 2, <8 x i1> %m, i32 %evl) ret <8 x i1> %1 @@ -111,10 +110,9 @@ define <16 x i1> @isnan_v16f32(<16 x 
float> %x, <16 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: isnan_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfclass.v v12, v8, v0.t +; CHECK-NEXT: vfclass.v v8, v8, v0.t ; CHECK-NEXT: li a0, 256 -; CHECK-NEXT: vmseq.vx v8, v12, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f32(<16 x float> %x, i32 1, <16 x i1> %m, i32 %evl) ret <16 x i1> %1 @@ -162,10 +160,9 @@ define <4 x i1> @isposinf_v4f64(<4 x double> %x, <4 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: isposinf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfclass.v v10, v8, v0.t +; CHECK-NEXT: vfclass.v v8, v8, v0.t ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vmseq.vx v8, v10, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.vp.is.fpclass.v4f64(<4 x double> %x, i32 512, <4 x i1> %m, i32 %evl) ; 0x200 = "+inf" ret <4 x i1> %1 @@ -187,9 +184,8 @@ define <8 x i1> @isneginf_v8f64(<8 x double> %x, <8 x i1> %m, i32 zeroext %evl) ; CHECK-LABEL: isneginf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vfclass.v v12, v8, v0.t -; CHECK-NEXT: vmseq.vi v8, v12, 1, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfclass.v v8, v8, v0.t +; CHECK-NEXT: vmseq.vi v0, v8, 1, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.vp.is.fpclass.v8f64(<8 x double> %x, i32 4, <8 x i1> %m, i32 %evl) ; "-inf" ret <8 x i1> %1 @@ -212,9 +208,8 @@ define <16 x i1> @isfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfclass.v v8, v8, v0.t ; CHECK-NEXT: li a0, 126 -; CHECK-NEXT: vand.vx v16, v8, a0, v0.t -; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %1 = call <16 x 
i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 504, <16 x i1> %m, i32 %evl) ; 0x1f8 = "finite" ret <16 x i1> %1 @@ -239,9 +234,8 @@ define <16 x i1> @isposfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfclass.v v8, v8, v0.t ; CHECK-NEXT: li a0, 112 -; CHECK-NEXT: vand.vx v16, v8, a0, v0.t -; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 448, <16 x i1> %m, i32 %evl) ; 0x1c0 = "+finite" ret <16 x i1> %1 @@ -265,9 +259,8 @@ define <16 x i1> @isnotfinite_v16f64(<16 x double> %x, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfclass.v v8, v8, v0.t ; CHECK-NEXT: li a0, 897 -; CHECK-NEXT: vand.vx v16, v8, a0, v0.t -; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vand.vx v8, v8, a0, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.vp.is.fpclass.v16f64(<16 x double> %x, i32 519, <16 x i1> %m, i32 %evl) ; 0x207 = "inf|nan" ret <16 x i1> %1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll index 55e1a1d..ab83617 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmp-constrained-sdnode.ll @@ -2575,12 +2575,10 @@ define <16 x i1> @fcmp_ogt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_ogt_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; 
CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2591,12 +2589,10 @@ define <16 x i1> @fcmp_ogt_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2609,12 +2605,10 @@ define <16 x i1> @fcmp_ogt_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2626,12 +2620,10 @@ define <16 x i1> @fcmp_oge_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_oge_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, 
ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2642,12 +2634,10 @@ define <16 x i1> @fcmp_oge_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2660,12 +2650,10 @@ define <16 x i1> @fcmp_oge_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, 
<16 x i32> zeroinitializer @@ -2677,12 +2665,10 @@ define <16 x i1> @fcmp_olt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_olt_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2693,12 +2679,10 @@ define <16 x i1> @fcmp_olt_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2711,12 +2695,10 @@ define <16 x i1> @fcmp_olt_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv 
v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2728,12 +2710,10 @@ define <16 x i1> @fcmp_ole_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_ole_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2744,12 +2724,10 @@ define <16 x i1> @fcmp_ole_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2762,12 +2740,10 @@ define <16 x i1> @fcmp_ole_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: 
vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2779,14 +2755,13 @@ define <16 x i1> @fcmp_one_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_one_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2797,14 +2772,13 @@ define <16 x i1> @fcmp_one_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; 
CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2817,14 +2791,13 @@ define <16 x i1> @fcmp_one_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2836,9 +2809,9 @@ define <16 x i1> @fcmp_ord_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_ord_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2849,9 +2822,9 @@ define <16 x i1> @fcmp_ord_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2864,9 +2837,9 @@ define <16 x i1> @fcmp_ord_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2878,14 +2851,13 @@ define <16 x i1> @fcmp_ueq_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_ueq_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ 
-2896,14 +2868,13 @@ define <16 x i1> @fcmp_ueq_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2916,14 +2887,13 @@ define <16 x i1> @fcmp_ueq_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2935,12 +2905,11 @@ define <16 x i1> @fcmp_ugt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_ugt_vv_v16f16: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2951,12 +2920,11 @@ define <16 x i1> @fcmp_ugt_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2969,12 +2937,11 @@ define <16 x i1> @fcmp_ugt_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; 
CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2986,12 +2953,11 @@ define <16 x i1> @fcmp_uge_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_uge_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -3002,12 +2968,11 @@ define <16 x i1> @fcmp_uge_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -3020,12 +2985,11 @@ define <16 x i1> @fcmp_uge_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; 
CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -3037,12 +3001,11 @@ define <16 x i1> @fcmp_ult_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_ult_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -3053,12 +3016,11 @@ define <16 x i1> @fcmp_ult_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> 
poison, <16 x i32> zeroinitializer @@ -3071,12 +3033,11 @@ define <16 x i1> @fcmp_ult_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -3088,12 +3049,11 @@ define <16 x i1> @fcmp_ule_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_ule_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -3104,12 +3064,11 @@ define <16 x i1> @fcmp_ule_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m 
v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -3122,12 +3081,11 @@ define <16 x i1> @fcmp_ule_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -3173,9 +3131,9 @@ define <16 x i1> @fcmp_uno_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmp_uno_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v10, v10 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vv v0, v10, v10 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -3186,9 +3144,9 @@ define <16 x i1> @fcmp_uno_vf_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; 
CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -3201,9 +3159,9 @@ define <16 x i1> @fcmp_uno_fv_v16f16(<16 x half> %va, half %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v10 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -3254,12 +3212,10 @@ define <32 x i1> @fcmp_ogt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3271,12 +3227,10 @@ define <32 x i1> @fcmp_ogt_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: 
vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3290,12 +3244,10 @@ define <32 x i1> @fcmp_ogt_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3308,12 +3260,10 @@ define <32 x i1> @fcmp_oge_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3325,12 +3275,10 @@ define <32 x i1> @fcmp_oge_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: 
vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3344,12 +3292,10 @@ define <32 x i1> @fcmp_oge_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3362,12 +3308,10 @@ define <32 x i1> @fcmp_olt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x 
half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3379,12 +3323,10 @@ define <32 x i1> @fcmp_olt_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3398,12 +3340,10 @@ define <32 x i1> @fcmp_olt_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3416,12 +3356,10 @@ define <32 x i1> @fcmp_ole_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, 
v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3433,12 +3371,10 @@ define <32 x i1> @fcmp_ole_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3452,12 +3388,10 @@ define <32 x i1> @fcmp_ole_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3470,14 +3404,13 @@ define <32 x i1> @fcmp_one_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: 
vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3489,14 +3422,13 @@ define <32 x i1> @fcmp_one_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3510,14 +3442,13 @@ define <32 x i1> @fcmp_one_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; 
CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3530,9 +3461,9 @@ define <32 x i1> @fcmp_ord_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3544,9 +3475,9 @@ define <32 x i1> @fcmp_ord_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3560,9 +3491,9 @@ define <32 x i1> @fcmp_ord_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 
-; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3575,14 +3506,13 @@ define <32 x i1> @fcmp_ueq_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3594,14 +3524,13 @@ define <32 x i1> @fcmp_ueq_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, 
v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3615,14 +3544,13 @@ define <32 x i1> @fcmp_ueq_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3635,12 +3563,11 @@ define <32 x i1> @fcmp_ugt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3652,12 +3579,11 @@ define <32 x i1> @fcmp_ugt_vf_v32f16(<32 x half> %va, half %b) 
nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3671,12 +3597,11 @@ define <32 x i1> @fcmp_ugt_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3689,12 +3614,11 @@ define <32 x i1> @fcmp_uge_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, 
v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3706,12 +3630,11 @@ define <32 x i1> @fcmp_uge_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3725,12 +3648,11 @@ define <32 x i1> @fcmp_uge_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3743,12 +3665,11 @@ define <32 x i1> @fcmp_ult_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; 
CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3760,12 +3681,11 @@ define <32 x i1> @fcmp_ult_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3779,12 +3699,11 @@ define <32 x i1> @fcmp_ult_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = 
shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3797,12 +3716,11 @@ define <32 x i1> @fcmp_ule_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3814,12 +3732,11 @@ define <32 x i1> @fcmp_ule_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3833,12 +3750,11 @@ define <32 x i1> @fcmp_ule_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf 
v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3888,9 +3804,9 @@ define <32 x i1> @fcmp_uno_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmp.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3902,9 +3818,9 @@ define <32 x i1> @fcmp_uno_vf_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3918,9 +3834,9 @@ define <32 x i1> @fcmp_uno_fv_v32f16(<32 x half> %va, half %b) nounwind strictfp ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v4 ; 
CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -5866,12 +5782,10 @@ define <8 x i1> @fcmp_ogt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_ogt_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5882,12 +5796,10 @@ define <8 x i1> @fcmp_ogt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5900,12 +5812,10 @@ define <8 x i1> @fcmp_ogt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; 
CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5917,12 +5827,10 @@ define <8 x i1> @fcmp_oge_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_oge_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5933,12 +5841,10 @@ define <8 x i1> @fcmp_oge_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5951,12 +5857,10 @@ define <8 x i1> @fcmp_oge_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5968,12 +5872,10 @@ define <8 x i1> @fcmp_olt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_olt_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5984,12 +5886,10 @@ define <8 x i1> @fcmp_olt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, 
i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6002,12 +5902,10 @@ define <8 x i1> @fcmp_olt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6019,12 +5917,10 @@ define <8 x i1> @fcmp_ole_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_ole_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6035,12 +5931,10 @@ define <8 x i1> @fcmp_ole_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; 
CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6053,12 +5947,10 @@ define <8 x i1> @fcmp_ole_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6070,14 +5962,13 @@ define <8 x i1> @fcmp_one_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_one_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6088,14 
+5979,13 @@ define <8 x i1> @fcmp_one_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6108,14 +5998,13 @@ define <8 x i1> @fcmp_one_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6127,9 +6016,9 @@ define <8 x i1> @fcmp_ord_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_ord_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli 
zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6140,9 +6029,9 @@ define <8 x i1> @fcmp_ord_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6155,9 +6044,9 @@ define <8 x i1> @fcmp_ord_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6169,14 +6058,13 @@ define <8 x i1> @fcmp_ueq_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_ueq_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: 
vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6187,14 +6075,13 @@ define <8 x i1> @fcmp_ueq_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6207,14 +6094,13 @@ define <8 x i1> @fcmp_ueq_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; 
CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6226,12 +6112,11 @@ define <8 x i1> @fcmp_ugt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_ugt_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6242,12 +6127,11 @@ define <8 x i1> @fcmp_ugt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6260,12 +6144,11 @@ define <8 x i1> @fcmp_ugt_fv_v8f32(<8 x float> 
%va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6277,12 +6160,11 @@ define <8 x i1> @fcmp_uge_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_uge_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6293,12 +6175,11 @@ define <8 x i1> @fcmp_uge_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 
+; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6311,12 +6192,11 @@ define <8 x i1> @fcmp_uge_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6328,12 +6208,11 @@ define <8 x i1> @fcmp_ult_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_ult_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6344,12 +6223,11 @@ define <8 x i1> @fcmp_ult_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: 
vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6362,12 +6240,11 @@ define <8 x i1> @fcmp_ult_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6379,12 +6256,11 @@ define <8 x i1> @fcmp_ule_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_ule_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> 
%va, <8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6395,12 +6271,11 @@ define <8 x i1> @fcmp_ule_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6413,12 +6288,11 @@ define <8 x i1> @fcmp_ule_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6464,9 +6338,9 @@ define <8 x i1> @fcmp_uno_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind st ; CHECK-LABEL: fcmp_uno_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v10, v10 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vv v0, v10, v10 +; CHECK-NEXT: vmfne.vv v2, v8, v8 
+; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -6477,9 +6351,9 @@ define <8 x i1> @fcmp_uno_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6492,9 +6366,9 @@ define <8 x i1> @fcmp_uno_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v10 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -6541,12 +6415,10 @@ define <16 x i1> @fcmp_ogt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_ogt_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> 
@llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6557,12 +6429,10 @@ define <16 x i1> @fcmp_ogt_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6575,12 +6445,10 @@ define <16 x i1> @fcmp_ogt_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6592,12 +6460,10 @@ define <16 x i1> @fcmp_oge_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_oge_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv 
v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6608,12 +6474,10 @@ define <16 x i1> @fcmp_oge_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6626,12 +6490,10 @@ define <16 x i1> @fcmp_oge_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6643,12 +6505,10 @@ define <16 x i1> @fcmp_olt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: 
fcmp_olt_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6659,12 +6519,10 @@ define <16 x i1> @fcmp_olt_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6677,12 +6535,10 @@ define <16 x i1> @fcmp_olt_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> 
poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6694,12 +6550,10 @@ define <16 x i1> @fcmp_ole_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_ole_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6710,12 +6564,10 @@ define <16 x i1> @fcmp_ole_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6728,12 +6580,10 @@ define <16 x i1> @fcmp_ole_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, 
v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6745,14 +6595,13 @@ define <16 x i1> @fcmp_one_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_one_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6763,14 +6612,13 @@ define <16 x i1> @fcmp_one_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: 
vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6783,14 +6631,13 @@ define <16 x i1> @fcmp_one_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6802,9 +6649,9 @@ define <16 x i1> @fcmp_ord_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_ord_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6815,9 +6662,9 @@ define <16 x i1> @fcmp_ord_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, 
fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6830,9 +6677,9 @@ define <16 x i1> @fcmp_ord_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6844,14 +6691,13 @@ define <16 x i1> @fcmp_ueq_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_ueq_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6862,14 +6708,13 @@ define <16 x i1> @fcmp_ueq_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; 
CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6882,14 +6727,13 @@ define <16 x i1> @fcmp_ueq_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6901,12 +6745,11 @@ define <16 x i1> @fcmp_ugt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_ugt_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; 
CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6917,12 +6760,11 @@ define <16 x i1> @fcmp_ugt_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6935,12 +6777,11 @@ define <16 x i1> @fcmp_ugt_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = 
shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6952,12 +6793,11 @@ define <16 x i1> @fcmp_uge_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_uge_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -6968,12 +6808,11 @@ define <16 x i1> @fcmp_uge_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6986,12 +6825,11 @@ define <16 x i1> @fcmp_uge_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; 
CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -7003,12 +6841,11 @@ define <16 x i1> @fcmp_ult_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_ult_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -7019,12 +6856,11 @@ define <16 x i1> @fcmp_ult_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -7037,12 +6873,11 @@ 
define <16 x i1> @fcmp_ult_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -7054,12 +6889,11 @@ define <16 x i1> @fcmp_ule_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_ule_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -7070,12 +6904,11 @@ define <16 x i1> @fcmp_ule_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; 
CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -7088,12 +6921,11 @@ define <16 x i1> @fcmp_ule_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -7139,9 +6971,9 @@ define <16 x i1> @fcmp_uno_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwin ; CHECK-LABEL: fcmp_uno_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -7152,9 +6984,9 @@ define <16 x i1> @fcmp_uno_vf_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; 
CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -7167,9 +6999,9 @@ define <16 x i1> @fcmp_uno_fv_v16f32(<16 x float> %va, float %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -8482,12 +8314,10 @@ define <4 x i1> @fcmp_ogt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ogt_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8498,12 +8328,10 @@ define <4 x i1> @fcmp_ogt_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t 
-; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8516,12 +8344,10 @@ define <4 x i1> @fcmp_ogt_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8533,12 +8359,10 @@ define <4 x i1> @fcmp_oge_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_oge_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8549,12 +8373,10 @@ define <4 x i1> @fcmp_oge_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, 
m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8567,12 +8389,10 @@ define <4 x i1> @fcmp_oge_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8584,12 +8404,10 @@ define <4 x i1> @fcmp_olt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_olt_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, 
metadata !"olt", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8600,12 +8418,10 @@ define <4 x i1> @fcmp_olt_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8618,12 +8434,10 @@ define <4 x i1> @fcmp_olt_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8635,12 +8449,10 @@ define <4 x i1> @fcmp_ole_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ole_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 
+; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8651,12 +8463,10 @@ define <4 x i1> @fcmp_ole_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8669,12 +8479,10 @@ define <4 x i1> @fcmp_ole_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8686,14 +8494,13 @@ define <4 x i1> @fcmp_one_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_one_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv 
v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8704,14 +8511,13 @@ define <4 x i1> @fcmp_one_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8724,14 +8530,13 @@ define <4 x i1> @fcmp_one_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: 
vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8743,9 +8548,9 @@ define <4 x i1> @fcmp_ord_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ord_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8756,9 +8561,9 @@ define <4 x i1> @fcmp_ord_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8771,9 +8576,9 @@ define <4 x i1> @fcmp_ord_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; 
CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8785,14 +8590,13 @@ define <4 x i1> @fcmp_ueq_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ueq_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8803,14 +8607,13 @@ define <4 x i1> @fcmp_ueq_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, 
fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8823,14 +8626,13 @@ define <4 x i1> @fcmp_ueq_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8842,12 +8644,11 @@ define <4 x i1> @fcmp_ugt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ugt_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8858,12 +8659,11 @@ define <4 x i1> @fcmp_ugt_vf_v4f64(<4 x double> %va, 
double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8876,12 +8676,11 @@ define <4 x i1> @fcmp_ugt_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8893,12 +8692,11 @@ define <4 x i1> @fcmp_uge_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_uge_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: 
vmflt.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8909,12 +8707,11 @@ define <4 x i1> @fcmp_uge_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8927,12 +8724,11 @@ define <4 x i1> @fcmp_uge_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8944,12 +8740,11 @@ define <4 x i1> @fcmp_ult_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ult_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: 
vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -8960,12 +8755,11 @@ define <4 x i1> @fcmp_ult_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8978,12 +8772,11 @@ define <4 x i1> @fcmp_ult_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double 
%b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -8995,12 +8788,11 @@ define <4 x i1> @fcmp_ule_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ule_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -9011,12 +8803,11 @@ define <4 x i1> @fcmp_ule_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -9029,12 +8820,11 @@ define <4 x i1> @fcmp_ule_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; 
CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -9080,9 +8870,9 @@ define <4 x i1> @fcmp_uno_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmp_uno_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v10, v10 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vv v0, v10, v10 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -9093,9 +8883,9 @@ define <4 x i1> @fcmp_uno_vf_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -9108,9 +8898,9 @@ define <4 x i1> @fcmp_uno_fv_v4f64(<4 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v10 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; 
CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -9157,12 +8947,10 @@ define <8 x i1> @fcmp_ogt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ogt_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9173,12 +8961,10 @@ define <8 x i1> @fcmp_ogt_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9191,12 +8977,10 @@ define <8 x i1> @fcmp_ogt_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, 
v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9208,12 +8992,10 @@ define <8 x i1> @fcmp_oge_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_oge_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9224,12 +9006,10 @@ define <8 x i1> @fcmp_oge_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9242,12 +9022,10 @@ define <8 x i1> @fcmp_oge_fv_v8f64(<8 x double> %va, 
double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9259,12 +9037,10 @@ define <8 x i1> @fcmp_olt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_olt_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9275,12 +9051,10 @@ define <8 x i1> @fcmp_olt_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret 
%head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9293,12 +9067,10 @@ define <8 x i1> @fcmp_olt_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9310,12 +9082,10 @@ define <8 x i1> @fcmp_ole_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ole_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9326,12 +9096,10 @@ define <8 x i1> @fcmp_ole_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; 
CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9344,12 +9112,10 @@ define <8 x i1> @fcmp_ole_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9361,14 +9127,13 @@ define <8 x i1> @fcmp_one_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_one_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, 
metadata !"one", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9379,14 +9144,13 @@ define <8 x i1> @fcmp_one_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9399,14 +9163,13 @@ define <8 x i1> @fcmp_one_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9418,9 +9181,9 @@ define <8 x i1> @fcmp_ord_vv_v8f64(<8 x double> %va, <8 x 
double> %vb) nounwind ; CHECK-LABEL: fcmp_ord_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9431,9 +9194,9 @@ define <8 x i1> @fcmp_ord_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9446,9 +9209,9 @@ define <8 x i1> @fcmp_ord_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9460,14 +9223,13 @@ define <8 x i1> @fcmp_ueq_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ueq_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: 
vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9478,14 +9240,13 @@ define <8 x i1> @fcmp_ueq_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9498,14 +9259,13 @@ define <8 x i1> @fcmp_ueq_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; 
CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9517,12 +9277,11 @@ define <8 x i1> @fcmp_ugt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ugt_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9533,12 +9292,11 @@ define <8 x i1> @fcmp_ugt_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> 
%head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9551,12 +9309,11 @@ define <8 x i1> @fcmp_ugt_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9568,12 +9325,11 @@ define <8 x i1> @fcmp_uge_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_uge_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9584,12 +9340,11 @@ define <8 x i1> @fcmp_uge_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; 
CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9602,12 +9357,11 @@ define <8 x i1> @fcmp_uge_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9619,12 +9373,11 @@ define <8 x i1> @fcmp_ult_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ult_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9635,12 +9388,11 @@ define <8 x i1> @fcmp_ult_vf_v8f64(<8 x double> 
%va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9653,12 +9405,11 @@ define <8 x i1> @fcmp_ult_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9670,12 +9421,11 @@ define <8 x i1> @fcmp_ule_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_ule_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; 
CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9686,12 +9436,11 @@ define <8 x i1> @fcmp_ule_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9704,12 +9453,11 @@ define <8 x i1> @fcmp_ule_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9755,9 +9503,9 @@ define <8 x i1> @fcmp_uno_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmp_uno_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; 
CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -9768,9 +9516,9 @@ define <8 x i1> @fcmp_uno_vf_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -9783,9 +9531,9 @@ define <8 x i1> @fcmp_uno_fv_v8f64(<8 x double> %va, double %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll index 83037ba..e377e37 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfcmps-constrained-sdnode.ll @@ -2137,9 +2137,9 @@ define <16 x i1> @fcmps_oeq_vv_v16f16(<16 x half> %va, <16 x half> 
%vb) nounwind ; CHECK-LABEL: fcmps_oeq_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2149,9 +2149,9 @@ define <16 x i1> @fcmps_oeq_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_oeq_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2163,9 +2163,9 @@ define <16 x i1> @fcmps_oeq_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_oeq_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2313,9 +2313,9 @@ define <16 x i1> @fcmps_one_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_one_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: 
vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2325,9 +2325,9 @@ define <16 x i1> @fcmps_one_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_one_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2339,9 +2339,9 @@ define <16 x i1> @fcmps_one_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_one_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2353,9 +2353,9 @@ define <16 x i1> @fcmps_ord_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_ord_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x 
half> %va, <16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2366,9 +2366,9 @@ define <16 x i1> @fcmps_ord_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2381,9 +2381,9 @@ define <16 x i1> @fcmps_ord_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2395,9 +2395,9 @@ define <16 x i1> @fcmps_ueq_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_ueq_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2407,9 +2407,9 @@ define <16 x i1> @fcmps_ueq_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: 
fcmps_ueq_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2421,9 +2421,9 @@ define <16 x i1> @fcmps_ueq_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_ueq_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2435,8 +2435,8 @@ define <16 x i1> @fcmps_ugt_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_ugt_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2446,8 +2446,8 @@ define <16 x i1> @fcmps_ugt_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_ugt_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 
0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2459,8 +2459,8 @@ define <16 x i1> @fcmps_ugt_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_ugt_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2472,8 +2472,8 @@ define <16 x i1> @fcmps_uge_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_uge_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2483,8 +2483,8 @@ define <16 x i1> @fcmps_uge_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_uge_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2496,8 +2496,8 @@ define <16 x i1> @fcmps_uge_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_uge_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = 
insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2509,8 +2509,8 @@ define <16 x i1> @fcmps_ult_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_ult_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2520,8 +2520,8 @@ define <16 x i1> @fcmps_ult_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_ult_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2533,8 +2533,8 @@ define <16 x i1> @fcmps_ult_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_ult_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2546,8 +2546,8 @@ define <16 x i1> @fcmps_ule_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_ule_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: 
vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2557,8 +2557,8 @@ define <16 x i1> @fcmps_ule_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_ule_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2570,8 +2570,8 @@ define <16 x i1> @fcmps_ule_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_ule_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2583,9 +2583,9 @@ define <16 x i1> @fcmps_une_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_une_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2595,9 +2595,9 @@ define <16 x i1> @fcmps_une_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_une_vf_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2609,9 +2609,9 @@ define <16 x i1> @fcmps_une_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK-LABEL: fcmps_une_fv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2623,10 +2623,10 @@ define <16 x i1> @fcmps_uno_vv_v16f16(<16 x half> %va, <16 x half> %vb) nounwind ; CHECK-LABEL: fcmps_uno_vv_v16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f16(<16 x half> %va, <16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -2637,10 +2637,10 @@ define <16 x i1> @fcmps_uno_vf_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; 
CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2653,10 +2653,10 @@ define <16 x i1> @fcmps_uno_fv_v16f16(<16 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmnot.m v10, v12 -; CHECK-NEXT: vmfle.vv v11, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v10, v11 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmnot.m v10, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v10, v0 ; CHECK-NEXT: ret %head = insertelement <16 x half> poison, half %b, i32 0 %splat = shufflevector <16 x half> %head, <16 x half> poison, <16 x i32> zeroinitializer @@ -2670,9 +2670,9 @@ define <32 x i1> @fcmps_oeq_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -2683,9 +2683,9 @@ define <32 x i1> @fcmps_oeq_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; 
CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -2698,9 +2698,9 @@ define <32 x i1> @fcmps_oeq_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -2861,9 +2861,9 @@ define <32 x i1> @fcmps_one_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -2874,9 +2874,9 @@ define <32 x i1> @fcmps_one_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -2889,9 +2889,9 @@ define <32 x i1> @fcmps_one_fv_v32f16(<32 x half> %va, half %b) 
nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -2904,9 +2904,9 @@ define <32 x i1> @fcmps_ord_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -2918,9 +2918,9 @@ define <32 x i1> @fcmps_ord_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -2934,9 +2934,9 @@ define <32 x i1> @fcmps_ord_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; 
CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -2949,9 +2949,9 @@ define <32 x i1> @fcmps_ueq_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -2962,9 +2962,9 @@ define <32 x i1> @fcmps_ueq_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -2977,9 +2977,9 @@ define <32 x i1> @fcmps_ueq_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x 
i32> zeroinitializer @@ -2992,8 +2992,8 @@ define <32 x i1> @fcmps_ugt_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3004,8 +3004,8 @@ define <32 x i1> @fcmps_ugt_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3018,8 +3018,8 @@ define <32 x i1> @fcmps_ugt_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3032,8 +3032,8 @@ define <32 x i1> @fcmps_uge_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uge", metadata 
!"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3044,8 +3044,8 @@ define <32 x i1> @fcmps_uge_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3058,8 +3058,8 @@ define <32 x i1> @fcmps_uge_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3072,8 +3072,8 @@ define <32 x i1> @fcmps_ult_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3084,8 +3084,8 @@ define <32 x i1> @fcmps_ult_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> 
poison, <32 x i32> zeroinitializer @@ -3098,8 +3098,8 @@ define <32 x i1> @fcmps_ult_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3112,8 +3112,8 @@ define <32 x i1> @fcmps_ule_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3124,8 +3124,8 @@ define <32 x i1> @fcmps_ule_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3138,8 +3138,8 @@ define <32 x i1> @fcmps_ule_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 
x i32> zeroinitializer @@ -3152,9 +3152,9 @@ define <32 x i1> @fcmps_une_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3165,9 +3165,9 @@ define <32 x i1> @fcmps_une_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3180,9 +3180,9 @@ define <32 x i1> @fcmps_une_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3195,10 +3195,10 @@ define <32 x i1> @fcmps_uno_vv_v32f16(<32 x half> %va, <32 x half> %vb) nounwind ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv 
v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <32 x i1> @llvm.experimental.constrained.fcmps.v32f16(<32 x half> %va, <32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <32 x i1> %1 @@ -3210,10 +3210,10 @@ define <32 x i1> @fcmps_uno_vf_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -3227,10 +3227,10 @@ define <32 x i1> @fcmps_uno_fv_v32f16(<32 x half> %va, half %b) nounwind strictf ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmfle.vf v8, v12, fa0 -; CHECK-NEXT: vmnot.m v8, v8 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmfle.vf v4, v12, fa0 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <32 x half> poison, half %b, i32 0 %splat = shufflevector <32 x half> %head, <32 x half> poison, <32 x i32> zeroinitializer @@ -4839,9 +4839,9 @@ define <8 x i1> @fcmps_oeq_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_oeq_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: 
vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -4851,9 +4851,9 @@ define <8 x i1> @fcmps_oeq_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_oeq_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -4865,9 +4865,9 @@ define <8 x i1> @fcmps_oeq_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_oeq_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5015,9 +5015,9 @@ define <8 x i1> @fcmps_one_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_one_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> 
@llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5027,9 +5027,9 @@ define <8 x i1> @fcmps_one_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_one_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5041,9 +5041,9 @@ define <8 x i1> @fcmps_one_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_one_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5055,9 +5055,9 @@ define <8 x i1> @fcmps_ord_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_ord_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5068,9 +5068,9 @@ define <8 x i1> @fcmps_ord_vf_v8f32(<8 x float> %va, 
float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5083,9 +5083,9 @@ define <8 x i1> @fcmps_ord_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5097,9 +5097,9 @@ define <8 x i1> @fcmps_ueq_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_ueq_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5109,9 +5109,9 @@ define <8 x i1> @fcmps_ueq_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_ueq_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm 
v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5123,9 +5123,9 @@ define <8 x i1> @fcmps_ueq_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_ueq_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5137,8 +5137,8 @@ define <8 x i1> @fcmps_ugt_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_ugt_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5148,8 +5148,8 @@ define <8 x i1> @fcmps_ugt_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_ugt_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5161,8 +5161,8 @@ define <8 x i1> @fcmps_ugt_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; 
CHECK-LABEL: fcmps_ugt_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5174,8 +5174,8 @@ define <8 x i1> @fcmps_uge_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_uge_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5185,8 +5185,8 @@ define <8 x i1> @fcmps_uge_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_uge_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5198,8 +5198,8 @@ define <8 x i1> @fcmps_uge_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_uge_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5211,8 +5211,8 @@ define <8 x i1> @fcmps_ult_vv_v8f32(<8 x float> %va, 
<8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_ult_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5222,8 +5222,8 @@ define <8 x i1> @fcmps_ult_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_ult_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5235,8 +5235,8 @@ define <8 x i1> @fcmps_ult_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_ult_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5248,8 +5248,8 @@ define <8 x i1> @fcmps_ule_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_ule_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5259,8 +5259,8 @@ 
define <8 x i1> @fcmps_ule_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_ule_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5272,8 +5272,8 @@ define <8 x i1> @fcmps_ule_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_ule_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5285,9 +5285,9 @@ define <8 x i1> @fcmps_une_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_une_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5297,9 +5297,9 @@ define <8 x i1> @fcmps_une_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_une_vf_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: 
vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5311,9 +5311,9 @@ define <8 x i1> @fcmps_une_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK-LABEL: fcmps_une_fv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5325,10 +5325,10 @@ define <8 x i1> @fcmps_uno_vv_v8f32(<8 x float> %va, <8 x float> %vb) nounwind s ; CHECK-LABEL: fcmps_uno_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %va, <8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -5339,10 +5339,10 @@ define <8 x i1> @fcmps_uno_vf_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 
%splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5355,10 +5355,10 @@ define <8 x i1> @fcmps_uno_fv_v8f32(<8 x float> %va, float %b) nounwind strictfp ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmnot.m v10, v12 -; CHECK-NEXT: vmfle.vv v11, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v10, v11 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmnot.m v10, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v10, v0 ; CHECK-NEXT: ret %head = insertelement <8 x float> poison, float %b, i32 0 %splat = shufflevector <8 x float> %head, <8 x float> poison, <8 x i32> zeroinitializer @@ -5371,9 +5371,9 @@ define <16 x i1> @fcmps_oeq_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_oeq_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5383,9 +5383,9 @@ define <16 x i1> @fcmps_oeq_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_oeq_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5397,9 +5397,9 @@ define <16 x i1> 
@fcmps_oeq_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_oeq_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5547,9 +5547,9 @@ define <16 x i1> @fcmps_one_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_one_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5559,9 +5559,9 @@ define <16 x i1> @fcmps_one_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_one_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5573,9 +5573,9 @@ define <16 x i1> @fcmps_one_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_one_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 
-; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5587,9 +5587,9 @@ define <16 x i1> @fcmps_ord_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_ord_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5600,9 +5600,9 @@ define <16 x i1> @fcmps_ord_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5615,9 +5615,9 @@ define <16 x i1> @fcmps_ord_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = 
insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5629,9 +5629,9 @@ define <16 x i1> @fcmps_ueq_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_ueq_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5641,9 +5641,9 @@ define <16 x i1> @fcmps_ueq_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ueq_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5655,9 +5655,9 @@ define <16 x i1> @fcmps_ueq_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ueq_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5669,8 +5669,8 @@ define <16 x i1> 
@fcmps_ugt_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_ugt_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5680,8 +5680,8 @@ define <16 x i1> @fcmps_ugt_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ugt_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5693,8 +5693,8 @@ define <16 x i1> @fcmps_ugt_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ugt_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5706,8 +5706,8 @@ define <16 x i1> @fcmps_uge_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_uge_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uge", metadata 
!"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5717,8 +5717,8 @@ define <16 x i1> @fcmps_uge_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_uge_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5730,8 +5730,8 @@ define <16 x i1> @fcmps_uge_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_uge_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5743,8 +5743,8 @@ define <16 x i1> @fcmps_ult_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_ult_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5754,8 +5754,8 @@ define <16 x i1> @fcmps_ult_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ult_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float 
%b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5767,8 +5767,8 @@ define <16 x i1> @fcmps_ult_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ult_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5780,8 +5780,8 @@ define <16 x i1> @fcmps_ule_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_ule_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5791,8 +5791,8 @@ define <16 x i1> @fcmps_ule_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ule_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5804,8 +5804,8 @@ define <16 x i1> @fcmps_ule_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_ule_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; 
CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5817,9 +5817,9 @@ define <16 x i1> @fcmps_une_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_une_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5829,9 +5829,9 @@ define <16 x i1> @fcmps_une_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_une_vf_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5843,9 +5843,9 @@ define <16 x i1> @fcmps_une_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK-LABEL: fcmps_une_fv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5857,10 +5857,10 @@ 
define <16 x i1> @fcmps_uno_vv_v16f32(<16 x float> %va, <16 x float> %vb) nounwi ; CHECK-LABEL: fcmps_uno_vv_v16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %va, <16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <16 x i1> %1 @@ -5871,10 +5871,10 @@ define <16 x i1> @fcmps_uno_vf_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -5887,10 +5887,10 @@ define <16 x i1> @fcmps_uno_fv_v16f32(<16 x float> %va, float %b) nounwind stric ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmnot.m v12, v16 -; CHECK-NEXT: vmfle.vv v13, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v12, v13 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmnot.m v12, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v12, v0 ; CHECK-NEXT: ret %head = insertelement <16 x float> poison, float %b, i32 0 %splat = shufflevector <16 x float> %head, <16 x float> poison, <16 x i32> zeroinitializer @@ -6967,9 +6967,9 @@ define <4 x i1> 
@fcmps_oeq_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_oeq_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -6979,9 +6979,9 @@ define <4 x i1> @fcmps_oeq_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_oeq_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -6993,9 +6993,9 @@ define <4 x i1> @fcmps_oeq_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_oeq_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7143,9 +7143,9 @@ define <4 x i1> @fcmps_one_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_one_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; 
CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7155,9 +7155,9 @@ define <4 x i1> @fcmps_one_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_one_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7169,9 +7169,9 @@ define <4 x i1> @fcmps_one_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_one_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7183,9 +7183,9 @@ define <4 x i1> @fcmps_ord_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ord_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> 
@llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7196,9 +7196,9 @@ define <4 x i1> @fcmps_ord_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7211,9 +7211,9 @@ define <4 x i1> @fcmps_ord_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7225,9 +7225,9 @@ define <4 x i1> @fcmps_ueq_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ueq_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7237,9 +7237,9 @@ define <4 x i1> @fcmps_ueq_vf_v4f64(<4 x 
double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ueq_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7251,9 +7251,9 @@ define <4 x i1> @fcmps_ueq_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ueq_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7265,8 +7265,8 @@ define <4 x i1> @fcmps_ugt_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ugt_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7276,8 +7276,8 @@ define <4 x i1> @fcmps_ugt_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ugt_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret 
%head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7289,8 +7289,8 @@ define <4 x i1> @fcmps_ugt_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ugt_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7302,8 +7302,8 @@ define <4 x i1> @fcmps_uge_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_uge_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7313,8 +7313,8 @@ define <4 x i1> @fcmps_uge_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_uge_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7326,8 +7326,8 @@ define <4 x i1> @fcmps_uge_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_uge_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; 
CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7339,8 +7339,8 @@ define <4 x i1> @fcmps_ult_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ult_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7350,8 +7350,8 @@ define <4 x i1> @fcmps_ult_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ult_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7363,8 +7363,8 @@ define <4 x i1> @fcmps_ult_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ult_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7376,8 +7376,8 @@ define <4 x i1> @fcmps_ule_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ule_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, 
v12 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7387,8 +7387,8 @@ define <4 x i1> @fcmps_ule_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ule_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7400,8 +7400,8 @@ define <4 x i1> @fcmps_ule_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ule_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7413,9 +7413,9 @@ define <4 x i1> @fcmps_une_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_une_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7425,9 +7425,9 @@ define <4 x i1> @fcmps_une_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: 
fcmps_une_vf_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7439,9 +7439,9 @@ define <4 x i1> @fcmps_une_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_une_fv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7453,10 +7453,10 @@ define <4 x i1> @fcmps_uno_vv_v4f64(<4 x double> %va, <4 x double> %vb) nounwind ; CHECK-LABEL: fcmps_uno_vv_v4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %va, <4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <4 x i1> %1 @@ -7467,10 +7467,10 @@ define <4 x i1> @fcmps_uno_vf_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: 
vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7483,10 +7483,10 @@ define <4 x i1> @fcmps_uno_fv_v4f64(<4 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmnot.m v10, v12 -; CHECK-NEXT: vmfle.vv v11, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v10, v11 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmnot.m v10, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v10, v0 ; CHECK-NEXT: ret %head = insertelement <4 x double> poison, double %b, i32 0 %splat = shufflevector <4 x double> %head, <4 x double> poison, <4 x i32> zeroinitializer @@ -7499,9 +7499,9 @@ define <8 x i1> @fcmps_oeq_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_oeq_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7511,9 +7511,9 @@ define <8 x i1> @fcmps_oeq_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_oeq_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, 
v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7525,9 +7525,9 @@ define <8 x i1> @fcmps_oeq_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_oeq_fv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7675,9 +7675,9 @@ define <8 x i1> @fcmps_one_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_one_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7687,9 +7687,9 @@ define <8 x i1> @fcmps_one_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_one_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> 
poison, <8 x i32> zeroinitializer @@ -7701,9 +7701,9 @@ define <8 x i1> @fcmps_one_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_one_fv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7715,9 +7715,9 @@ define <8 x i1> @fcmps_ord_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ord_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7728,9 +7728,9 @@ define <8 x i1> @fcmps_ord_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7743,9 +7743,9 @@ define <8 x i1> @fcmps_ord_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; 
CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7757,9 +7757,9 @@ define <8 x i1> @fcmps_ueq_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ueq_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7769,9 +7769,9 @@ define <8 x i1> @fcmps_ueq_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ueq_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7783,9 +7783,9 @@ define <8 x i1> @fcmps_ueq_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ueq_fv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, 
fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7797,8 +7797,8 @@ define <8 x i1> @fcmps_ugt_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ugt_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7808,8 +7808,8 @@ define <8 x i1> @fcmps_ugt_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ugt_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7821,8 +7821,8 @@ define <8 x i1> @fcmps_ugt_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ugt_fv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7834,8 +7834,8 @@ define <8 x i1> @fcmps_uge_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_uge_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: 
vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7845,8 +7845,8 @@ define <8 x i1> @fcmps_uge_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_uge_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7858,8 +7858,8 @@ define <8 x i1> @fcmps_uge_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_uge_fv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7871,8 +7871,8 @@ define <8 x i1> @fcmps_ult_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ult_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7882,8 +7882,8 @@ define <8 x i1> @fcmps_ult_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ult_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, 
m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7895,8 +7895,8 @@ define <8 x i1> @fcmps_ult_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ult_fv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7908,8 +7908,8 @@ define <8 x i1> @fcmps_ule_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_ule_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7919,8 +7919,8 @@ define <8 x i1> @fcmps_ule_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ule_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7932,8 +7932,8 @@ define <8 x i1> @fcmps_ule_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_ule_fv_v8f64: ; CHECK: # 
%bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7945,9 +7945,9 @@ define <8 x i1> @fcmps_une_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_une_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7957,9 +7957,9 @@ define <8 x i1> @fcmps_une_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_une_vf_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7971,9 +7971,9 @@ define <8 x i1> @fcmps_une_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK-LABEL: fcmps_une_fv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: 
ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -7985,10 +7985,10 @@ define <8 x i1> @fcmps_uno_vv_v8f64(<8 x double> %va, <8 x double> %vb) nounwind ; CHECK-LABEL: fcmps_uno_vv_v8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %va, <8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <8 x i1> %1 @@ -7999,10 +7999,10 @@ define <8 x i1> @fcmps_uno_vf_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer @@ -8015,10 +8015,10 @@ define <8 x i1> @fcmps_uno_fv_v8f64(<8 x double> %va, double %b) nounwind strict ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmnot.m v12, v16 -; CHECK-NEXT: vmfle.vv v13, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v12, v13 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmnot.m v12, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v12, v0 ; CHECK-NEXT: ret %head = insertelement 
<8 x double> poison, double %b, i32 0 %splat = shufflevector <8 x double> %head, <8 x double> poison, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll index e65decf..67570f8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -291,16 +291,16 @@ define <32 x i8> @vpgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> % ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetvli zero, a2, e8, m1, ta, ma ; RV64-NEXT: vluxei64.v v10, (a0), v16, v0.t -; RV64-NEXT: addi a2, a1, -16 -; RV64-NEXT: sltu a1, a1, a2 -; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: addi a2, a1, -16 +; RV64-NEXT: sltu a1, a1, a2 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; RV64-NEXT: vluxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: li a0, 32 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll index 466448a..e3ab3b6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -26,8 +26,8 @@ define <4 x i1> @vpmerge_vv_v4i1(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %m, i32 ze ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vid.v v10 -; RV64-NEXT: vmsltu.vx v12, v10, a0 -; RV64-NEXT: vmand.mm v9, v9, v12 +; RV64-NEXT: vmsltu.vx v2, v10, a0 +; RV64-NEXT: vmand.mm v9, v9, v2 ; RV64-NEXT: vmandn.mm v8, v8, v9 ; RV64-NEXT: vmand.mm v9, v0, v9 ; RV64-NEXT: vmor.mm v0, v9, v8 @@ -48,8 +48,8 @@ define <4 x i1> @vpmerge_vv_v4i1(<4 x i1> %va, <4 x i1> 
%vb, <4 x i1> %m, i32 ze ; RV64ZVFHMIN: # %bb.0: ; RV64ZVFHMIN-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64ZVFHMIN-NEXT: vid.v v10 -; RV64ZVFHMIN-NEXT: vmsltu.vx v12, v10, a0 -; RV64ZVFHMIN-NEXT: vmand.mm v9, v9, v12 +; RV64ZVFHMIN-NEXT: vmsltu.vx v2, v10, a0 +; RV64ZVFHMIN-NEXT: vmand.mm v9, v9, v2 ; RV64ZVFHMIN-NEXT: vmandn.mm v8, v8, v9 ; RV64ZVFHMIN-NEXT: vmand.mm v9, v0, v9 ; RV64ZVFHMIN-NEXT: vmor.mm v0, v9, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index fb04d53..cb502de 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -447,15 +447,29 @@ define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 define <32 x i64> @select_evl_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c) { ; CHECK-LABEL: select_evl_v32i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle64.v v16, (a0) ; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: 
ret %v = call <32 x i64> @llvm.vp.select.v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 17) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index 777e005..5979814 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -140,11 +140,9 @@ define <vscale x 8 x half> @vp_floor_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -184,11 +182,9 @@ define <vscale x 16 x half> @vp_floor_nxv16f16(<vscale x 16 x half> %va, <vscale ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -228,11 +224,9 @@ define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: 
vfcvt.f.x.v v16, v16, v0.t @@ -356,11 +350,9 @@ define <vscale x 4 x float> @vp_floor_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -400,11 +392,9 @@ define <vscale x 8 x float> @vp_floor_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -444,11 +434,9 @@ define <vscale x 16 x float> @vp_floor_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -530,11 +518,9 @@ define <vscale x 2 x double> @vp_floor_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; 
CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -574,11 +560,9 @@ define <vscale x 4 x double> @vp_floor_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -618,11 +602,9 @@ define <vscale x 7 x double> @vp_floor_nxv7f64(<vscale x 7 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -662,11 +644,9 @@ define <vscale x 8 x double> @vp_floor_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -716,11 +696,9 @@ define <vscale x 16 x double> @vp_floor_nxv16f64(<vscale x 16 x double> %va, 
<vs ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a2, 2 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -734,11 +712,9 @@ define <vscale x 16 x double> @vp_floor_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 2 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll index 82d1123..24ce8a4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll @@ -201,9 +201,8 @@ define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v3, v24, v24 ; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v3 +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 ; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v8, v0 ; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll index b78b866..d92bf09 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll @@ 
-13,32 +13,33 @@ declare <vscale x 1 x half> @llvm.vp.maximum.nxv1f16(<vscale x 1 x half>, <vscal define <vscale x 1 x half> @vfmax_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv1f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -52,11 +53,10 @@ define <vscale x 1 x half> @vfmax_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, 
a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv1f16_unmasked: @@ -66,12 +66,11 @@ define <vscale x 1 x half> @vfmax_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, < ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -85,32 +84,33 @@ declare <vscale x 2 x half> @llvm.vp.maximum.nxv2f16(<vscale x 2 x half>, <vscal define <vscale x 2 x half> @vfmax_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; 
ZVFHMIN-LABEL: vfmax_vv_nxv2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -124,11 +124,10 @@ define <vscale x 2 x half> @vfmax_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv2f16_unmasked: @@ -138,12 +137,11 @@ define <vscale x 2 x half> @vfmax_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, < ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; 
ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -157,35 +155,34 @@ declare <vscale x 4 x half> @llvm.vp.maximum.nxv4f16(<vscale x 4 x half>, <vscal define <vscale x 4 x half> @vfmax_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmax.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v14, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v14, v14, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v14, v12, v0 -; ZVFHMIN-NEXT: 
vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmax.vv v10, v8, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -198,11 +195,10 @@ define <vscale x 4 x half> @vfmax_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v11 +; ZVFH-NEXT: vfmax.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv4f16_unmasked: @@ -214,11 +210,10 @@ define <vscale x 4 x half> @vfmax_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, < ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v14, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v10, v8, v14 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -231,37 +226,34 @@ declare <vscale x 8 x half> @llvm.vp.maximum.nxv8f16(<vscale x 8 x half>, <vscal define <vscale x 8 x half> @vfmax_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v 
v12, v0 +; ZVFH-NEXT: vmv1r.v v14, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v13, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vmfeq.vv v13, v10, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vfmax.vv v8, v8, v14, v0.t +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vfmax.vv v8, v8, v12, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmv1r.v v20, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v20, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vfmax.vv v12, v8, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -274,11 +266,10 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, < ; ZVFH: # 
%bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v14 +; ZVFH-NEXT: vfmax.vv v8, v8, v12 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv8f16_unmasked: @@ -290,11 +281,10 @@ define <vscale x 8 x half> @vfmax_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, < ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v20, v12, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 -; ZVFHMIN-NEXT: vfmax.vv v12, v8, v20 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -307,52 +297,36 @@ declare <vscale x 16 x half> @llvm.vp.maximum.nxv16f16(<vscale x 16 x half>, <vs define <vscale x 16 x half> @vfmax_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: vmv1r.v v20, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vmfeq.vv v17, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v17 -; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 -; ZVFH-NEXT: vmfeq.vv v17, v12, v12, v0.t -; ZVFH-NEXT: vmv1r.v v0, v17 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmerge.vvm v16, v8, v12, v0 +; ZVFH-NEXT: vmv1r.v v0, v20 +; ZVFH-NEXT: vmfeq.vv v0, v12, v12, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 -; ZVFH-NEXT: vmv1r.v 
v0, v16 -; ZVFH-NEXT: vfmax.vv v8, v8, v20, v0.t +; ZVFH-NEXT: vmv1r.v v0, v20 +; ZVFH-NEXT: vfmax.vv v8, v8, v16, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.maximum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 
16 x i1> %m, i32 %evl) ret <vscale x 16 x half> %v @@ -363,21 +337,14 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16_unmasked(<vscale x 16 x half> %va ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v16, v12, v12 -; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vmerge.vvm v16, v8, v12, v0 +; ZVFH-NEXT: vmfeq.vv v0, v12, v12 ; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 -; ZVFH-NEXT: vfmax.vv v8, v8, v20 +; ZVFH-NEXT: vfmax.vv v8, v8, v16 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -385,20 +352,12 @@ define <vscale x 16 x half> @vfmax_vv_nxv16f16_unmasked(<vscale x 16 x half> %va ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16 +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 +; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: 
addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.maximum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) ret <vscale x 16 x half> %v @@ -409,30 +368,15 @@ declare <vscale x 32 x half> @llvm.vp.maximum.nxv32f16(<vscale x 32 x half>, <vs define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmax_vv_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: .cfi_def_cfa_offset 16 -; ZVFH-NEXT: csrr a1, vlenb -; ZVFH-NEXT: slli a1, a1, 3 -; ZVFH-NEXT: sub sp, sp, a1 -; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFH-NEXT: vmv1r.v v7, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: vmfeq.vv v25, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v25 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: vmfeq.vv v25, v16, v16, v0.t -; ZVFH-NEXT: vmv1r.v v0, v25 +; ZVFH-NEXT: vmfeq.vv v0, v16, v16, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vfmax.vv v8, v8, v16, v0.t -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: addi sp, sp, 16 +; ZVFH-NEXT: vfmax.vv v8, v8, v24, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmax_vv_nxv32f16: @@ -440,17 +384,10 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 34 +; ZVFHMIN-NEXT: li a2, 26 ; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 
0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv8r.v v16, v8 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -459,135 +396,99 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v5, v8 +; ZVFHMIN-NEXT: vmv1r.v v6, v0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 25 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: 
vl8r.v v0, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmfeq.vv v12, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v5 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB10_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: 
vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: li a2, 24 ; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v24, v24, v16, v0 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 25 +; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24, v0.t +; 
ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 24 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmv1r.v v0, v9 -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v9 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 25 +; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 34 +; ZVFHMIN-NEXT: li a1, 26 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -601,9 +502,8 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v7, v16, v16 ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: vmv1r.v v0, v7 +; ZVFH-NEXT: vmfeq.vv v0, v16, v16 ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vfmax.vv v8, v8, v24 ; ZVFH-NEXT: ret @@ -613,9 +513,10 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: 
.cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 5 +; ZVFHMIN-NEXT: li a2, 24 +; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -626,82 +527,80 @@ define <vscale x 32 x half> @vfmax_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 +; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v6, v8 +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv4r.v v8, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: vsetvli 
zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmax.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB11_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; 
ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v3, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v3 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v0, v16 -; ZVFHMIN-NEXT: vmv8r.v v8, v0 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -714,15 +613,15 @@ declare <vscale x 1 x float> @llvm.vp.maximum.nxv1f32(<vscale x 1 x float>, <vsc define <vscale x 1 x float> @vfmax_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: 
vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <vscale x 1 x float> @llvm.vp.maximum.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x float> %v @@ -733,11 +632,10 @@ define <vscale x 1 x float> @vfmax_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <vscale x 1 x float> @llvm.vp.maximum.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x float> %v @@ -748,15 +646,15 @@ declare <vscale x 2 x float> @llvm.vp.maximum.nxv2f32(<vscale x 2 x float>, <vsc define <vscale x 2 x float> @vfmax_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; 
CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <vscale x 2 x float> @llvm.vp.maximum.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x float> %v @@ -767,11 +665,10 @@ define <vscale x 2 x float> @vfmax_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <vscale x 2 x float> @llvm.vp.maximum.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x float> %v @@ -782,17 +679,15 @@ declare <vscale x 4 x float> @llvm.vp.maximum.nxv4f32(<vscale x 4 x float>, <vsc define <vscale x 4 x float> @vfmax_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call 
<vscale x 4 x float> @llvm.vp.maximum.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x float> %v @@ -803,11 +698,10 @@ define <vscale x 4 x float> @vfmax_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <vscale x 4 x float> @llvm.vp.maximum.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x float> %v @@ -818,17 +712,15 @@ declare <vscale x 8 x float> @llvm.vp.maximum.nxv8f32(<vscale x 8 x float>, <vsc define <vscale x 8 x float> @vfmax_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x float> @llvm.vp.maximum.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 %evl) ret 
<vscale x 8 x float> %v @@ -839,11 +731,10 @@ define <vscale x 8 x float> @vfmax_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <vscale x 8 x float> @llvm.vp.maximum.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) ret <vscale x 8 x float> %v @@ -854,15 +745,15 @@ declare <vscale x 1 x double> @llvm.vp.maximum.nxv1f64(<vscale x 1 x double>, <v define <vscale x 1 x double> @vfmax_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmax.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <vscale x 1 x double> @llvm.vp.maximum.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x double> %v @@ -873,11 +764,10 @@ define <vscale x 1 x double> @vfmax_vv_nxv1f64_unmasked(<vscale x 1 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: 
vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v11 +; CHECK-NEXT: vfmax.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <vscale x 1 x double> @llvm.vp.maximum.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x double> %v @@ -888,17 +778,15 @@ declare <vscale x 2 x double> @llvm.vp.maximum.nxv2f64(<vscale x 2 x double>, <v define <vscale x 2 x double> @vfmax_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmax.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 2 x double> @llvm.vp.maximum.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x double> %v @@ -909,11 +797,10 @@ define <vscale x 2 x double> @vfmax_vv_nxv2f64_unmasked(<vscale x 2 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, 
v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v14 +; CHECK-NEXT: vfmax.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <vscale x 2 x double> @llvm.vp.maximum.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x double> %v @@ -924,17 +811,15 @@ declare <vscale x 4 x double> @llvm.vp.maximum.nxv4f64(<vscale x 4 x double>, <v define <vscale x 4 x double> @vfmax_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmax.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 4 x double> @llvm.vp.maximum.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x double> %v @@ -945,11 +830,10 @@ define <vscale x 4 x double> @vfmax_vv_nxv4f64_unmasked(<vscale x 4 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v20 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; 
CHECK-NEXT: ret %v = call <vscale x 4 x double> @llvm.vp.maximum.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x double> %v @@ -960,30 +844,15 @@ declare <vscale x 8 x double> @llvm.vp.maximum.nxv8f64(<vscale x 8 x double>, <v define <vscale x 8 x double> @vfmax_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmax_vv_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x double> @llvm.vp.maximum.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x double> %v @@ -994,9 +863,8 @@ define <vscale x 8 x double> @vfmax_vv_nxv8f64_unmasked(<vscale x 8 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; 
CHECK-NEXT: vmfeq.vv v7, v16, v16 ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: ret @@ -1012,79 +880,43 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 42 +; CHECK-NEXT: li a3, 24 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v24, (a3) -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 4 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v24, v0, a3 +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vslidedown.vx v6, v0, a3 ; CHECK-NEXT: sub a3, a2, a1 ; CHECK-NEXT: sltu a4, a2, a3 ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 -; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a4, a0, 5 -; CHECK-NEXT: add a0, a4, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v 
v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v7, v24 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v26 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v17, v24, v24, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a0, a0, a3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t +; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmfeq.vv v0, v24, v24, v0.t +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vfmax.vv v8, v16, v8, v0.t ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: bltu 
a2, a1, .LBB28_2 ; CHECK-NEXT: # %bb.1: @@ -1092,49 +924,27 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: .LBB28_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 5 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; 
CHECK-NEXT: li a1, 42 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -1174,9 +984,8 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: and a0, a3, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: vfmax.vv v8, v16, v8 ; CHECK-NEXT: addi a0, sp, 16 @@ -1197,9 +1006,8 @@ define <vscale x 16 x double> @vfmax_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v8, v8 ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: addi a0, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll index 69ad7b4..198d3a9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll @@ -201,9 +201,8 @@ define <vscale x 32 x half> @vfmin_nxv32f16_vv(<vscale x 32 x half> %a, <vscale ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmfeq.vv v3, v24, v24 ; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v3 +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 ; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v8, v0 ; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll index 69c7615..828cc57 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll @@ -13,32 +13,33 @@ declare <vscale x 1 x half> @llvm.vp.minimum.nxv1f16(<vscale x 1 x half>, <vscal define <vscale x 1 x half> @vfmin_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv1f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -52,11 +53,10 @@ define <vscale x 1 x half> 
@vfmin_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv1f16_unmasked: @@ -66,12 +66,11 @@ define <vscale x 1 x half> @vfmin_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, < ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -85,32 +84,33 @@ declare <vscale x 2 x half> @llvm.vp.minimum.nxv2f16(<vscale x 2 x half>, <vscal define <vscale x 2 x half> @vfmin_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, 
v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv2f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v11, v11, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v12, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v11, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v11, v8, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmv1r.v v0, v12 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v11, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v11 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -124,11 +124,10 @@ define <vscale x 2 x half> @vfmin_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv2f16_unmasked: @@ -138,12 +137,11 @@ define <vscale x 2 x half> @vfmin_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, < ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v9 +; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v8, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v11, v11 -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v11, v0 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v11, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 ; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 @@ -157,35 +155,34 @@ declare <vscale x 4 x half> @llvm.vp.minimum.nxv4f16(<vscale x 4 x half>, <vscal define <vscale x 4 x half> @vfmin_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vmv1r.v v11, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmv1r.v v0, v11 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v10 -; ZVFH-NEXT: vfmin.vv v8, v8, v11, v0.t +; ZVFH-NEXT: vmv1r.v v0, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv4f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v10, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: vmv1r.v v14, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v16, v12, v14, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, 
v14, v14, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v14, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v10 -; ZVFHMIN-NEXT: vfmin.vv v10, v8, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v14 +; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -198,11 +195,10 @@ define <vscale x 4 x half> @vfmin_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v10, v9, v9 -; ZVFH-NEXT: vmerge.vvm v11, v8, v9, v0 -; ZVFH-NEXT: vmv.v.v v0, v10 +; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v11 +; ZVFH-NEXT: vfmin.vv v8, v8, v10 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv4f16_unmasked: @@ -214,11 +210,10 @@ define <vscale x 4 x half> @vfmin_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, < ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v14, v10, v12, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v10, v8, v14 +; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 ; ZVFHMIN-NEXT: ret @@ -231,37 +226,34 @@ declare <vscale x 8 x half> @llvm.vp.minimum.nxv8f16(<vscale x 8 x half>, <vscal define <vscale x 8 x half> @vfmin_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale 
x 8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: vmv1r.v v14, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v13, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vmfeq.vv v13, v10, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v13 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 -; ZVFH-NEXT: vfmin.vv v8, v8, v14, v0.t +; ZVFH-NEXT: vmv1r.v v0, v14 +; ZVFH-NEXT: vfmin.vv v8, v8, v12, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv8f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: vmv1r.v v12, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmv1r.v v20, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v20, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmfeq.vv v8, v20, v20, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v20, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vfmin.vv v12, v8, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v20 +; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ 
-274,11 +266,10 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmerge.vvm v14, v8, v10, v0 -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmerge.vvm v12, v8, v10, v0 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 ; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v14 +; ZVFH-NEXT: vfmin.vv v8, v8, v12 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv8f16_unmasked: @@ -290,11 +281,10 @@ define <vscale x 8 x half> @vfmin_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, < ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v20, v12, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 -; ZVFHMIN-NEXT: vfmin.vv v12, v8, v20 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 +; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 ; ZVFHMIN-NEXT: ret @@ -307,52 +297,36 @@ declare <vscale x 16 x half> @llvm.vp.minimum.nxv16f16(<vscale x 16 x half>, <vs define <vscale x 16 x half> @vfmin_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: vmv1r.v v20, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; ZVFH-NEXT: vmfeq.vv v17, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v17 -; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 -; ZVFH-NEXT: vmfeq.vv v17, v12, v12, v0.t -; ZVFH-NEXT: vmv1r.v v0, v17 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmerge.vvm v16, v8, v12, v0 +; ZVFH-NEXT: vmv1r.v 
v0, v20 +; ZVFH-NEXT: vmfeq.vv v0, v12, v12, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 -; ZVFH-NEXT: vfmin.vv v8, v8, v20, v0.t +; ZVFH-NEXT: vmv1r.v v0, v20 +; ZVFH-NEXT: vfmin.vv v8, v8, v16, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv16f16: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v7 +; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %v = 
call <vscale x 16 x half> @llvm.vp.minimum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl) ret <vscale x 16 x half> %v @@ -363,21 +337,14 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16_unmasked(<vscale x 16 x half> %va ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v16, v12, v12 -; ZVFH-NEXT: vmerge.vvm v20, v8, v12, v0 -; ZVFH-NEXT: vmv1r.v v0, v16 +; ZVFH-NEXT: vmerge.vvm v16, v8, v12, v0 +; ZVFH-NEXT: vmfeq.vv v0, v12, v12 ; ZVFH-NEXT: vmerge.vvm v8, v12, v8, v0 -; ZVFH-NEXT: vfmin.vv v8, v8, v20 +; ZVFH-NEXT: vfmin.vv v8, v8, v16 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv16f16_unmasked: ; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: addi sp, sp, -16 -; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 3 -; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma @@ -385,20 +352,12 @@ define <vscale x 16 x half> @vfmin_vv_nxv16f16_unmasked(<vscale x 16 x half> %va ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v7, v24, v24 ; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 -; ZVFHMIN-NEXT: addi a0, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16 +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 +; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, 
v16 -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add sp, sp, a0 -; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret %v = call <vscale x 16 x half> @llvm.vp.minimum.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl) ret <vscale x 16 x half> %v @@ -409,30 +368,15 @@ declare <vscale x 32 x half> @llvm.vp.minimum.nxv32f16(<vscale x 32 x half>, <vs define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vfmin_vv_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: .cfi_def_cfa_offset 16 -; ZVFH-NEXT: csrr a1, vlenb -; ZVFH-NEXT: slli a1, a1, 3 -; ZVFH-NEXT: sub sp, sp, a1 -; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFH-NEXT: vmv1r.v v7, v0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; ZVFH-NEXT: vmfeq.vv v25, v8, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v25 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: vmfeq.vv v25, v16, v16, v0.t -; ZVFH-NEXT: vmv1r.v v0, v25 +; ZVFH-NEXT: vmfeq.vv v0, v16, v16, v0.t ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vmv1r.v v0, v7 -; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vfmin.vv v8, v8, v16, v0.t -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: addi sp, sp, 16 +; ZVFH-NEXT: vfmin.vv v8, v8, v24, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vfmin_vv_nxv32f16: @@ -440,17 +384,10 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 34 +; 
ZVFHMIN-NEXT: li a2, 26 ; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vmv8r.v v16, v8 +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -459,135 +396,99 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: and a3, a4, a3 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: csrr a4, vlenb -; ZVFHMIN-NEXT: li a5, 24 -; ZVFHMIN-NEXT: mul a4, a4, a5 -; ZVFHMIN-NEXT: add a4, sp, a4 -; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vslidedown.vx v0, v0, a2 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs1r.v v0, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v5, v8 +; ZVFHMIN-NEXT: vmv1r.v v6, v0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 25 -; ZVFHMIN-NEXT: mul a2, a2, 
a4 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v4 +; ZVFHMIN-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v16, v0 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl1r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: vmfeq.vv v12, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v0, v5 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 +; ZVFHMIN-NEXT: addi a2, sp, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB10_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB10_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; 
ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: csrr a1, vlenb ; ZVFHMIN-NEXT: li a2, 24 ; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t -; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 -; ZVFHMIN-NEXT: add a1, sp, a1 -; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: addi a1, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 25 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v24, v24, v16, v0 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 25 +; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded 
Spill +; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 24 -; ZVFHMIN-NEXT: mul a0, a0, a1 +; ZVFHMIN-NEXT: slli a0, a0, 4 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmv1r.v v0, v9 -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 -; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v9 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 25 +; ZVFHMIN-NEXT: li a1, 24 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 3 -; ZVFHMIN-NEXT: add a0, sp, a0 -; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: addi a0, sp, 16 ; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 34 +; ZVFHMIN-NEXT: li a1, 26 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -601,9 +502,8 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 -; ZVFH-NEXT: vmfeq.vv v7, v16, v16 ; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFH-NEXT: vmv1r.v v0, v7 +; ZVFH-NEXT: vmfeq.vv v0, v16, v16 ; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0 ; ZVFH-NEXT: vfmin.vv v8, v8, v24 ; ZVFH-NEXT: ret @@ -613,9 +513,10 @@ define <vscale x 32 x 
half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 5 +; ZVFHMIN-NEXT: li a2, 24 +; ZVFHMIN-NEXT: mul a1, a1, a2 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: sub a3, a0, a1 @@ -626,82 +527,80 @@ define <vscale x 32 x half> @vfmin_vv_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v24 ; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v7, v24, a2 +; ZVFHMIN-NEXT: vslidedown.vx v0, v24, a2 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 4 +; ZVFHMIN-NEXT: slli a2, a2, 3 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v24, v0.t +; ZVFHMIN-NEXT: vmv1r.v v6, v8 +; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vmv4r.v v8, v16 ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 24 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a2, a2, 4 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: 
vs8r.v v16, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v20 ; ZVFHMIN-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: addi a2, sp, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v24, v0 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmv1r.v v0, v6 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: vmfeq.vv v12, v24, v24, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v12 -; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v16, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0 ; ZVFHMIN-NEXT: vmv1r.v v0, v7 -; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: slli a2, a2, 3 -; ZVFHMIN-NEXT: add a2, sp, a2 -; ZVFHMIN-NEXT: addi a2, a2, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8, v0.t +; ZVFHMIN-NEXT: vfmin.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli a2, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v4, v16 +; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16 +; ZVFHMIN-NEXT: addi a2, sp, 16 +; ZVFHMIN-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a0, a1, .LBB11_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a0, a1 ; ZVFHMIN-NEXT: .LBB11_2: ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a1, a1, 4 +; ZVFHMIN-NEXT: slli a1, a1, 3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 +; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 +; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v16 ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, 
ma ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a2, 24 -; ZVFHMIN-NEXT: mul a1, a1, a2 +; ZVFHMIN-NEXT: slli a1, a1, 4 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v3, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0 -; ZVFHMIN-NEXT: vmv1r.v v0, v3 -; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v8, v0 +; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmv8r.v v8, v16 +; ZVFHMIN-NEXT: vmerge.vvm v16, v16, v24, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v24 +; ZVFHMIN-NEXT: vmerge.vvm v16, v24, v8, v0 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a0, a0, 4 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24 ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v0, v16 -; ZVFHMIN-NEXT: vmv8r.v v8, v0 +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: slli a0, a0, 5 +; ZVFHMIN-NEXT: li a1, 24 +; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 ; ZVFHMIN-NEXT: ret @@ -714,15 +613,15 @@ declare <vscale x 1 x float> @llvm.vp.minimum.nxv1f32(<vscale x 1 x float>, <vsc define <vscale x 1 x float> @vfmin_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv1f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, 
v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <vscale x 1 x float> @llvm.vp.minimum.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x float> %v @@ -733,11 +632,10 @@ define <vscale x 1 x float> @vfmin_vv_nxv1f32_unmasked(<vscale x 1 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <vscale x 1 x float> @llvm.vp.minimum.nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x float> %v @@ -748,15 +646,15 @@ declare <vscale x 2 x float> @llvm.vp.minimum.nxv2f32(<vscale x 2 x float>, <vsc define <vscale x 2 x float> @vfmin_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv2f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, 
v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <vscale x 2 x float> @llvm.vp.minimum.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x float> %v @@ -767,11 +665,10 @@ define <vscale x 2 x float> @vfmin_vv_nxv2f32_unmasked(<vscale x 2 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <vscale x 2 x float> @llvm.vp.minimum.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x float> %v @@ -782,17 +679,15 @@ declare <vscale x 4 x float> @llvm.vp.minimum.nxv4f32(<vscale x 4 x float>, <vsc define <vscale x 4 x float> @vfmin_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmin.vv v8, v8, v14, v0.t +; CHECK-NEXT: 
vmv1r.v v0, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 4 x float> @llvm.vp.minimum.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x float> %v @@ -803,11 +698,10 @@ define <vscale x 4 x float> @vfmin_vv_nxv4f32_unmasked(<vscale x 4 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <vscale x 4 x float> @llvm.vp.minimum.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x float> %v @@ -818,17 +712,15 @@ declare <vscale x 8 x float> @llvm.vp.minimum.nxv8f32(<vscale x 8 x float>, <vsc define <vscale x 8 x float> @vfmin_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv8f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmin.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x float> 
@llvm.vp.minimum.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x float> %v @@ -839,11 +731,10 @@ define <vscale x 8 x float> @vfmin_vv_nxv8f32_unmasked(<vscale x 8 x float> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <vscale x 8 x float> @llvm.vp.minimum.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl) ret <vscale x 8 x float> %v @@ -854,15 +745,15 @@ declare <vscale x 1 x double> @llvm.vp.minimum.nxv1f64(<vscale x 1 x double>, <v define <vscale x 1 x double> @vfmin_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v11, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vfmin.vv v8, v8, v11, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10, v0.t ; CHECK-NEXT: ret %v = call <vscale x 1 x double> @llvm.vp.minimum.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 %evl) ret <vscale x 1 x double> %v @@ -873,11 +764,10 @@ define <vscale x 1 x double> @vfmin_vv_nxv1f64_unmasked(<vscale x 1 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli 
zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v10, v9, v9 -; CHECK-NEXT: vmerge.vvm v11, v8, v9, v0 -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmerge.vvm v10, v8, v9, v0 +; CHECK-NEXT: vmfeq.vv v0, v9, v9 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v11 +; CHECK-NEXT: vfmin.vv v8, v8, v10 ; CHECK-NEXT: ret %v = call <vscale x 1 x double> @llvm.vp.minimum.nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl) ret <vscale x 1 x double> %v @@ -888,17 +778,15 @@ declare <vscale x 2 x double> @llvm.vp.minimum.nxv2f64(<vscale x 2 x double>, <v define <vscale x 2 x double> @vfmin_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vmv1r.v v14, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v13, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 -; CHECK-NEXT: vmerge.vvm v14, v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vv v13, v10, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v0, v10, v10, v0.t ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vfmin.vv v8, v8, v14, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 2 x double> @llvm.vp.minimum.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x double> %v @@ -909,11 +797,10 @@ define <vscale x 2 x double> @vfmin_vv_nxv2f64_unmasked(<vscale x 2 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmerge.vvm v14, 
v8, v10, v0 -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vvm v12, v8, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 ; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v14 +; CHECK-NEXT: vfmin.vv v8, v8, v12 ; CHECK-NEXT: ret %v = call <vscale x 2 x double> @llvm.vp.minimum.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl) ret <vscale x 2 x double> %v @@ -924,17 +811,15 @@ declare <vscale x 4 x double> @llvm.vp.minimum.nxv4f64(<vscale x 4 x double>, <v define <vscale x 4 x double> @vfmin_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vmv1r.v v20, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v17, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmfeq.vv v17, v12, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v0, v12, v12, v0.t ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vfmin.vv v8, v8, v20, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 4 x double> @llvm.vp.minimum.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 %evl) ret <vscale x 4 x double> %v @@ -945,11 +830,10 @@ define <vscale x 4 x double> @vfmin_vv_nxv4f64_unmasked(<vscale x 4 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmerge.vvm v20, v8, v12, v0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmerge.vvm v16, v8, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 ; 
CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v20 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: ret %v = call <vscale x 4 x double> @llvm.vp.minimum.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl) ret <vscale x 4 x double> %v @@ -960,30 +844,15 @@ declare <vscale x 8 x double> @llvm.vp.minimum.nxv8f64(<vscale x 8 x double>, <v define <vscale x 8 x double> @vfmin_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmin_vv_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x double> @llvm.vp.minimum.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x double> %v @@ -994,9 +863,8 @@ define <vscale x 8 x double> @vfmin_vv_nxv8f64_unmasked(<vscale x 8 x 
double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v16, v16 ; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: ret @@ -1012,79 +880,43 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 42 +; CHECK-NEXT: li a3, 24 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v24, (a3) -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: slli a3, a3, 4 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a4, a4, 5 -; CHECK-NEXT: add a4, sp, a4 -; CHECK-NEXT: addi a4, a4, 16 -; CHECK-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v24, v0, a3 +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vslidedown.vx v6, v0, a3 ; CHECK-NEXT: sub a3, a2, a1 ; CHECK-NEXT: sltu a4, a2, a3 ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 -; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a4, a0, 5 
-; CHECK-NEXT: add a0, a4, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v7, v24 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: vmfeq.vv v26, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v26 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v17, v24, v24, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a0, a0, a3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t +; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmfeq.vv v0, v24, v24, v0.t +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vfmin.vv v8, v16, v8, v0.t ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, 
a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a2, a1, .LBB28_2 ; CHECK-NEXT: # %bb.1: @@ -1092,49 +924,27 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: .LBB28_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v25, v16, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v16, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 5 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t +; CHECK-NEXT: addi 
a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 42 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 @@ -1174,9 +984,8 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: and a0, a3, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: vfmin.vv v8, v16, v8 ; CHECK-NEXT: addi a0, sp, 16 @@ -1197,9 +1006,8 @@ define <vscale x 16 x double> @vfmin_vv_nxv16f64_unmasked(<vscale x 16 x double> ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v8, v8 ; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: addi a0, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll index 485f94ee2..53598c6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32 ; RUN: llc < %s -mtriple=riscv64 -mattr=+v \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64 -; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem \ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST -; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+unaligned-scalar-mem,+unaligned-vector-mem \ ; RUN: | FileCheck %s 
--check-prefixes=RV64-BOTH,RV64-FAST ; ---------------------------------------------------------------------- diff --git a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll index 0e7e914..accc185 100644 --- a/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memset-inline.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64 -; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+unaligned-scalar-mem,+unaligned-vector-mem \ ; RUN: | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST -; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+fast-unaligned-access \ +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+unaligned-scalar-mem,+unaligned-vector-mem \ ; RUN: | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll index 0e09f59..1e38700 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -1757,19 +1757,33 @@ define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vsc ; ; RV64-LABEL: mscatter_baseidx_nxv16i8_nxv16f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; RV64-NEXT: vl2r.v v6, (a1) +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v24, v7 +; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsext.vf8
v24, v6 ; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t -; RV64-NEXT: vsext.vf8 v8, v7 -; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: srli a1, a1, 3 ; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i8> %idxs %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0) @@ -1797,19 +1811,33 @@ define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vs ; ; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; RV64-NEXT: vl4re16.v v4, (a1) +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf4 v24, v6 +; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsext.vf4 v24, v4 ; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v24, v0.t -; RV64-NEXT: vsext.vf4 v8, v6 -; RV64-NEXT: vsll.vi v8, v8, 3 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: srli a1, a1, 3 ; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: addi a1, sp, 16 +; 
RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0) diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll index fadb4a1..05d6bc6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -201,11 +201,9 @@ define <vscale x 4 x half> @vp_nearbyint_nxv4f16(<vscale x 4 x half> %va, <vscal ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: fsflags a0 @@ -266,11 +264,9 @@ define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscal ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t ; ZVFH-NEXT: fsflags a0 @@ -287,11 +283,9 @@ define <vscale x 8 x half> @vp_nearbyint_nxv8f16(<vscale x 8 x half> %va, <vscal ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, 
e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: fsflags a0 @@ -352,11 +346,9 @@ define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vs ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; ZVFH-NEXT: vmv1r.v v17, v0 -; ZVFH-NEXT: vmflt.vf v17, v12, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v12, fa5, v0.t ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v17 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t ; ZVFH-NEXT: fsflags a0 @@ -373,11 +365,9 @@ define <vscale x 16 x half> @vp_nearbyint_nxv16f16(<vscale x 16 x half> %va, <vs ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v25, v0 -; ZVFHMIN-NEXT: vmflt.vf v25, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v25 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: fsflags a0 @@ -438,11 +428,9 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; ZVFH-NEXT: vmv1r.v v25, v0 -; ZVFH-NEXT: vmflt.vf v25, v16, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v16, fa5, v0.t ; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v25 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, 
v0.t ; ZVFH-NEXT: fsflags a0 @@ -469,11 +457,9 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: frflags a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: fsflags a2 @@ -490,11 +476,9 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16(<vscale x 32 x half> %va, <vs ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v9, v7 -; ZVFHMIN-NEXT: vmflt.vf v9, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v9 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: fsflags a0 @@ -543,11 +527,9 @@ define <vscale x 32 x half> @vp_nearbyint_nxv32f16_unmasked(<vscale x 32 x half> ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: frflags a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: fsflags a2 @@ -670,11 +652,9 @@ define <vscale x 4 x float> @vp_nearbyint_nxv4f32(<vscale x 4 x float> %va, <vsc ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf 
v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 @@ -714,11 +694,9 @@ define <vscale x 8 x float> @vp_nearbyint_nxv8f32(<vscale x 8 x float> %va, <vsc ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 @@ -758,11 +736,9 @@ define <vscale x 16 x float> @vp_nearbyint_nxv16f32(<vscale x 16 x float> %va, < ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 @@ -844,11 +820,9 @@ define <vscale x 2 x double> @vp_nearbyint_nxv2f64(<vscale x 2 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: fsflags a0 @@ -888,11 +862,9 @@ define <vscale x 4 x double> 
@vp_nearbyint_nxv4f64(<vscale x 4 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: fsflags a0 @@ -932,11 +904,9 @@ define <vscale x 7 x double> @vp_nearbyint_nxv7f64(<vscale x 7 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 @@ -976,11 +946,9 @@ define <vscale x 8 x double> @vp_nearbyint_nxv8f64(<vscale x 8 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: fsflags a0 @@ -1030,11 +998,9 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va, ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, 
v0.t ; CHECK-NEXT: frflags a2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a2 @@ -1048,11 +1014,9 @@ define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va, ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: fsflags a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/pr88799.ll b/llvm/test/CodeGen/RISCV/rvv/pr88799.ll new file mode 100644 index 0000000..7212a78 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/pr88799.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=riscv64-unknown-linux-gnu -mattr=+v | FileCheck %s + +define i32 @main() vscale_range(2,2) { +; CHECK-LABEL: main: +; CHECK: # %bb.0: # %vector.body +; CHECK-NEXT: lui a0, 1040368 +; CHECK-NEXT: addiw a0, a0, -144 +; CHECK-NEXT: vl2re16.v v8, (a0) +; CHECK-NEXT: vs2r.v v8, (zero) +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret +vector.body: + %0 = load <16 x i16>, ptr getelementptr ([3 x [23 x [23 x i16]]], ptr null, i64 -10593, i64 1, i64 22, i64 0), align 16 + store <16 x i16> %0, ptr null, align 2 + %wide.load = load <vscale x 8 x i16>, ptr getelementptr ([3 x [23 x [23 x i16]]], ptr null, i64 -10593, i64 1, i64 22, i64 0), align 16 + store <vscale x 8 x i16> %wide.load, ptr null, align 2 + ret i32 0 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll index 796e6dd..9ba3da9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll @@ -183,10 +183,8 @@ define <vscale x 4 x half> @vp_rint_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -242,10 +240,8 @@ define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu @@ -261,10 +257,8 @@ define <vscale x 8 x half> @vp_rint_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -320,10 +314,8 @@ define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; ZVFH-NEXT: vmv1r.v v17, v0 -; ZVFH-NEXT: vmflt.vf 
v17, v12, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v12, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v17 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu @@ -339,10 +331,8 @@ define <vscale x 16 x half> @vp_rint_nxv16f16(<vscale x 16 x half> %va, <vscale ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v25, v0 -; ZVFHMIN-NEXT: vmflt.vf v25, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v25 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -398,10 +388,8 @@ define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; ZVFH-NEXT: vmv1r.v v25, v0 -; ZVFH-NEXT: vmflt.vf v25, v16, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v16, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v25 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu @@ -427,10 +415,8 @@ define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -446,10 +432,8 @@ define <vscale x 32 x half> 
@vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v9, v7 -; ZVFHMIN-NEXT: vmflt.vf v9, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v9 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -495,10 +479,8 @@ define <vscale x 32 x half> @vp_rint_nxv32f16_unmasked(<vscale x 32 x half> %va, ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -610,10 +592,8 @@ define <vscale x 4 x float> @vp_rint_nxv4f32(<vscale x 4 x float> %va, <vscale x ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu @@ -650,10 +630,8 @@ define <vscale x 8 x float> @vp_rint_nxv8f32(<vscale x 8 x float> %va, <vscale x ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, 
ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -690,10 +668,8 @@ define <vscale x 16 x float> @vp_rint_nxv16f32(<vscale x 16 x float> %va, <vscal ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -768,10 +744,8 @@ define <vscale x 2 x double> @vp_rint_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -808,10 +782,8 @@ define <vscale x 4 x double> @vp_rint_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu @@ -848,10 +820,8 @@ define <vscale x 7 x double> @vp_rint_nxv7f64(<vscale x 7 x double> %va, <vscale ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; 
CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -888,10 +858,8 @@ define <vscale x 8 x double> @vp_rint_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -938,10 +906,8 @@ define <vscale x 16 x double> @vp_rint_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu @@ -954,10 +920,8 @@ define <vscale x 16 x double> @vp_rint_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; 
CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll index 43fd54c..b3fe6bb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll @@ -201,11 +201,9 @@ define <vscale x 4 x half> @vp_round_nxv4f16(<vscale x 4 x half> %va, <vscale x ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 4 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -287,11 +283,9 @@ define <vscale x 8 x half> @vp_round_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -352,11 +346,9 @@ 
define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; ZVFH-NEXT: vmv1r.v v17, v0 -; ZVFH-NEXT: vmflt.vf v17, v12, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v12, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: fsrmi a0, 4 -; ZVFH-NEXT: vmv1r.v v0, v17 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -373,11 +365,9 @@ define <vscale x 16 x half> @vp_round_nxv16f16(<vscale x 16 x half> %va, <vscale ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v25, v0 -; ZVFHMIN-NEXT: vmflt.vf v25, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v25 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -438,11 +428,9 @@ define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; ZVFH-NEXT: vmv1r.v v25, v0 -; ZVFH-NEXT: vmflt.vf v25, v16, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v16, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: fsrmi a0, 4 -; ZVFH-NEXT: vmv1r.v v0, v25 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -469,11 +457,9 @@ define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: 
vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a2, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -490,11 +476,9 @@ define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v9, v7 -; ZVFHMIN-NEXT: vmflt.vf v9, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v9 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -543,11 +527,9 @@ define <vscale x 32 x half> @vp_round_nxv32f16_unmasked(<vscale x 32 x half> %va ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a2, 4 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -670,11 +652,9 @@ define <vscale x 4 x float> @vp_round_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -714,11 +694,9 @@ define <vscale x 8 x float> @vp_round_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-NEXT: lui a0, 
307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -758,11 +736,9 @@ define <vscale x 16 x float> @vp_round_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -844,11 +820,9 @@ define <vscale x 2 x double> @vp_round_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -888,11 +862,9 @@ define <vscale x 4 x double> @vp_round_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: 
vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -932,11 +904,9 @@ define <vscale x 7 x double> @vp_round_nxv7f64(<vscale x 7 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -976,11 +946,9 @@ define <vscale x 8 x double> @vp_round_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1030,11 +998,9 @@ define <vscale x 16 x double> @vp_round_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a2, 4 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1048,11 +1014,9 @@ define <vscale x 16 x double> @vp_round_nxv16f64(<vscale x 16 x double> %va, <vs ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu 
-; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 4 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll index 8214159..4dba53dd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -201,11 +201,9 @@ define <vscale x 4 x half> @vp_roundeven_nxv4f16(<vscale x 4 x half> %va, <vscal ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <vscale x 8 x half> @vp_roundeven_nxv8f16(<vscale x 8 x half> %va, <vscal ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -287,11 +283,9 @@ define <vscale x 8 x half> @vp_roundeven_nxv8f16(<vscale x 8 x half> %va, <vscal ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, 
fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -352,11 +346,9 @@ define <vscale x 16 x half> @vp_roundeven_nxv16f16(<vscale x 16 x half> %va, <vs ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; ZVFH-NEXT: vmv1r.v v17, v0 -; ZVFH-NEXT: vmflt.vf v17, v12, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v12, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vmv1r.v v0, v17 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -373,11 +365,9 @@ define <vscale x 16 x half> @vp_roundeven_nxv16f16(<vscale x 16 x half> %va, <vs ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v25, v0 -; ZVFHMIN-NEXT: vmflt.vf v25, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v25 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -438,11 +428,9 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vs ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; ZVFH-NEXT: vmv1r.v v25, v0 -; ZVFH-NEXT: vmflt.vf v25, v16, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v16, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: fsrmi a0, 0 -; ZVFH-NEXT: vmv1r.v v0, v25 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -469,11 +457,9 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vs ; 
ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a2, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -490,11 +476,9 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vs ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v9, v7 -; ZVFHMIN-NEXT: vmflt.vf v9, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v9 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -543,11 +527,9 @@ define <vscale x 32 x half> @vp_roundeven_nxv32f16_unmasked(<vscale x 32 x half> ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a2, 0 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -670,11 +652,9 @@ define <vscale x 4 x float> @vp_roundeven_nxv4f32(<vscale x 4 x float> %va, <vsc ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; 
CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -714,11 +694,9 @@ define <vscale x 8 x float> @vp_roundeven_nxv8f32(<vscale x 8 x float> %va, <vsc ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -758,11 +736,9 @@ define <vscale x 16 x float> @vp_roundeven_nxv16f32(<vscale x 16 x float> %va, < ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -844,11 +820,9 @@ define <vscale x 2 x double> @vp_roundeven_nxv2f64(<vscale x 2 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -888,11 +862,9 @@ define <vscale x 4 x double> @vp_roundeven_nxv4f64(<vscale x 4 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, 
m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -932,11 +904,9 @@ define <vscale x 7 x double> @vp_roundeven_nxv7f64(<vscale x 7 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -976,11 +946,9 @@ define <vscale x 8 x double> @vp_roundeven_nxv8f64(<vscale x 8 x double> %va, <v ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1030,11 +998,9 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64(<vscale x 16 x double> %va, ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; 
CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1048,11 +1014,9 @@ define <vscale x 16 x double> @vp_roundeven_nxv16f64(<vscale x 16 x double> %va, ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll index e8ee307..109149f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll @@ -201,11 +201,9 @@ define <vscale x 4 x half> @vp_roundtozero_nxv4f16(<vscale x 4 x half> %va, <vsc ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -266,11 +264,9 @@ define <vscale x 8 x half> @vp_roundtozero_nxv8f16(<vscale x 8 x half> %va, <vsc ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; ZVFH-NEXT: vmv1r.v v13, v0 -; ZVFH-NEXT: vmflt.vf v13, v10, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v10, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vmv1r.v v0, v13 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -287,11 +283,9 @@ define <vscale x 8 x half> 
@vp_roundtozero_nxv8f16(<vscale x 8 x half> %va, <vsc ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v17, v0 -; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -352,11 +346,9 @@ define <vscale x 16 x half> @vp_roundtozero_nxv16f16(<vscale x 16 x half> %va, < ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; ZVFH-NEXT: vmv1r.v v17, v0 -; ZVFH-NEXT: vmflt.vf v17, v12, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v12, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vmv1r.v v0, v17 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -373,11 +365,9 @@ define <vscale x 16 x half> @vp_roundtozero_nxv16f16(<vscale x 16 x half> %va, < ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v25, v0 -; ZVFHMIN-NEXT: vmflt.vf v25, v8, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v25 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -438,11 +428,9 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, < ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu -; ZVFH-NEXT: vmv1r.v v25, v0 -; ZVFH-NEXT: vmflt.vf v25, v16, fa5, v0.t +; ZVFH-NEXT: vmflt.vf v0, v16, fa5, v0.t ; ZVFH-NEXT: vsetvli zero, 
zero, e16, m8, ta, ma ; ZVFH-NEXT: fsrmi a0, 1 -; ZVFH-NEXT: vmv1r.v v0, v25 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -469,11 +457,9 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, < ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a2, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -490,11 +476,9 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, < ; ZVFHMIN-NEXT: vmv1r.v v0, v7 ; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v9, v7 -; ZVFHMIN-NEXT: vmflt.vf v9, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a0, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v9 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -543,11 +527,9 @@ define <vscale x 32 x half> @vp_roundtozero_nxv32f16_unmasked(<vscale x 32 x hal ; ZVFHMIN-NEXT: lui a2, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; ZVFHMIN-NEXT: vmv1r.v v13, v0 -; ZVFHMIN-NEXT: vmflt.vf v13, v24, fa5, v0.t +; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: fsrmi a2, 1 -; ZVFHMIN-NEXT: vmv1r.v v0, v13 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a2 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -670,11 +652,9 @@ define <vscale x 4 x float> @vp_roundtozero_nxv4f32(<vscale x 4 x float> %va, <v ; CHECK-NEXT: lui a0, 
307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -714,11 +694,9 @@ define <vscale x 8 x float> @vp_roundtozero_nxv8f32(<vscale x 8 x float> %va, <v ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -758,11 +736,9 @@ define <vscale x 16 x float> @vp_roundtozero_nxv16f32(<vscale x 16 x float> %va, ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -844,11 +820,9 @@ define <vscale x 2 x double> @vp_roundtozero_nxv2f64(<vscale x 2 x double> %va, ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v10, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v10, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v13 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; 
CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -888,11 +862,9 @@ define <vscale x 4 x double> @vp_roundtozero_nxv4f64(<vscale x 4 x double> %va, ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v12, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v12, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v17 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -932,11 +904,9 @@ define <vscale x 7 x double> @vp_roundtozero_nxv7f64(<vscale x 7 x double> %va, ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -976,11 +946,9 @@ define <vscale x 8 x double> @vp_roundtozero_nxv8f64(<vscale x 8 x double> %va, ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v16, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1030,11 +998,9 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64(<vscale x 16 x double> %v ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; 
CHECK-NEXT: vmv1r.v v5, v0 -; CHECK-NEXT: vmflt.vf v5, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a2, 1 -; CHECK-NEXT: vmv1r.v v0, v5 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a2 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1048,11 +1014,9 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64(<vscale x 16 x double> %v ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmv1r.v v6, v7 -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t +; CHECK-NEXT: vmflt.vf v0, v24, fa5, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/select-int.ll b/llvm/test/CodeGen/RISCV/rvv/select-int.ll index 10b77e5..df6d752 100644 --- a/llvm/test/CodeGen/RISCV/rvv/select-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/select-int.ll @@ -133,9 +133,9 @@ define <vscale x 16 x i1> @select_nxv16i1(i1 zeroext %c, <vscale x 16 x i1> %a, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v9, v10, 0 -; CHECK-NEXT: vmandn.mm v8, v8, v9 -; CHECK-NEXT: vmand.mm v9, v0, v9 +; CHECK-NEXT: vmsne.vi v2, v10, 0 +; CHECK-NEXT: vmandn.mm v8, v8, v2 +; CHECK-NEXT: vmand.mm v9, v0, v2 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %v = select i1 %c, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b @@ -149,9 +149,9 @@ define <vscale x 16 x i1> @selectcc_nxv16i1(i1 signext %a, i1 signext %b, <vscal ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v9, v10, 0 -; CHECK-NEXT: vmandn.mm v8, v8, v9 -; CHECK-NEXT: vmand.mm v9, v0, v9 +; CHECK-NEXT: vmsne.vi v2, v10, 0 +; CHECK-NEXT: vmandn.mm v8, v8, v2 
+; CHECK-NEXT: vmand.mm v9, v0, v2 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %cmp = icmp ne i1 %a, %b @@ -164,9 +164,9 @@ define <vscale x 32 x i1> @select_nxv32i1(i1 zeroext %c, <vscale x 32 x i1> %a, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vmsne.vi v9, v12, 0 -; CHECK-NEXT: vmandn.mm v8, v8, v9 -; CHECK-NEXT: vmand.mm v9, v0, v9 +; CHECK-NEXT: vmsne.vi v4, v12, 0 +; CHECK-NEXT: vmandn.mm v8, v8, v4 +; CHECK-NEXT: vmand.mm v9, v0, v4 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %v = select i1 %c, <vscale x 32 x i1> %a, <vscale x 32 x i1> %b @@ -180,9 +180,9 @@ define <vscale x 32 x i1> @selectcc_nxv32i1(i1 signext %a, i1 signext %b, <vscal ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vmsne.vi v9, v12, 0 -; CHECK-NEXT: vmandn.mm v8, v8, v9 -; CHECK-NEXT: vmand.mm v9, v0, v9 +; CHECK-NEXT: vmsne.vi v4, v12, 0 +; CHECK-NEXT: vmandn.mm v8, v8, v4 +; CHECK-NEXT: vmand.mm v9, v0, v4 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %cmp = icmp ne i1 %a, %b @@ -195,9 +195,9 @@ define <vscale x 64 x i1> @select_nxv64i1(i1 zeroext %c, <vscale x 64 x i1> %a, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vmsne.vi v9, v16, 0 -; CHECK-NEXT: vmandn.mm v8, v8, v9 -; CHECK-NEXT: vmand.mm v9, v0, v9 +; CHECK-NEXT: vmsne.vi v16, v16, 0 +; CHECK-NEXT: vmandn.mm v8, v8, v16 +; CHECK-NEXT: vmand.mm v9, v0, v16 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %v = select i1 %c, <vscale x 64 x i1> %a, <vscale x 64 x i1> %b @@ -211,9 +211,9 @@ define <vscale x 64 x i1> @selectcc_nxv64i1(i1 signext %a, i1 signext %b, <vscal ; CHECK-NEXT: andi a0, a0, 1 ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vmsne.vi v9, v16, 0 -; CHECK-NEXT: vmandn.mm v8, v8, v9 -; CHECK-NEXT: vmand.mm v9, v0, v9 +; CHECK-NEXT: vmsne.vi v16, v16, 
0 +; CHECK-NEXT: vmandn.mm v8, v8, v16 +; CHECK-NEXT: vmand.mm v9, v0, v16 ; CHECK-NEXT: vmor.mm v0, v9, v8 ; CHECK-NEXT: ret %cmp = icmp ne i1 %a, %b diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll index cddd371..ee939d4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -1083,8 +1083,7 @@ define <vscale x 3 x i1> @fcmp_oeq_vv_nxv3f16(<vscale x 3 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v10, v0.t ; ZVFHMIN-NEXT: ret %v = call <vscale x 3 x i1> @llvm.vp.fcmp.nxv3f16(<vscale x 3 x half> %va, <vscale x 3 x half> %vb, metadata !"oeq", <vscale x 3 x i1> %m, i32 %evl) ret <vscale x 3 x i1> %v @@ -1096,8 +1095,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_oeq_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmfeq.vv v0, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_oeq_vv_nxv8f16: @@ -1106,8 +1104,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"oeq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1117,8 +1114,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_oeq_vf_nxv8f16: ; ZVFH: # %bb.0: ; 
ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfeq.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_oeq_vf_nxv8f16: @@ -1131,8 +1127,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1144,8 +1139,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_oeq_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfeq.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_oeq_vf_swap_nxv8f16: @@ -1158,8 +1152,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1171,8 +1164,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ogt_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v10, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmflt.vv 
v0, v10, v8, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ogt_vv_nxv8f16: @@ -1181,8 +1173,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1192,8 +1183,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_ogt_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ogt_vf_nxv8f16: @@ -1206,8 +1196,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1219,8 +1208,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_ogt_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ogt_vf_swap_nxv8f16: @@ -1233,8 +1221,7 @@ define <vscale x 8 x i1> 
@fcmp_ogt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1246,8 +1233,7 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_oge_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vv v12, v10, v8, v0.t -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmfle.vv v0, v10, v8, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_oge_vv_nxv8f16: @@ -1256,8 +1242,7 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1267,8 +1252,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_oge_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfge.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfge.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_oge_vf_nxv8f16: @@ -1281,8 +1265,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, 
a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1294,8 +1277,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_oge_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfle.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_oge_vf_swap_nxv8f16: @@ -1308,8 +1290,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1321,8 +1302,7 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_olt_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmflt.vv v0, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_vv_nxv8f16: @@ -1331,8 +1311,7 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, 
v0.t ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1342,8 +1321,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_olt_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_vf_nxv8f16: @@ -1356,8 +1334,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1369,8 +1346,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_olt_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_olt_vf_swap_nxv8f16: @@ -1383,8 +1359,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, 
<vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1396,8 +1371,7 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ole_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmfle.vv v0, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ole_vv_nxv8f16: @@ -1406,8 +1380,7 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1417,8 +1390,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_ole_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfle.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ole_vf_nxv8f16: @@ -1431,8 +1403,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1444,8 +1415,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8f16(<vscale x 8 x half> %va, 
half ; ZVFH-LABEL: fcmp_ole_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfge.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfge.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ole_vf_swap_nxv8f16: @@ -1458,8 +1428,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1471,9 +1440,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_one_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmflt.vv v13, v10, v8, v0.t -; ZVFH-NEXT: vmor.mm v0, v13, v12 +; ZVFH-NEXT: vmflt.vv v2, v8, v10, v0.t +; ZVFH-NEXT: vmflt.vv v0, v10, v8, v0.t +; ZVFH-NEXT: vmor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_one_vv_nxv8f16: @@ -1482,9 +1451,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v4, v16, v12, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"one", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x 
i1> %v @@ -1494,9 +1463,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_one_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; ZVFH-NEXT: vmor.mm v0, v11, v10 +; ZVFH-NEXT: vmflt.vf v2, v8, fa0, v0.t +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_one_vf_nxv8f16: @@ -1509,9 +1478,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v16, v12, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v4, v12, v16, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1523,9 +1492,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_one_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmflt.vf v11, v8, fa0, v0.t -; ZVFH-NEXT: vmor.mm v0, v11, v10 +; ZVFH-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; ZVFH-NEXT: vmflt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_one_vf_swap_nxv8f16: @@ -1538,9 +1507,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16, 
v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v4, v16, v12, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1552,9 +1521,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ord_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v12, v10, v10, v0.t -; ZVFH-NEXT: vmfeq.vv v10, v8, v8, v0.t -; ZVFH-NEXT: vmand.mm v0, v10, v12 +; ZVFH-NEXT: vmfeq.vv v2, v10, v10, v0.t +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmand.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_vv_nxv8f16: @@ -1562,12 +1531,12 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v10, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t -; ZVFHMIN-NEXT: vmand.mm v0, v8, v10 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmand.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1579,9 +1548,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vf v12, v10, fa0, v0.t -; ZVFH-NEXT: vmfeq.vv v10, v8, 
v8, v0.t -; ZVFH-NEXT: vmand.mm v0, v10, v12 +; ZVFH-NEXT: vmfeq.vf v2, v10, fa0, v0.t +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmand.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv8f16: @@ -1593,12 +1562,12 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v12, v12, v0.t -; ZVFHMIN-NEXT: vmand.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmand.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1612,9 +1581,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vf v12, v10, fa0, v0.t -; ZVFH-NEXT: vmfeq.vv v10, v8, v8, v0.t -; ZVFH-NEXT: vmand.mm v0, v12, v10 +; ZVFH-NEXT: vmfeq.vf v2, v10, fa0, v0.t +; ZVFH-NEXT: vmfeq.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmand.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_vf_swap_nxv8f16: @@ -1626,12 +1595,12 @@ define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli 
zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v12, v12, v0.t -; ZVFHMIN-NEXT: vmand.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmand.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1643,9 +1612,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ueq_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmflt.vv v13, v10, v8, v0.t -; ZVFH-NEXT: vmnor.mm v0, v13, v12 +; ZVFH-NEXT: vmflt.vv v2, v8, v10, v0.t +; ZVFH-NEXT: vmflt.vv v0, v10, v8, v0.t +; ZVFH-NEXT: vmnor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ueq_vv_nxv8f16: @@ -1654,9 +1623,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16, v0.t -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v4, v16, v12, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmnor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ueq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1666,9 +1635,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_ueq_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; ZVFH-NEXT: vmnor.mm v0, v11, v10 +; ZVFH-NEXT: vmflt.vf v2, v8, fa0, v0.t +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnor.mm v0, v0, 
v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ueq_vf_nxv8f16: @@ -1681,9 +1650,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v4, v12, v16, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1695,9 +1664,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_ueq_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmflt.vf v11, v8, fa0, v0.t -; ZVFH-NEXT: vmnor.mm v0, v11, v10 +; ZVFH-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; ZVFH-NEXT: vmflt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ueq_vf_swap_nxv8f16: @@ -1710,9 +1679,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16, v0.t -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v4, v16, v12, v0.t +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmnor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1724,8 +1693,8 @@ define <vscale x 8 x i1> 
@fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ugt_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmfle.vv v0, v8, v10, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ugt_vv_nxv8f16: @@ -1734,8 +1703,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1745,8 +1714,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_ugt_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfle.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ugt_vf_nxv8f16: @@ -1759,8 +1728,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1772,8 +1741,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8f16(<vscale x 8 
x half> %va, half ; ZVFH-LABEL: fcmp_ugt_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfge.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfge.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ugt_vf_swap_nxv8f16: @@ -1786,8 +1755,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1799,8 +1768,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_uge_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmflt.vv v0, v8, v10, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uge_vv_nxv8f16: @@ -1809,8 +1778,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1820,8 +1789,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; 
ZVFH-LABEL: fcmp_uge_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uge_vf_nxv8f16: @@ -1834,8 +1803,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1847,8 +1816,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_uge_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uge_vf_swap_nxv8f16: @@ -1861,8 +1830,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1874,8 +1843,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: 
fcmp_ult_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vv v12, v10, v8, v0.t -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmfle.vv v0, v10, v8, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ult_vv_nxv8f16: @@ -1884,8 +1853,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1895,8 +1864,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_ult_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfge.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfge.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ult_vf_nxv8f16: @@ -1909,8 +1878,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1922,8 +1891,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_ult_vf_swap_nxv8f16: ; ZVFH: # 
%bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfle.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ult_vf_swap_nxv8f16: @@ -1936,8 +1905,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1949,8 +1918,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ule_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v10, v8, v0.t -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmflt.vv v0, v10, v8, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_vv_nxv8f16: @@ -1959,8 +1928,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1970,8 +1939,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_ule_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli 
zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_vf_nxv8f16: @@ -1984,8 +1953,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1997,8 +1966,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_ule_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0, v0.t +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_vf_swap_nxv8f16: @@ -2011,8 +1980,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16, v0.t +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2024,8 +1993,7 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_une_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, 
ta, ma -; ZVFH-NEXT: vmfne.vv v12, v8, v10, v0.t -; ZVFH-NEXT: vmv1r.v v0, v12 +; ZVFH-NEXT: vmfne.vv v0, v8, v10, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_une_vv_nxv8f16: @@ -2034,8 +2002,7 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"une", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2045,8 +2012,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFH-LABEL: fcmp_une_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfne.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfne.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_une_vf_nxv8f16: @@ -2059,8 +2025,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v16, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v16, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2072,8 +2037,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-LABEL: fcmp_une_vf_swap_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfne.vf v10, v8, fa0, v0.t -; ZVFH-NEXT: vmv1r.v v0, v10 +; ZVFH-NEXT: vmfne.vf v0, v8, fa0, v0.t ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: 
fcmp_une_vf_swap_nxv8f16: @@ -2086,8 +2050,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v16, v12, v0.t -; ZVFHMIN-NEXT: vmv1r.v v0, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v16, v12, v0.t ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2099,9 +2062,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_uno_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfne.vv v12, v10, v10, v0.t -; ZVFH-NEXT: vmfne.vv v10, v8, v8, v0.t -; ZVFH-NEXT: vmor.mm v0, v10, v12 +; ZVFH-NEXT: vmfne.vv v2, v10, v10, v0.t +; ZVFH-NEXT: vmfne.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_vv_nxv8f16: @@ -2109,12 +2072,12 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v10, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v12, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v8, v10 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uno", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2126,9 +2089,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half 
%b, ; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfne.vf v12, v10, fa0, v0.t -; ZVFH-NEXT: vmfne.vv v10, v8, v8, v0.t -; ZVFH-NEXT: vmor.mm v0, v10, v12 +; ZVFH-NEXT: vmfne.vf v2, v10, fa0, v0.t +; ZVFH-NEXT: vmfne.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv8f16: @@ -2140,12 +2103,12 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b, ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v12, v12, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2159,9 +2122,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFH-NEXT: vmfne.vf v12, v10, fa0, v0.t -; ZVFH-NEXT: vmfne.vv v10, v8, v8, v0.t -; ZVFH-NEXT: vmor.mm v0, v12, v10 +; ZVFH-NEXT: vmfne.vf v2, v10, fa0, v0.t +; ZVFH-NEXT: vmfne.vv v0, v8, v8, v0.t +; ZVFH-NEXT: vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_vf_swap_nxv8f16: @@ -2173,12 +2136,12 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f16(<vscale x 8 x half> %va, half ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; 
ZVFHMIN-NEXT: vmfne.vv v8, v12, v12, v0.t +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12, v0.t ; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v12, v12, v0.t -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12, v0.t +; ZVFHMIN-NEXT: vmor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %elt.head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %vb = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2199,35 +2162,34 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; ZVFH-NEXT: addi a1, sp, 16 ; ZVFH-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; ZVFH-NEXT: csrr a1, vlenb -; ZVFH-NEXT: vsetvli a3, zero, e8, m1, ta, ma -; ZVFH-NEXT: slli a3, a1, 3 -; ZVFH-NEXT: add a3, a0, a3 -; ZVFH-NEXT: vl8re16.v v8, (a3) -; ZVFH-NEXT: slli a3, a1, 2 +; ZVFH-NEXT: csrr a3, vlenb +; ZVFH-NEXT: srli a1, a3, 1 +; ZVFH-NEXT: vsetvli a4, zero, e8, m1, ta, ma +; ZVFH-NEXT: vmv1r.v v7, v0 +; ZVFH-NEXT: vslidedown.vx v0, v0, a1 +; ZVFH-NEXT: slli a4, a3, 3 +; ZVFH-NEXT: add a4, a0, a4 +; ZVFH-NEXT: vl8re16.v v24, (a4) +; ZVFH-NEXT: slli a3, a3, 2 ; ZVFH-NEXT: sub a4, a2, a3 ; ZVFH-NEXT: sltu a5, a2, a4 ; ZVFH-NEXT: addi a5, a5, -1 +; ZVFH-NEXT: vl8re16.v v8, (a0) ; ZVFH-NEXT: and a4, a5, a4 -; ZVFH-NEXT: srli a1, a1, 1 -; ZVFH-NEXT: vl8re16.v v24, (a0) -; ZVFH-NEXT: vmv1r.v v6, v0 -; ZVFH-NEXT: vslidedown.vx v0, v0, a1 ; ZVFH-NEXT: vsetvli zero, a4, e16, m8, ta, ma -; ZVFH-NEXT: vmfeq.vv v7, v16, v8, v0.t +; ZVFH-NEXT: vmfeq.vv v16, v16, v24, v0.t ; ZVFH-NEXT: bltu a2, a3, .LBB85_2 ; ZVFH-NEXT: # %bb.1: ; ZVFH-NEXT: mv a2, a3 ; ZVFH-NEXT: .LBB85_2: ; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; ZVFH-NEXT: vmv1r.v v0, v6 +; 
ZVFH-NEXT: vmv1r.v v0, v7 ; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: vmfeq.vv v16, v8, v24, v0.t +; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vmfeq.vv v0, v24, v8, v0.t ; ZVFH-NEXT: add a0, a1, a1 ; ZVFH-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; ZVFH-NEXT: vslideup.vx v16, v7, a1 -; ZVFH-NEXT: vmv.v.v v0, v16 +; ZVFH-NEXT: vslideup.vx v0, v16, a1 ; ZVFH-NEXT: csrr a0, vlenb ; ZVFH-NEXT: slli a0, a0, 3 ; ZVFH-NEXT: add sp, sp, a0 @@ -2239,13 +2201,13 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: addi sp, sp, -16 ; ZVFHMIN-NEXT: .cfi_def_cfa_offset 16 ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: li a3, 26 +; ZVFHMIN-NEXT: li a3, 34 ; ZVFHMIN-NEXT: mul a1, a1, a3 ; ZVFHMIN-NEXT: sub sp, sp, a1 -; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; ZVFHMIN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; ZVFHMIN-NEXT: csrr a1, vlenb -; ZVFHMIN-NEXT: slli a3, a1, 3 -; ZVFHMIN-NEXT: add a1, a3, a1 +; ZVFHMIN-NEXT: li a3, 25 +; ZVFHMIN-NEXT: mul a1, a1, a3 ; ZVFHMIN-NEXT: add a1, sp, a1 ; ZVFHMIN-NEXT: addi a1, a1, 16 ; ZVFHMIN-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill @@ -2266,8 +2228,8 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: and a7, a7, a1 ; ZVFHMIN-NEXT: srli a1, a3, 1 ; ZVFHMIN-NEXT: csrr t0, vlenb -; ZVFHMIN-NEXT: li t1, 25 -; ZVFHMIN-NEXT: mul t0, t0, t1 +; ZVFHMIN-NEXT: slli t1, t0, 5 +; ZVFHMIN-NEXT: add t0, t1, t0 ; ZVFHMIN-NEXT: add t0, sp, t0 ; ZVFHMIN-NEXT: addi t0, t0, 16 ; ZVFHMIN-NEXT: vs1r.v v0, (t0) # Unknown-size Folded Spill @@ -2289,100 +2251,129 @@ define <vscale x 64 x i1> @fcmp_oeq_vv_nxv64f16(<vscale x 64 x half> %va, <vscal ; ZVFHMIN-NEXT: addi a0, a0, 16 ; 
ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli t0, a0, 3 +; ZVFHMIN-NEXT: add a0, t0, a0 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: bltu a6, a4, .LBB85_2 ; ZVFHMIN-NEXT: # %bb.1: ; ZVFHMIN-NEXT: mv a6, a4 ; ZVFHMIN-NEXT: .LBB85_2: ; ZVFHMIN-NEXT: vsetvli zero, a7, e32, m8, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v20, v24, v8, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v8, v0.t +; ZVFHMIN-NEXT: addi a0, sp, 16 +; ZVFHMIN-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v24 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a7, a0, 3 +; ZVFHMIN-NEXT: add a0, a7, a0 +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, a6, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmv1r.v v0, v6 -; ZVFHMIN-NEXT: vmfeq.vv v16, v24, v8, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v24, v0.t ; ZVFHMIN-NEXT: add a0, a3, a3 ; ZVFHMIN-NEXT: bltu a2, a5, .LBB85_4 ; ZVFHMIN-NEXT: # %bb.3: ; ZVFHMIN-NEXT: mv a2, a5 ; ZVFHMIN-NEXT: .LBB85_4: ; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslideup.vx v16, v20, a3 +; ZVFHMIN-NEXT: addi a5, sp, 16 +; ZVFHMIN-NEXT: vl1r.v v8, (a5) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vslideup.vx v0, v8, a3 ; ZVFHMIN-NEXT: csrr a5, vlenb +; ZVFHMIN-NEXT: slli a6, a5, 3 +; ZVFHMIN-NEXT: add a5, a6, a5 ; 
ZVFHMIN-NEXT: add a5, sp, a5 ; ZVFHMIN-NEXT: addi a5, a5, 16 -; ZVFHMIN-NEXT: vs1r.v v16, (a5) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vs1r.v v0, (a5) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: sub a5, a2, a4 ; ZVFHMIN-NEXT: sltu a6, a2, a5 ; ZVFHMIN-NEXT: addi a6, a6, -1 ; ZVFHMIN-NEXT: and a5, a6, a5 ; ZVFHMIN-NEXT: vsetvli a6, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: csrr a6, vlenb -; ZVFHMIN-NEXT: li a7, 25 -; ZVFHMIN-NEXT: mul a6, a6, a7 +; ZVFHMIN-NEXT: slli a7, a6, 5 +; ZVFHMIN-NEXT: add a6, a7, a6 ; ZVFHMIN-NEXT: add a6, sp, a6 ; ZVFHMIN-NEXT: addi a6, a6, 16 ; ZVFHMIN-NEXT: vl1r.v v8, (a6) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vslidedown.vx v8, v8, a3 -; ZVFHMIN-NEXT: addi a6, sp, 16 -; ZVFHMIN-NEXT: vs1r.v v8, (a6) # Unknown-size Folded Spill +; ZVFHMIN-NEXT: vslidedown.vx v0, v8, a3 ; ZVFHMIN-NEXT: vsetvli a6, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: csrr a6, vlenb -; ZVFHMIN-NEXT: slli a7, a6, 3 -; ZVFHMIN-NEXT: add a6, a7, a6 +; ZVFHMIN-NEXT: li a7, 25 +; ZVFHMIN-NEXT: mul a6, a6, a7 ; ZVFHMIN-NEXT: add a6, sp, a6 ; ZVFHMIN-NEXT: addi a6, a6, 16 -; ZVFHMIN-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 +; ZVFHMIN-NEXT: vl8r.v v24, (a6) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v28 ; ZVFHMIN-NEXT: csrr a6, vlenb ; ZVFHMIN-NEXT: slli a7, a6, 4 ; ZVFHMIN-NEXT: add a6, a7, a6 ; ZVFHMIN-NEXT: add a6, sp, a6 ; ZVFHMIN-NEXT: addi a6, a6, 16 -; ZVFHMIN-NEXT: vl8r.v v0, (a6) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v4 +; ZVFHMIN-NEXT: vl8r.v v16, (a6) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v20 ; ZVFHMIN-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; ZVFHMIN-NEXT: addi a5, sp, 16 -; ZVFHMIN-NEXT: vl1r.v v0, (a5) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmfeq.vv v6, v24, v8, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v24, v0.t +; ZVFHMIN-NEXT: csrr a5, vlenb +; ZVFHMIN-NEXT: add a5, sp, a5 +; ZVFHMIN-NEXT: addi a5, a5, 16 +; ZVFHMIN-NEXT: vs1r.v 
v0, (a5) # Unknown-size Folded Spill ; ZVFHMIN-NEXT: bltu a2, a4, .LBB85_6 ; ZVFHMIN-NEXT: # %bb.5: ; ZVFHMIN-NEXT: mv a2, a4 ; ZVFHMIN-NEXT: .LBB85_6: ; ZVFHMIN-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16 +; ZVFHMIN-NEXT: csrr a4, vlenb +; ZVFHMIN-NEXT: li a5, 25 +; ZVFHMIN-NEXT: mul a4, a4, a5 +; ZVFHMIN-NEXT: add a4, sp, a4 +; ZVFHMIN-NEXT: addi a4, a4, 16 +; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: csrr a4, vlenb ; ZVFHMIN-NEXT: slli a5, a4, 4 ; ZVFHMIN-NEXT: add a4, a5, a4 ; ZVFHMIN-NEXT: add a4, sp, a4 ; ZVFHMIN-NEXT: addi a4, a4, 16 -; ZVFHMIN-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v16 ; ZVFHMIN-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; ZVFHMIN-NEXT: csrr a2, vlenb -; ZVFHMIN-NEXT: li a4, 25 -; ZVFHMIN-NEXT: mul a2, a2, a4 +; ZVFHMIN-NEXT: slli a4, a2, 5 +; ZVFHMIN-NEXT: add a2, a4, a2 ; ZVFHMIN-NEXT: add a2, sp, a2 ; ZVFHMIN-NEXT: addi a2, a2, 16 ; ZVFHMIN-NEXT: vl1r.v v0, (a2) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vmfeq.vv v8, v24, v16, v0.t +; ZVFHMIN-NEXT: vmfeq.vv v0, v24, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslideup.vx v8, v6, a3 +; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: add a0, sp, a0 +; ZVFHMIN-NEXT: addi a0, a0, 16 +; ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; ZVFHMIN-NEXT: vslideup.vx v0, v8, a3 ; ZVFHMIN-NEXT: add a0, a1, a1 ; ZVFHMIN-NEXT: vsetvli zero, a0, e8, m1, ta, ma ; ZVFHMIN-NEXT: csrr a0, vlenb +; ZVFHMIN-NEXT: slli a2, a0, 3 +; ZVFHMIN-NEXT: add a0, a2, a0 ; ZVFHMIN-NEXT: add a0, sp, a0 ; ZVFHMIN-NEXT: addi a0, a0, 16 -; ZVFHMIN-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload -; ZVFHMIN-NEXT: vslideup.vx v8, v9, a1 -; ZVFHMIN-NEXT: vmv.v.v v0, v8 +; ZVFHMIN-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload +; 
ZVFHMIN-NEXT: vslideup.vx v0, v8, a1 ; ZVFHMIN-NEXT: csrr a0, vlenb -; ZVFHMIN-NEXT: li a1, 26 +; ZVFHMIN-NEXT: li a1, 34 ; ZVFHMIN-NEXT: mul a0, a0, a1 ; ZVFHMIN-NEXT: add sp, sp, a0 ; ZVFHMIN-NEXT: addi sp, sp, 16 @@ -2919,8 +2910,7 @@ define <vscale x 3 x i1> @fcmp_oeq_vv_nxv3f64(<vscale x 3 x double> %va, <vscale ; CHECK-LABEL: fcmp_oeq_vv_nxv3f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 3 x i1> @llvm.vp.fcmp.nxv3f64(<vscale x 3 x double> %va, <vscale x 3 x double> %vb, metadata !"oeq", <vscale x 3 x i1> %m, i32 %evl) ret <vscale x 3 x i1> %v @@ -2932,8 +2922,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_oeq_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oeq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2943,8 +2932,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_oeq_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfeq.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2956,8 +2944,7 @@ define <vscale x 8 x i1> @fcmp_oeq_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_oeq_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; 
CHECK-NEXT: vmfeq.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfeq.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2969,8 +2956,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ogt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2980,8 +2966,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ogt_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2993,8 +2978,7 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_ogt_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3006,8 +2990,7 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f64(<vscale x 8 x double> %va, 
<vscale ; CHECK-LABEL: fcmp_oge_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3017,8 +3000,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_oge_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3030,8 +3012,7 @@ define <vscale x 8 x i1> @fcmp_oge_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_oge_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3043,8 +3024,7 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_olt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> 
%v @@ -3054,8 +3034,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_olt_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3067,8 +3046,7 @@ define <vscale x 8 x i1> @fcmp_olt_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_olt_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3080,8 +3058,7 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ole_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3091,8 +3068,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ole_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb 
= shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3104,8 +3080,7 @@ define <vscale x 8 x i1> @fcmp_ole_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_ole_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3118,8 +3093,8 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v24 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"one", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3130,8 +3105,8 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3144,8 +3119,8 @@ define <vscale x 8 x i1> @fcmp_one_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t 
-; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3157,9 +3132,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v16, v16, v0.t -; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vv v16, v16, v16, v0.t +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3171,9 +3146,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vf v24, v16, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vf v16, v16, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3187,9 +3162,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vf v24, v16, fa0, v0.t -; CHECK-NEXT: vmfeq.vv v16, 
v8, v8, v0.t -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfeq.vf v16, v16, fa0, v0.t +; CHECK-NEXT: vmfeq.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3202,8 +3177,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v24 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ueq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3214,8 +3189,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3228,8 +3203,8 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = 
shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3241,8 +3216,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3252,8 +3227,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ugt_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3265,8 +3240,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_ugt_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3278,8 +3253,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; 
CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3289,8 +3264,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_uge_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3302,8 +3277,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_uge_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3315,8 +3290,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 
8 x i1> %v @@ -3326,8 +3301,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ult_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3339,8 +3314,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_ult_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3352,8 +3327,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ule", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3363,8 +3338,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ule_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, 
v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3376,8 +3351,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_ule_vf_swap_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3389,8 +3364,7 @@ define <vscale x 8 x i1> @fcmp_une_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_une_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmfne.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"une", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3400,8 +3374,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_une_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfne.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3413,8 +3386,7 @@ define <vscale x 8 x i1> @fcmp_une_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-LABEL: fcmp_une_vf_swap_nxv8f64: 
; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmfne.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3426,9 +3398,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v16, v16, v0.t -; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vv v16, v16, v16, v0.t +; CHECK-NEXT: vmfne.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uno", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3440,9 +3412,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vf v24, v16, fa0, v0.t -; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vf v16, v16, fa0, v0.t +; CHECK-NEXT: vmfne.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3456,9 +3428,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_swap_nxv8f64(<vscale x 8 x double> %va, do ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vf v24, v16, fa0, v0.t -; 
CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v24, v16 +; CHECK-NEXT: vmfne.vf v16, v16, fa0, v0.t +; CHECK-NEXT: vmfne.vv v0, v8, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v16, v0 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3474,12 +3446,12 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 48 +; CHECK-NEXT: li a3, 34 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x22, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 34 * vlenb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 40 +; CHECK-NEXT: li a3, 25 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 @@ -3490,7 +3462,7 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc ; CHECK-NEXT: slli t1, a3, 3 ; CHECK-NEXT: srli a4, a3, 2 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v6, v0, a4 +; CHECK-NEXT: vslidedown.vx v7, v0, a4 ; CHECK-NEXT: srli a1, a3, 3 ; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma ; CHECK-NEXT: add a5, a2, t1 @@ -3506,111 +3478,103 @@ define <vscale x 32 x i1> @fcmp_oeq_vv_nxv32f64(<vscale x 32 x double> %va, <vsc ; CHECK-NEXT: .LBB171_2: ; CHECK-NEXT: add t0, a2, t0 ; CHECK-NEXT: add t1, a0, t1 -; CHECK-NEXT: vslidedown.vx v5, v6, a1 +; CHECK-NEXT: vslidedown.vx v6, v7, a1 ; CHECK-NEXT: add t2, a2, t2 ; CHECK-NEXT: vl8re64.v v24, (a2) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi 
a2, a2, 16 -; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: sub a2, a7, a3 ; CHECK-NEXT: sltu t3, a7, a2 ; CHECK-NEXT: addi t3, t3, -1 ; CHECK-NEXT: and a2, t3, a2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v7, v16, v8, v0.t +; CHECK-NEXT: vmfeq.vv v8, v16, v8, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a7, a3, .LBB171_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a7, a3 ; CHECK-NEXT: .LBB171_4: ; CHECK-NEXT: vl8re64.v v8, (t0) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 5 +; CHECK-NEXT: slli t0, a2, 4 +; CHECK-NEXT: add a2, t0, a2 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v8, (t1) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li t0, 24 -; CHECK-NEXT: mul a2, a2, t0 +; CHECK-NEXT: slli t0, a2, 3 +; CHECK-NEXT: add a2, t0, a2 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v8, (t2) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v16, (a0) ; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 40 +; CHECK-NEXT: li a2, 25 ; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v8, 
(a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vmfeq.vv v16, v24, v8, v0.t +; CHECK-NEXT: vmfeq.vv v24, v8, v24, v0.t ; CHECK-NEXT: add a0, a1, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma ; CHECK-NEXT: sub a0, a6, a5 ; CHECK-NEXT: sltu a2, a6, a0 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: vslideup.vx v16, v7, a1 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl1r.v v0, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vslideup.vx v24, v0, a1 ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: bltu a0, a3, .LBB171_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB171_6: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v17, v8, v24, v0.t +; CHECK-NEXT: vmfeq.vv v8, v16, v8, v0.t ; CHECK-NEXT: add a2, a4, a1 ; CHECK-NEXT: vsetvli zero, a2, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vx v16, v17, a4 +; CHECK-NEXT: vslideup.vx v24, v8, a4 ; CHECK-NEXT: sub a2, a0, a3 ; CHECK-NEXT: sltu a0, a0, a2 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a2 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v5 +; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: slli a2, a0, 4 +; CHECK-NEXT: add a0, a2, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a2, a0, 3 +; 
CHECK-NEXT: add a0, a2, a0 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v17, v8, v24, v0.t +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v8, v16, v8, v0.t ; CHECK-NEXT: slli a0, a1, 1 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v16, v17, a0 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vslideup.vx v24, v8, a0 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 48 +; CHECK-NEXT: li a1, 34 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll index aee2551..5d14143 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll @@ -580,9 +580,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_one_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v8, v10 -; ZVFH-NEXT: vmflt.vv v13, v10, v8 -; ZVFH-NEXT: vmor.mm v0, v13, v12 +; ZVFH-NEXT: vmflt.vv v0, v8, v10 +; ZVFH-NEXT: vmflt.vv v2, v10, v8 +; ZVFH-NEXT: vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_one_vv_nxv8f16: @@ -591,9 +591,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16 -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmflt.vv v4, v12, v16 +; ZVFHMIN-NEXT: vmor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp one <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -603,9 +603,9 @@ define <vscale 
x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_one_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0 -; ZVFH-NEXT: vmfgt.vf v11, v8, fa0 -; ZVFH-NEXT: vmor.mm v0, v11, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vmfgt.vf v2, v8, fa0 +; ZVFH-NEXT: vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_one_vf_nxv8f16: @@ -618,9 +618,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16 -; ZVFHMIN-NEXT: vmflt.vv v9, v16, v12 -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16 +; ZVFHMIN-NEXT: vmflt.vv v4, v16, v12 +; ZVFHMIN-NEXT: vmor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -632,9 +632,9 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_one_fv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0 -; ZVFH-NEXT: vmflt.vf v11, v8, fa0 -; ZVFH-NEXT: vmor.mm v0, v11, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vmflt.vf v2, v8, fa0 +; ZVFH-NEXT: vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_one_fv_nxv8f16: @@ -647,9 +647,9 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16 -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmflt.vv v4, v12, v16 +; ZVFHMIN-NEXT: vmor.mm v0, 
v4, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -705,9 +705,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ord_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmfeq.vv v10, v8, v8 -; ZVFH-NEXT: vmand.mm v0, v10, v12 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 +; ZVFH-NEXT: vmfeq.vv v2, v8, v8 +; ZVFH-NEXT: vmand.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_vv_nxv8f16: @@ -715,12 +715,12 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v10, v12, v12 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmand.mm v0, v8, v10 +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmand.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp ord <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -731,9 +731,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 -; ZVFH-NEXT: vmfeq.vf v12, v10, fa0 -; ZVFH-NEXT: vmfeq.vv v10, v8, v8 -; ZVFH-NEXT: vmand.mm v0, v10, v12 +; ZVFH-NEXT: vmfeq.vf v0, v10, fa0 +; ZVFH-NEXT: vmfeq.vv v2, v8, v8 +; ZVFH-NEXT: vmand.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv8f16: @@ -745,12 +745,12 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v12, v12 -; ZVFHMIN-NEXT: vmand.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmand.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -763,9 +763,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 -; ZVFH-NEXT: vmfeq.vf v12, v10, fa0 -; ZVFH-NEXT: vmfeq.vv v10, v8, v8 -; ZVFH-NEXT: vmand.mm v0, v12, v10 +; ZVFH-NEXT: vmfeq.vf v0, v10, fa0 +; ZVFH-NEXT: vmfeq.vv v2, v8, v8 +; ZVFH-NEXT: vmand.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_fv_nxv8f16: @@ -777,12 +777,12 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v12, v12 -; ZVFHMIN-NEXT: vmand.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmand.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -794,9 +794,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16_nonans(<vscale x 8 x half> 
%va, <v ; ZVFH-LABEL: fcmp_ord_vv_nxv8f16_nonans: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfeq.vv v12, v10, v10 -; ZVFH-NEXT: vmfeq.vv v10, v8, v8 -; ZVFH-NEXT: vmand.mm v0, v10, v12 +; ZVFH-NEXT: vmfeq.vv v0, v10, v10 +; ZVFH-NEXT: vmfeq.vv v2, v8, v8 +; ZVFH-NEXT: vmand.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_vv_nxv8f16_nonans: @@ -804,12 +804,12 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v10, v12, v12 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmand.mm v0, v8, v10 +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmand.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp ord <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -820,9 +820,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 -; ZVFH-NEXT: vmfeq.vf v12, v10, fa0 -; ZVFH-NEXT: vmfeq.vv v10, v8, v8 -; ZVFH-NEXT: vmand.mm v0, v10, v12 +; ZVFH-NEXT: vmfeq.vf v0, v10, fa0 +; ZVFH-NEXT: vmfeq.vv v2, v8, v8 +; ZVFH-NEXT: vmand.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ord_vf_nxv8f16_nonans: @@ -834,12 +834,12 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, 
zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v12, v12 -; ZVFHMIN-NEXT: vmand.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfeq.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmand.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -851,9 +851,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ueq_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v8, v10 -; ZVFH-NEXT: vmflt.vv v13, v10, v8 -; ZVFH-NEXT: vmnor.mm v0, v13, v12 +; ZVFH-NEXT: vmflt.vv v0, v8, v10 +; ZVFH-NEXT: vmflt.vv v2, v10, v8 +; ZVFH-NEXT: vmnor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ueq_vv_nxv8f16: @@ -862,9 +862,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16 -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmflt.vv v4, v12, v16 +; ZVFHMIN-NEXT: vmnor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp ueq <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -874,9 +874,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ueq_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0 -; ZVFH-NEXT: vmfgt.vf v11, v8, fa0 -; ZVFH-NEXT: vmnor.mm v0, v11, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vmfgt.vf v2, v8, fa0 +; ZVFH-NEXT: vmnor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ueq_vf_nxv8f16: @@ -889,9 +889,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; 
ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16 -; ZVFHMIN-NEXT: vmflt.vv v9, v16, v12 -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16 +; ZVFHMIN-NEXT: vmflt.vv v4, v16, v12 +; ZVFHMIN-NEXT: vmnor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -903,9 +903,9 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ueq_fv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0 -; ZVFH-NEXT: vmflt.vf v11, v8, fa0 -; ZVFH-NEXT: vmnor.mm v0, v11, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vmflt.vf v2, v8, fa0 +; ZVFH-NEXT: vmnor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ueq_fv_nxv8f16: @@ -918,9 +918,9 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmflt.vv v9, v12, v16 -; ZVFHMIN-NEXT: vmnor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmflt.vv v4, v12, v16 +; ZVFHMIN-NEXT: vmnor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -976,8 +976,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ugt_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vv v12, v8, v10 -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmfle.vv v0, v8, v10 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; 
ZVFHMIN-LABEL: fcmp_ugt_vv_nxv8f16: @@ -986,8 +986,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp ugt <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -997,8 +997,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ugt_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vf v10, v8, fa0 -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfle.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ugt_vf_nxv8f16: @@ -1011,8 +1011,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v16 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v16 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1024,8 +1024,8 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ugt_fv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfge.vf v10, v8, fa0 -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfge.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ugt_fv_nxv8f16: @@ -1038,8 +1038,8 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1095,8 +1095,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_uge_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v8, v10 -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmflt.vv v0, v8, v10 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uge_vv_nxv8f16: @@ -1105,8 +1105,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp uge <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -1116,8 +1116,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_uge_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0 -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uge_vf_nxv8f16: @@ -1130,8 +1130,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16 +; 
ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1143,8 +1143,8 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_uge_fv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0 -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uge_fv_nxv8f16: @@ -1157,8 +1157,8 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1214,8 +1214,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ult_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vv v12, v10, v8 -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmfle.vv v0, v10, v8 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ult_vv_nxv8f16: @@ -1224,8 +1224,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp ult <vscale x 8 x half> 
%va, %vb ret <vscale x 8 x i1> %vc @@ -1235,8 +1235,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ult_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfge.vf v10, v8, fa0 -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfge.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ult_vf_nxv8f16: @@ -1249,8 +1249,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1262,8 +1262,8 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ult_fv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfle.vf v10, v8, fa0 -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfle.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ult_fv_nxv8f16: @@ -1276,8 +1276,8 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfle.vv v8, v12, v16 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmfle.vv v0, v12, v16 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1333,8 +1333,8 @@ define <vscale x 8 x i1> 
@fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_ule_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vv v12, v10, v8 -; ZVFH-NEXT: vmnot.m v0, v12 +; ZVFH-NEXT: vmflt.vv v0, v10, v8 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_vv_nxv8f16: @@ -1343,8 +1343,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp ule <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -1354,8 +1354,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ule_vf_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfgt.vf v10, v8, fa0 -; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmfgt.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_vf_nxv8f16: @@ -1368,8 +1368,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v16, v12 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1381,8 +1381,8 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH-LABEL: fcmp_ule_fv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmflt.vf v10, v8, fa0 
-; ZVFH-NEXT: vmnot.m v0, v10 +; ZVFH-NEXT: vmflt.vf v0, v8, fa0 +; ZVFH-NEXT: vmnot.m v0, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_ule_fv_nxv8f16: @@ -1395,8 +1395,8 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmflt.vv v8, v12, v16 -; ZVFHMIN-NEXT: vmnot.m v0, v8 +; ZVFHMIN-NEXT: vmflt.vv v0, v12, v16 +; ZVFHMIN-NEXT: vmnot.m v0, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1565,9 +1565,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFH-LABEL: fcmp_uno_vv_nxv8f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfne.vv v12, v10, v10 -; ZVFH-NEXT: vmfne.vv v10, v8, v8 -; ZVFH-NEXT: vmor.mm v0, v10, v12 +; ZVFH-NEXT: vmfne.vv v0, v10, v10 +; ZVFH-NEXT: vmfne.vv v2, v8, v8 +; ZVFH-NEXT: vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_vv_nxv8f16: @@ -1575,12 +1575,12 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v10, v12, v12 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmor.mm v0, v8, v10 +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp uno <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -1591,9 +1591,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; 
ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 -; ZVFH-NEXT: vmfne.vf v12, v10, fa0 -; ZVFH-NEXT: vmfne.vv v10, v8, v8 -; ZVFH-NEXT: vmor.mm v0, v10, v12 +; ZVFH-NEXT: vmfne.vf v0, v10, fa0 +; ZVFH-NEXT: vmfne.vv v2, v8, v8 +; ZVFH-NEXT: vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv8f16: @@ -1605,12 +1605,12 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v12, v12 -; ZVFHMIN-NEXT: vmor.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1623,9 +1623,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 -; ZVFH-NEXT: vmfne.vf v12, v10, fa0 -; ZVFH-NEXT: vmfne.vv v10, v8, v8 -; ZVFH-NEXT: vmor.mm v0, v12, v10 +; ZVFH-NEXT: vmfne.vf v0, v10, fa0 +; ZVFH-NEXT: vmfne.vv v2, v8, v8 +; ZVFH-NEXT: vmor.mm v0, v0, v2 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_fv_nxv8f16: @@ -1637,12 +1637,12 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v12, v12 -; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1654,9 +1654,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v ; ZVFH-LABEL: fcmp_uno_vv_nxv8f16_nonans: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; ZVFH-NEXT: vmfne.vv v12, v10, v10 -; ZVFH-NEXT: vmfne.vv v10, v8, v8 -; ZVFH-NEXT: vmor.mm v0, v10, v12 +; ZVFH-NEXT: vmfne.vv v0, v10, v10 +; ZVFH-NEXT: vmfne.vv v2, v8, v8 +; ZVFH-NEXT: vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_vv_nxv8f16_nonans: @@ -1664,12 +1664,12 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16_nonans(<vscale x 8 x half> %va, <v ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v10, v12, v12 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v12 -; ZVFHMIN-NEXT: vmor.mm v0, v8, v10 +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmor.mm v0, v4, v0 ; ZVFHMIN-NEXT: ret %vc = fcmp uno <vscale x 8 x half> %va, %vb ret <vscale x 8 x i1> %vc @@ -1680,9 +1680,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfmv.v.f v10, fa0 -; ZVFH-NEXT: vmfne.vf v12, v10, fa0 -; ZVFH-NEXT: vmfne.vv v10, v8, v8 -; ZVFH-NEXT: vmor.mm v0, v10, v12 +; ZVFH-NEXT: vmfne.vf v0, v10, fa0 +; ZVFH-NEXT: vmfne.vv v2, v8, v8 +; ZVFH-NEXT: 
vmor.mm v0, v2, v0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_uno_vf_nxv8f16_nonans: @@ -1694,12 +1694,12 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16_nonans(<vscale x 8 x half> %va, ha ; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v12 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v8, v12, v12 +; ZVFHMIN-NEXT: vmfne.vv v0, v12, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v12, v12 -; ZVFHMIN-NEXT: vmor.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfne.vv v4, v12, v12 +; ZVFHMIN-NEXT: vmor.mm v0, v0, v4 ; ZVFHMIN-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1991,9 +1991,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_one_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %vc = fcmp one <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2003,9 +2003,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_one_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> 
zeroinitializer @@ -2017,9 +2017,9 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_one_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2053,9 +2053,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %vc = fcmp ord <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2066,9 +2066,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2081,9 +2081,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, 
v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2095,9 +2095,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f32_nonans(<vscale x 8 x float> %va, < ; CHECK-LABEL: fcmp_ord_vv_nxv8f32_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %vc = fcmp ord <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2108,9 +2108,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f32_nonans(<vscale x 8 x float> %va, f ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2122,9 +2122,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ueq_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %vc = fcmp ueq <vscale x 8 x 
float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2134,9 +2134,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ueq_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2148,9 +2148,9 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ueq_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2184,8 +2184,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp ugt <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2195,8 +2195,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ugt_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; 
CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2208,8 +2208,8 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ugt_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2243,8 +2243,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp uge <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2254,8 +2254,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_uge_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2267,8 +2267,8 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_uge_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf 
v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2302,8 +2302,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp ult <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2313,8 +2313,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ult_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2326,8 +2326,8 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ult_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2361,8 +2361,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, 
zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp ule <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2372,8 +2372,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ule_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2385,8 +2385,8 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK-LABEL: fcmp_ule_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2476,9 +2476,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %vc = fcmp uno <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2489,9 +2489,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, 
ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2504,9 +2504,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2518,9 +2518,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f32_nonans(<vscale x 8 x float> %va, < ; CHECK-LABEL: fcmp_uno_vv_nxv8f32_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %vc = fcmp uno <vscale x 8 x float> %va, %vb ret <vscale x 8 x i1> %vc @@ -2531,9 +2531,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f32_nonans(<vscale x 8 x float> %va, f ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, 
v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -2825,9 +2825,9 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_one_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %vc = fcmp one <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -2837,9 +2837,9 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_one_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2851,9 +2851,9 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_one_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2887,9 +2887,9 @@ define <vscale 
x 8 x i1> @fcmp_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %vc = fcmp ord <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -2900,9 +2900,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2915,9 +2915,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2929,9 +2929,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64_nonans(<vscale x 8 x double> %va, ; CHECK-LABEL: fcmp_ord_vv_nxv8f64_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v16, 
v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %vc = fcmp ord <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -2942,9 +2942,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f64_nonans(<vscale x 8 x double> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -2956,9 +2956,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ueq_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %vc = fcmp ueq <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -2968,9 +2968,9 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ueq_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> 
zeroinitializer @@ -2982,9 +2982,9 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ueq_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3018,8 +3018,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp ugt <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -3029,8 +3029,8 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ugt_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3042,8 +3042,8 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ugt_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale 
x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3077,8 +3077,8 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp uge <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -3088,8 +3088,8 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_uge_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3101,8 +3101,8 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_uge_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3136,8 +3136,8 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: 
vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp ult <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -3147,8 +3147,8 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ult_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3160,8 +3160,8 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ult_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3195,8 +3195,8 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %vc = fcmp ule <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -3206,8 +3206,8 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ule_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement 
<vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3219,8 +3219,8 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmp_ule_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3310,9 +3310,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v16, v16 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vv v0, v16, v16 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %vc = fcmp uno <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -3323,9 +3323,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3338,9 +3338,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, 
ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v24, v16 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3352,9 +3352,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64_nonans(<vscale x 8 x double> %va, ; CHECK-LABEL: fcmp_uno_vv_nxv8f64_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v16, v16 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vv v0, v16, v16 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %vc = fcmp uno <vscale x 8 x double> %va, %vb ret <vscale x 8 x i1> %vc @@ -3365,9 +3365,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64_nonans(<vscale x 8 x double> %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -3382,52 +3382,52 @@ define <vscale x 16 x i1> @fcmp_oeq_vf_nx16f64(<vscale x 16 x double> %va) { ; RV32: # %bb.0: ; RV32-NEXT: fcvt.d.w fa5, zero ; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV32-NEXT: vmfeq.vf v24, v16, fa5 +; RV32-NEXT: vmfeq.vf v16, v16, fa5 ; RV32-NEXT: vmfeq.vf v0, v8, fa5 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: srli a0, a0, 3 ; RV32-NEXT: add a1, a0, a0 ; RV32-NEXT: 
vsetvli zero, a1, e8, mf4, ta, ma -; RV32-NEXT: vslideup.vx v0, v24, a0 +; RV32-NEXT: vslideup.vx v0, v16, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: fcmp_oeq_vf_nx16f64: ; RV64: # %bb.0: ; RV64-NEXT: fmv.d.x fa5, zero ; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV64-NEXT: vmfeq.vf v24, v16, fa5 +; RV64-NEXT: vmfeq.vf v16, v16, fa5 ; RV64-NEXT: vmfeq.vf v0, v8, fa5 ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: srli a0, a0, 3 ; RV64-NEXT: add a1, a0, a0 ; RV64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; RV64-NEXT: vslideup.vx v0, v24, a0 +; RV64-NEXT: vslideup.vx v0, v16, a0 ; RV64-NEXT: ret ; ; ZVFHMIN32-LABEL: fcmp_oeq_vf_nx16f64: ; ZVFHMIN32: # %bb.0: ; ZVFHMIN32-NEXT: fcvt.d.w fa5, zero ; ZVFHMIN32-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; ZVFHMIN32-NEXT: vmfeq.vf v24, v16, fa5 +; ZVFHMIN32-NEXT: vmfeq.vf v16, v16, fa5 ; ZVFHMIN32-NEXT: vmfeq.vf v0, v8, fa5 ; ZVFHMIN32-NEXT: csrr a0, vlenb ; ZVFHMIN32-NEXT: srli a0, a0, 3 ; ZVFHMIN32-NEXT: add a1, a0, a0 ; ZVFHMIN32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; ZVFHMIN32-NEXT: vslideup.vx v0, v24, a0 +; ZVFHMIN32-NEXT: vslideup.vx v0, v16, a0 ; ZVFHMIN32-NEXT: ret ; ; ZVFHMIN64-LABEL: fcmp_oeq_vf_nx16f64: ; ZVFHMIN64: # %bb.0: ; ZVFHMIN64-NEXT: fmv.d.x fa5, zero ; ZVFHMIN64-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; ZVFHMIN64-NEXT: vmfeq.vf v24, v16, fa5 +; ZVFHMIN64-NEXT: vmfeq.vf v16, v16, fa5 ; ZVFHMIN64-NEXT: vmfeq.vf v0, v8, fa5 ; ZVFHMIN64-NEXT: csrr a0, vlenb ; ZVFHMIN64-NEXT: srli a0, a0, 3 ; ZVFHMIN64-NEXT: add a1, a0, a0 ; ZVFHMIN64-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; ZVFHMIN64-NEXT: vslideup.vx v0, v24, a0 +; ZVFHMIN64-NEXT: vslideup.vx v0, v16, a0 ; ZVFHMIN64-NEXT: ret %vc = fcmp oeq <vscale x 16 x double> %va, zeroinitializer ret <vscale x 16 x i1> %vc diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll index 0dede98..930228c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll @@ -1089,48 +1089,37 
@@ define <vscale x 128 x i1> @icmp_eq_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vmv1r.v v6, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a4, a0, a1 -; CHECK-NEXT: vl8r.v v8, (a4) -; CHECK-NEXT: vl8r.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v24, (a4) +; CHECK-NEXT: vl8r.v v8, (a0) ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: sub a0, a3, a1 -; CHECK-NEXT: sltu a4, a3, a0 ; CHECK-NEXT: vlm.v v0, (a2) -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a0, a4, a0 +; CHECK-NEXT: sub a0, a3, a1 +; CHECK-NEXT: sltu a2, a3, a0 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a0, a2, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vv v7, v16, v8, v0.t +; CHECK-NEXT: vmseq.vv v16, v16, v24, v0.t ; CHECK-NEXT: bltu a3, a1, .LBB96_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB96_2: ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; 
CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: vmseq.vv v0, v24, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1151,16 +1140,15 @@ define <vscale x 128 x i1> @icmp_eq_vx_nxv128i8(<vscale x 128 x i8> %va, i8 %b, ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v16, a0, v0.t +; CHECK-NEXT: vmseq.vx v16, v16, a0, v0.t ; CHECK-NEXT: bltu a2, a1, .LBB97_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB97_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmv1r.v v8, v24 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i8 0 %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer @@ -1181,16 +1169,15 @@ define <vscale x 128 x i1> @icmp_eq_vx_swap_nxv128i8(<vscale x 128 x i8> %va, i8 ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a3, a4, a3 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v16, a0, v0.t +; CHECK-NEXT: vmseq.vx v16, v16, a0, v0.t ; CHECK-NEXT: bltu a2, a1, .LBB98_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB98_2: ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: vmv1r.v v8, v24 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i8 0 %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 
128 x i8> poison, <vscale x 128 x i32> zeroinitializer @@ -1698,8 +1685,7 @@ define <vscale x 8 x i1> @icmp_eq_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 ; CHECK-LABEL: icmp_eq_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmseq.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1709,8 +1695,7 @@ define <vscale x 8 x i1> @icmp_eq_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vs ; CHECK-LABEL: icmp_eq_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1722,8 +1707,7 @@ define <vscale x 8 x i1> @icmp_eq_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %b ; CHECK-LABEL: icmp_eq_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1735,8 +1719,7 @@ define <vscale x 8 x i1> @icmp_eq_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 ; CHECK-LABEL: icmp_eq_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x 
i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"eq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1746,8 +1729,7 @@ define <vscale x 8 x i1> @icmp_eq_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vscal ; CHECK-LABEL: icmp_eq_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1757,8 +1739,7 @@ define <vscale x 8 x i1> @icmp_ne_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 ; CHECK-LABEL: icmp_ne_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsne.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsne.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1768,8 +1749,7 @@ define <vscale x 8 x i1> @icmp_ne_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <vs ; CHECK-LABEL: icmp_ne_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsne.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsne.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1781,8 +1761,7 @@ define <vscale x 8 x i1> @icmp_ne_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 %b ; CHECK-LABEL: icmp_ne_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsne.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsne.vx v0, v8, a0, v0.t ; CHECK-NEXT: 
ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1794,8 +1773,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 ; CHECK-LABEL: icmp_ne_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsne.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"ne", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1805,8 +1783,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vscal ; CHECK-LABEL: icmp_ne_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsne.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1816,8 +1793,7 @@ define <vscale x 8 x i1> @icmp_ugt_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_ugt_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsltu.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsltu.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1827,8 +1803,7 @@ define <vscale x 8 x i1> @icmp_ugt_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v ; CHECK-LABEL: icmp_ugt_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsgtu.vx v12, v8, a0, v0.t -; CHECK-NEXT: 
vmv1r.v v0, v12 +; CHECK-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1840,8 +1815,7 @@ define <vscale x 8 x i1> @icmp_ugt_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 % ; CHECK-LABEL: icmp_ugt_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsltu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1853,8 +1827,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_ugt_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsgtu.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1864,8 +1837,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca ; CHECK-LABEL: icmp_ugt_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsleu.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1875,8 +1847,7 @@ define <vscale x 8 x i1> @icmp_uge_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_uge_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli 
zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsleu.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsleu.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1886,10 +1857,9 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v ; CHECK-LABEL: icmp_uge_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsleu.vv v12, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1901,8 +1871,7 @@ define <vscale x 8 x i1> @icmp_uge_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 % ; CHECK-LABEL: icmp_uge_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsleu.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1914,8 +1883,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_uge_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsgtu.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgtu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x 
i1> %v @@ -1925,8 +1893,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca ; CHECK-LABEL: icmp_uge_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsleu.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1936,8 +1903,7 @@ define <vscale x 8 x i1> @icmp_ult_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_ult_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsltu.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsltu.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1947,8 +1913,7 @@ define <vscale x 8 x i1> @icmp_ult_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v ; CHECK-LABEL: icmp_ult_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsltu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1960,8 +1925,7 @@ define <vscale x 8 x i1> @icmp_ult_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 % ; CHECK-LABEL: icmp_ult_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsgtu.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale 
x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -1973,8 +1937,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_ult_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsleu.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1984,8 +1947,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca ; CHECK-LABEL: icmp_ult_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsgtu.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -1995,8 +1957,7 @@ define <vscale x 8 x i1> @icmp_sgt_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_sgt_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmslt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmslt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2006,8 +1967,7 @@ define <vscale x 8 x i1> @icmp_sgt_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v ; CHECK-LABEL: icmp_sgt_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsgt.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement 
<vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -2019,8 +1979,7 @@ define <vscale x 8 x i1> @icmp_sgt_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 % ; CHECK-LABEL: icmp_sgt_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmslt.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmslt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -2032,8 +1991,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_sgt_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2043,8 +2001,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca ; CHECK-LABEL: icmp_sgt_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2054,8 +2011,7 @@ define <vscale x 8 x i1> @icmp_sge_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_sge_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; 
CHECK-NEXT: vmsle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2065,10 +2021,9 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v ; CHECK-LABEL: icmp_sge_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vv v12, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -2080,8 +2035,7 @@ define <vscale x 8 x i1> @icmp_sge_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 % ; CHECK-LABEL: icmp_sge_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -2093,8 +2047,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_sge_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"sge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2104,8 +2057,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca ; 
CHECK-LABEL: icmp_sge_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2115,8 +2067,7 @@ define <vscale x 8 x i1> @icmp_slt_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_slt_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmslt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmslt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2126,8 +2077,7 @@ define <vscale x 8 x i1> @icmp_slt_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v ; CHECK-LABEL: icmp_slt_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmslt.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmslt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -2139,8 +2089,7 @@ define <vscale x 8 x i1> @icmp_slt_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 % ; CHECK-LABEL: icmp_slt_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsgt.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -2152,8 +2101,7 @@ define 
<vscale x 8 x i1> @icmp_slt_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_slt_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"slt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2163,8 +2111,7 @@ define <vscale x 8 x i1> @icmp_slt_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca ; CHECK-LABEL: icmp_slt_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2174,8 +2121,7 @@ define <vscale x 8 x i1> @icmp_sle_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_sle_vv_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2185,8 +2131,7 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %b, <v ; CHECK-LABEL: icmp_sle_vx_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vx v12, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vx v0, v8, a0, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, 
<vscale x 8 x i32> zeroinitializer @@ -2198,10 +2143,9 @@ define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i32(<vscale x 8 x i32> %va, i32 % ; CHECK-LABEL: icmp_sle_vx_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 +; CHECK-NEXT: vmv.v.x v12, a0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vv v12, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 8 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer @@ -2213,8 +2157,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_nxv8i32(<vscale x 8 x i32> %va, <vscale x ; CHECK-LABEL: icmp_sle_vi_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsle.vi v12, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> splat (i32 4), metadata !"sle", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2224,8 +2167,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_swap_nxv8i32(<vscale x 8 x i32> %va, <vsca ; CHECK-LABEL: icmp_sle_vi_swap_nxv8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmsgt.vi v12, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i32(<vscale x 8 x i32> splat (i32 4), <vscale x 8 x i32> %va, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -2244,35 +2186,34 @@ define <vscale x 32 x i1> @icmp_eq_vv_nxv32i32(<vscale x 32 x i32> %va, <vscale ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v 
v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma -; CHECK-NEXT: slli a3, a1, 3 -; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vl8re32.v v8, (a3) -; CHECK-NEXT: slli a3, a1, 1 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: srli a1, a3, 2 +; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vslidedown.vx v0, v0, a1 +; CHECK-NEXT: slli a4, a3, 3 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vl8re32.v v24, (a4) +; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: sub a4, a2, a3 ; CHECK-NEXT: sltu a5, a2, a4 ; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: vl8re32.v v8, (a0) ; CHECK-NEXT: and a4, a5, a4 -; CHECK-NEXT: srli a1, a1, 2 -; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vmv1r.v v6, v0 -; CHECK-NEXT: vslidedown.vx v0, v0, a1 ; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; CHECK-NEXT: vmseq.vv v7, v16, v8, v0.t +; CHECK-NEXT: vmseq.vv v16, v16, v24, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB189_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB189_2: ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmseq.vv v0, v24, v8, v0.t ; CHECK-NEXT: add a0, a1, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v16, v7, a1 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vslideup.vx v0, v16, a1 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 @@ -2296,18 +2237,17 @@ define <vscale x 32 x i1> @icmp_eq_vx_nxv32i32(<vscale x 32 x i32> %va, i32 %b, ; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a4, a5, a4 ; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v16, a0, v0.t +; CHECK-NEXT: vmseq.vx v16, v16, a0, v0.t ; 
CHECK-NEXT: bltu a1, a3, .LBB190_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: .LBB190_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: add a0, a2, a2 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v16, v24, a2 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vslideup.vx v0, v16, a2 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer @@ -2329,18 +2269,17 @@ define <vscale x 32 x i1> @icmp_eq_vx_swap_nxv32i32(<vscale x 32 x i32> %va, i32 ; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a4, a5, a4 ; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; CHECK-NEXT: vmseq.vx v24, v16, a0, v0.t +; CHECK-NEXT: vmseq.vx v16, v16, a0, v0.t ; CHECK-NEXT: bltu a1, a3, .LBB191_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: .LBB191_2: ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t +; CHECK-NEXT: vmseq.vx v0, v8, a0, v0.t ; CHECK-NEXT: add a0, a2, a2 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v16, v24, a2 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vslideup.vx v0, v16, a2 ; CHECK-NEXT: ret %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0 %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer @@ -3100,8 +3039,7 @@ define <vscale x 8 x i1> @icmp_eq_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 ; CHECK-LABEL: icmp_eq_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmseq.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmseq.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> 
@llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3116,18 +3054,16 @@ define <vscale x 8 x i1> @icmp_eq_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmseq.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmseq.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_eq_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmseq.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmseq.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3144,18 +3080,16 @@ define <vscale x 8 x i1> @icmp_eq_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %b ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmseq.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmseq.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_eq_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmseq.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmseq.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3167,8 +3101,7 @@ define <vscale x 8 x i1> 
@icmp_eq_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 ; CHECK-LABEL: icmp_eq_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmseq.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"eq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3178,8 +3111,7 @@ define <vscale x 8 x i1> @icmp_eq_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vscal ; CHECK-LABEL: icmp_eq_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmseq.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmseq.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"eq", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3189,8 +3121,7 @@ define <vscale x 8 x i1> @icmp_ne_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 ; CHECK-LABEL: icmp_ne_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsne.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmsne.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3205,18 +3136,16 @@ define <vscale x 8 x i1> @icmp_ne_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <vs ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsne.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsne.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; 
RV64-LABEL: icmp_ne_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsne.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsne.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3233,18 +3162,16 @@ define <vscale x 8 x i1> @icmp_ne_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 %b ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsne.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsne.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ne_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsne.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsne.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3256,8 +3183,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 ; CHECK-LABEL: icmp_ne_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsne.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"ne", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3267,8 +3193,7 @@ define <vscale x 8 x i1> @icmp_ne_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vscal ; CHECK-LABEL: icmp_ne_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsne.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsne.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"ne", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3278,8 +3203,7 @@ define <vscale x 8 x i1> @icmp_ugt_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_ugt_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsltu.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmsltu.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3294,18 +3218,16 @@ define <vscale x 8 x i1> @icmp_ugt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsltu.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsltu.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ugt_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsgtu.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3322,18 +3244,16 @@ define <vscale x 8 x i1> @icmp_ugt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 % ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), 
zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsltu.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsltu.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ugt_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsltu.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsltu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3345,8 +3265,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_ugt_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsgtu.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3356,8 +3275,7 @@ define <vscale x 8 x i1> @icmp_ugt_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca ; CHECK-LABEL: icmp_ugt_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsleu.vi v16, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"ugt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3367,8 +3285,7 @@ define <vscale x 8 x i1> @icmp_uge_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_uge_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsleu.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; 
CHECK-NEXT: vmsleu.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3383,20 +3300,18 @@ define <vscale x 8 x i1> @icmp_uge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsleu.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsleu.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_uge_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.v.x v24, a0 +; RV64-NEXT: vmv.v.x v16, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsleu.vv v16, v24, v8, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsleu.vv v0, v16, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3413,18 +3328,16 @@ define <vscale x 8 x i1> @icmp_uge_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 % ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsleu.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsleu.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_uge_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsleu.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsleu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement 
<vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3436,8 +3349,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_uge_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsgtu.vi v16, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsgtu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3447,8 +3359,7 @@ define <vscale x 8 x i1> @icmp_uge_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca ; CHECK-LABEL: icmp_uge_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsleu.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsleu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"uge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3458,8 +3369,7 @@ define <vscale x 8 x i1> @icmp_ult_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_ult_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsltu.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmsltu.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3474,18 +3384,16 @@ define <vscale x 8 x i1> @icmp_ult_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; 
RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsltu.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsltu.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ult_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsltu.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsltu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3502,18 +3410,16 @@ define <vscale x 8 x i1> @icmp_ult_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 % ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsltu.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsltu.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_ult_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsgtu.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsgtu.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3525,8 +3431,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_ult_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsleu.vi v16, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsleu.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"ult", <vscale x 8 x 
i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3536,8 +3441,7 @@ define <vscale x 8 x i1> @icmp_ult_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca ; CHECK-LABEL: icmp_ult_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsgtu.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsgtu.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"ult", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3547,8 +3451,7 @@ define <vscale x 8 x i1> @icmp_sgt_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_sgt_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmslt.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmslt.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3563,18 +3466,16 @@ define <vscale x 8 x i1> @icmp_sgt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmslt.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmslt.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sgt_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsgt.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsgt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3591,18 +3492,16 @@ 
define <vscale x 8 x i1> @icmp_sgt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 % ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmslt.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmslt.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sgt_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmslt.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmslt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3614,8 +3513,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_sgt_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsgt.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3625,8 +3523,7 @@ define <vscale x 8 x i1> @icmp_sgt_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca ; CHECK-LABEL: icmp_sgt_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsle.vi v16, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsle.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3636,8 +3533,7 @@ define <vscale x 8 x i1> @icmp_sge_vv_nxv8i64(<vscale x 8 x 
i64> %va, <vscale x ; CHECK-LABEL: icmp_sge_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsle.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmsle.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3652,20 +3548,18 @@ define <vscale x 8 x i1> @icmp_sge_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsle.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsle.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sge_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.v.x v24, a0 +; RV64-NEXT: vmv.v.x v16, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsle.vv v16, v24, v8, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsle.vv v0, v16, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3682,18 +3576,16 @@ define <vscale x 8 x i1> @icmp_sge_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 % ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsle.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsle.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sge_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: 
vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsle.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsle.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3705,8 +3597,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_sge_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsgt.vi v16, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"sge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3716,8 +3607,7 @@ define <vscale x 8 x i1> @icmp_sge_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca ; CHECK-LABEL: icmp_sge_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsle.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"sge", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3727,8 +3617,7 @@ define <vscale x 8 x i1> @icmp_slt_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_slt_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmslt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmslt.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3743,18 +3632,16 @@ define <vscale x 8 x i1> @icmp_slt_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v ; 
RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmslt.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmslt.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_slt_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmslt.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmslt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3771,18 +3658,16 @@ define <vscale x 8 x i1> @icmp_slt_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 % ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmslt.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmslt.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_slt_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsgt.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsgt.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3794,8 +3679,7 @@ define <vscale x 8 x i1> @icmp_slt_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_slt_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsle.vi v16, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsle.vi v0, 
v8, 3, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"slt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3805,8 +3689,7 @@ define <vscale x 8 x i1> @icmp_slt_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca ; CHECK-LABEL: icmp_slt_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsgt.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsgt.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"slt", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3816,8 +3699,7 @@ define <vscale x 8 x i1> @icmp_sle_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_sle_vv_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsle.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmsle.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3832,18 +3714,16 @@ define <vscale x 8 x i1> @icmp_sle_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b, <v ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsle.vv v16, v8, v24, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsle.vv v0, v8, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sle_vx_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsle.vx v16, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsle.vx v0, v8, a0, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 
8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3860,20 +3740,18 @@ define <vscale x 8 x i1> @icmp_sle_vx_swap_nxv8i64(<vscale x 8 x i64> %va, i64 % ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a0), zero +; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vmsle.vv v16, v24, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: vmsle.vv v0, v16, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sle_vx_swap_nxv8i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; RV64-NEXT: vmv.v.x v24, a0 +; RV64-NEXT: vmv.v.x v16, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vmsle.vv v16, v24, v8, v0.t -; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: vmsle.vv v0, v16, v8, v0.t ; RV64-NEXT: ret %elt.head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0 %vb = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer @@ -3885,8 +3763,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_nxv8i64(<vscale x 8 x i64> %va, <vscale x ; CHECK-LABEL: icmp_sle_vi_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsle.vi v16, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsle.vi v0, v8, 4, v0.t ; CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> splat (i64 4), metadata !"sle", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v @@ -3896,8 +3773,7 @@ define <vscale x 8 x i1> @icmp_sle_vi_swap_nxv8i64(<vscale x 8 x i64> %va, <vsca ; CHECK-LABEL: icmp_sle_vi_swap_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmsgt.vi v16, v8, 3, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsgt.vi v0, v8, 3, v0.t ; 
CHECK-NEXT: ret %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> splat (i64 4), <vscale x 8 x i64> %va, metadata !"sle", <vscale x 8 x i1> %m, i32 %evl) ret <vscale x 8 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll index 90ffeff..46de7bd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll @@ -2981,10 +2981,10 @@ define <vscale x 16 x i1> @icmp_eq_vi_nx16i64(<vscale x 16 x i64> %va) { ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma -; CHECK-NEXT: vmseq.vi v24, v16, 0 +; CHECK-NEXT: vmseq.vi v16, v16, 0 ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vx v0, v24, a0 +; CHECK-NEXT: vslideup.vx v0, v16, a0 ; CHECK-NEXT: ret %vc = icmp eq <vscale x 16 x i64> %va, zeroinitializer ret <vscale x 16 x i1> %vc diff --git a/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll index f488baf..1491bb6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll +++ b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll @@ -3,9 +3,9 @@ ; RUN: -verify-machineinstrs | FileCheck %s ; RUN: llc -mtriple riscv64 -mattr=+d,+zfh,+zvfh,+v < %s \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: llc -mtriple riscv32 -mattr=+d,+zfh,+zvfh,+v,+fast-unaligned-access < %s \ +; RUN: llc -mtriple riscv32 -mattr=+d,+zfh,+zvfh,+v,+unaligned-vector-mem < %s \ ; RUN: -verify-machineinstrs | FileCheck --check-prefix=FAST %s -; RUN: llc -mtriple riscv64 -mattr=+d,+zfh,+zvfh,+v,+fast-unaligned-access < %s \ +; RUN: llc -mtriple riscv64 -mattr=+d,+zfh,+zvfh,+v,+unaligned-vector-mem < %s \ ; RUN: -verify-machineinstrs | FileCheck --check-prefix=FAST %s diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll index 
2fb6ee3..369141a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll @@ -13,16 +13,17 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_load_nxv16i ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v8, a0 -; CHECK-NEXT: vmv1r.v v12, v8 +; CHECK-NEXT: vmv1r.v v11, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v10, v8, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT: vnsrl.wi v12, v8, 0 -; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vnsrl.wi v12, v8, 8 -; CHECK-NEXT: vmsne.vi v8, v12, 0 +; CHECK-NEXT: vmerge.vim v14, v8, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmerge.vim v12, v8, 1, v0 +; CHECK-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-NEXT: vmsne.vi v8, v8, 0 +; CHECK-NEXT: vnsrl.wi v10, v12, 8 +; CHECK-NEXT: vmsne.vi v0, v10, 0 +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: ret %vec = load <vscale x 32 x i1>, ptr %p %retval = call {<vscale x 16 x i1>, <vscale x 16 x i1>} @llvm.experimental.vector.deinterleave2.nxv32i1(<vscale x 32 x i1> %vec) diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index f8d2056..889e7d1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -17,9 +17,10 @@ define {<vscale x 16 x i1>, <vscale x 16 x i1>} @vector_deinterleave_nxv16i1_nxv ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma ; CHECK-NEXT: vmerge.vim v14, v8, 1, v0 ; CHECK-NEXT: vnsrl.wi v8, v12, 0 -; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vmsne.vi v8, v8, 0 ; CHECK-NEXT: vnsrl.wi v10, v12, 8 -; CHECK-NEXT: vmsne.vi v8, v10, 0 +; CHECK-NEXT: vmsne.vi v0, v10, 0 +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: ret %retval = call {<vscale x 16 x i1>, <vscale x 16 x i1>} 
@llvm.experimental.vector.deinterleave2.nxv32i1(<vscale x 32 x i1> %vec) ret {<vscale x 16 x i1>, <vscale x 16 x i1>} %retval @@ -90,24 +91,25 @@ declare {<vscale x 2 x i64>, <vscale x 2 x i64>} @llvm.experimental.vector.deint define {<vscale x 64 x i1>, <vscale x 64 x i1>} @vector_deinterleave_nxv64i1_nxv128i1(<vscale x 128 x i1> %vec) { ; CHECK-LABEL: vector_deinterleave_nxv64i1_nxv128i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v28, v8 +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vmerge.vim v8, v16, 1, v0 +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v8, 0 +; CHECK-NEXT: vnsrl.wi v8, v16, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmerge.vim v24, v24, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v28, v16, 0 +; CHECK-NEXT: vnsrl.wi v12, v24, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v24, 0 +; CHECK-NEXT: vmsne.vi v8, v8, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v8, 8 -; CHECK-NEXT: vnsrl.wi v28, v16, 8 +; CHECK-NEXT: vnsrl.wi v0, v16, 8 +; CHECK-NEXT: vnsrl.wi v4, v24, 8 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v8, v24, 0 +; CHECK-NEXT: vmsne.vi v0, v0, 0 +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: ret %retval = call {<vscale x 64 x i1>, <vscale x 64 x i1>} @llvm.experimental.vector.deinterleave2.nxv128i1(<vscale x 128 x i1> %vec) ret {<vscale x 64 x i1>, <vscale x 64 x i1>} %retval diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll index baad9e1..2a0f0d5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll @@ -17,15 +17,15 @@ define void @vector_interleave_store_nxv32i1_nxv16i1(<vscale x 16 x i1> %a, <vsc ; CHECK-NEXT: vwaddu.vv v12, v8, v10 ; CHECK-NEXT: li a1, -1 ; CHECK-NEXT: vwmaccu.vx v12, a1, v10 -; CHECK-NEXT: vmsne.vi v8, v14, 0 -; CHECK-NEXT: vmsne.vi v9, v12, 0 +; CHECK-NEXT: vmsne.vi v0, v14, 0 +; CHECK-NEXT: vmsne.vi v2, v12, 0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a1, a1, 2 ; CHECK-NEXT: add a2, a1, a1 ; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v9, v8, a1 +; CHECK-NEXT: vslideup.vx v2, v0, a1 ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vsm.v v9, (a0) +; CHECK-NEXT: vsm.v v2, (a0) ; CHECK-NEXT: ret %res = call <vscale x 32 x i1> @llvm.experimental.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) store <vscale x 32 x i1> %res, ptr %p @@ -98,43 +98,34 @@ define void @vector_interleave_store_nxv16i64_nxv8i64(<vscale x 8 x i64> %a, <vs ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: vmv8r.v v0, v8 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 1 ; CHECK-NEXT: vsetvli a3, zero, e16, m2, ta, mu ; CHECK-NEXT: vid.v v24 ; CHECK-NEXT: vsrl.vi v26, v24, 1 ; CHECK-NEXT: vand.vi v24, v24, 1 -; CHECK-NEXT: vmsne.vi v28, v24, 0 -; CHECK-NEXT: vmv1r.v v0, v28 +; CHECK-NEXT: vmsne.vi v0, v24, 0 +; CHECK-NEXT: vmv4r.v v28, v4 ; CHECK-NEXT: vadd.vx v26, v26, a2, v0.t ; CHECK-NEXT: vmv4r.v v12, v16 ; CHECK-NEXT: 
vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vrgatherei16.vv v0, v8, v26 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v16, v12 +; CHECK-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vmv4r.v v16, v28 ; CHECK-NEXT: vrgatherei16.vv v8, v16, v26 ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: vs8r.v v8, (a1) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vs8r.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll index c454483..f0a2bd0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll @@ -19,13 +19,13 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1> ; CHECK-NEXT: vwaddu.vv v12, v8, v10 ; CHECK-NEXT: li a0, -1 ; CHECK-NEXT: vwmaccu.vx v12, a0, v10 -; CHECK-NEXT: vmsne.vi v8, v14, 0 +; CHECK-NEXT: vmsne.vi v2, v14, 0 ; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: add a1, a0, a0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v0, v8, a0 +; CHECK-NEXT: vslideup.vx v0, v2, a0 ; CHECK-NEXT: ret ; ; ZVBB-LABEL: vector_interleave_nxv32i1_nxv16i1: @@ -39,13 +39,13 @@ define <vscale x 32 x i1> @vector_interleave_nxv32i1_nxv16i1(<vscale x 16 x i1> ; ZVBB-NEXT: li a0, 1 ; ZVBB-NEXT: vmv1r.v v0, v16 ; 
ZVBB-NEXT: vwaddu.wx v12, v12, a0, v0.t -; ZVBB-NEXT: vmsne.vi v8, v14, 0 +; ZVBB-NEXT: vmsne.vi v2, v14, 0 ; ZVBB-NEXT: vmsne.vi v0, v12, 0 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: srli a0, a0, 2 ; ZVBB-NEXT: add a1, a0, a0 ; ZVBB-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; ZVBB-NEXT: vslideup.vx v0, v8, a0 +; ZVBB-NEXT: vslideup.vx v0, v2, a0 ; ZVBB-NEXT: ret %res = call <vscale x 32 x i1> @llvm.experimental.vector.interleave2.nxv32i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) ret <vscale x 32 x i1> %res diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll index 15849fd..45e9854 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll @@ -1942,12 +1942,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_ogt_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1958,12 +1956,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v 
v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1976,12 +1972,10 @@ define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1993,12 +1987,10 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_oge_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2009,12 +2001,10 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f16(<vscale x 8 x half> 
%va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2027,12 +2017,10 @@ define <vscale x 8 x i1> @fcmp_oge_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2044,12 +2032,10 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_olt_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, 
v10, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2060,12 +2046,10 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2078,12 +2062,10 @@ define <vscale x 8 x i1> @fcmp_olt_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2095,12 +2077,10 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_ole_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, 
v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2111,12 +2091,10 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2129,12 +2107,10 @@ define <vscale x 8 x i1> @fcmp_ole_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 
x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2146,14 +2122,13 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_one_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2164,14 +2139,13 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> 
zeroinitializer @@ -2184,14 +2158,13 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2203,9 +2176,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_ord_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2216,9 +2189,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; 
CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2231,9 +2204,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2245,14 +2218,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_ueq_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2263,14 +2235,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; 
CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2283,14 +2254,13 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2302,12 +2272,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_ugt_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv 
v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2318,12 +2287,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2336,12 +2304,11 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x 
half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2353,12 +2320,11 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_uge_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2369,12 +2335,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2387,12 +2352,11 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; 
CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2404,12 +2368,11 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_ult_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2420,12 +2383,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = 
insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2438,12 +2400,11 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2455,12 +2416,11 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_ule_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2471,12 +2431,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; 
CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2489,12 +2448,11 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2540,9 +2498,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x ; CHECK-LABEL: fcmp_uno_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v10, v10 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vv v0, v10, v10 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp 
ret <vscale x 8 x i1> %1 @@ -2553,9 +2511,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2568,9 +2526,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v10 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2617,12 +2575,10 @@ define <vscale x 16 x i1> @fcmp_ogt_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_ogt_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ogt", metadata 
!"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2633,12 +2589,10 @@ define <vscale x 16 x i1> @fcmp_ogt_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2651,12 +2605,10 @@ define <vscale x 16 x i1> @fcmp_ogt_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2668,12 +2620,10 @@ define <vscale x 16 x i1> @fcmp_oge_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_oge_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v12, v8, v0.t -; CHECK-NEXT: 
vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2684,12 +2634,10 @@ define <vscale x 16 x i1> @fcmp_oge_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2702,12 +2650,10 @@ define <vscale x 16 x i1> @fcmp_oge_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2719,12 +2665,10 @@ define <vscale x 16 x i1> 
@fcmp_olt_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_olt_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2735,12 +2679,10 @@ define <vscale x 16 x i1> @fcmp_olt_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2753,12 +2695,10 @@ define <vscale x 16 x i1> @fcmp_olt_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, 
v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2770,12 +2710,10 @@ define <vscale x 16 x i1> @fcmp_ole_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_ole_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2786,12 +2724,10 @@ define <vscale x 16 x i1> @fcmp_ole_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2804,12 +2740,10 @@ define <vscale x 16 x i1> @fcmp_ole_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, 
zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2821,14 +2755,13 @@ define <vscale x 16 x i1> @fcmp_one_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_one_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2839,14 +2772,13 @@ define <vscale x 16 x i1> @fcmp_one_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; 
CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2859,14 +2791,13 @@ define <vscale x 16 x i1> @fcmp_one_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2878,9 +2809,9 @@ define <vscale x 16 x i1> @fcmp_ord_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_ord_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> 
@llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2891,9 +2822,9 @@ define <vscale x 16 x i1> @fcmp_ord_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2906,9 +2837,9 @@ define <vscale x 16 x i1> @fcmp_ord_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2920,14 +2851,13 @@ define <vscale x 16 x i1> @fcmp_ueq_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_ueq_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; 
CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2938,14 +2868,13 @@ define <vscale x 16 x i1> @fcmp_ueq_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2958,14 +2887,13 @@ define <vscale x 16 x i1> @fcmp_ueq_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; 
CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2977,12 +2905,11 @@ define <vscale x 16 x i1> @fcmp_ugt_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_ugt_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2993,12 +2920,11 @@ define <vscale x 16 x i1> @fcmp_ugt_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3011,12 +2937,11 @@ define <vscale x 16 x i1> 
@fcmp_ugt_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3028,12 +2953,11 @@ define <vscale x 16 x i1> @fcmp_uge_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_uge_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -3044,12 +2968,11 @@ define <vscale x 16 x i1> @fcmp_uge_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; 
CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3062,12 +2985,11 @@ define <vscale x 16 x i1> @fcmp_uge_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3079,12 +3001,11 @@ define <vscale x 16 x i1> @fcmp_ult_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_ult_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> 
%1 @@ -3095,12 +3016,11 @@ define <vscale x 16 x i1> @fcmp_ult_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3113,12 +3033,11 @@ define <vscale x 16 x i1> @fcmp_ult_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3130,12 +3049,11 @@ define <vscale x 16 x i1> @fcmp_ule_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_ule_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; 
CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -3146,12 +3064,11 @@ define <vscale x 16 x i1> @fcmp_ule_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3164,12 +3081,11 @@ define <vscale x 16 x i1> @fcmp_ule_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale 
x 16 x i32> zeroinitializer @@ -3215,9 +3131,9 @@ define <vscale x 16 x i1> @fcmp_uno_vv_nxv16f16(<vscale x 16 x half> %va, <vscal ; CHECK-LABEL: fcmp_uno_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -3228,9 +3144,9 @@ define <vscale x 16 x i1> @fcmp_uno_vf_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3243,9 +3159,9 @@ define <vscale x 16 x i1> @fcmp_uno_fv_nxv16f16(<vscale x 16 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -3292,12 +3208,10 @@ define <vscale x 32 x i1> @fcmp_ogt_vv_nxv32f16(<vscale x 32 x 
half> %va, <vscal ; CHECK-LABEL: fcmp_ogt_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3308,12 +3222,10 @@ define <vscale x 32 x i1> @fcmp_ogt_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3326,12 +3238,10 @@ define <vscale x 32 x i1> @fcmp_ogt_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret 
%head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3343,12 +3253,10 @@ define <vscale x 32 x i1> @fcmp_oge_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_oge_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmfle.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3359,12 +3267,10 @@ define <vscale x 32 x i1> @fcmp_oge_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3377,12 +3283,10 @@ define <vscale x 32 x i1> @fcmp_oge_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; 
CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3394,12 +3298,10 @@ define <vscale x 32 x i1> @fcmp_olt_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_olt_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3410,12 +3312,10 @@ define <vscale x 32 x i1> @fcmp_olt_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3428,12 
+3328,10 @@ define <vscale x 32 x i1> @fcmp_olt_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3445,12 +3343,10 @@ define <vscale x 32 x i1> @fcmp_ole_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_ole_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmfle.vv v25, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3461,12 +3357,10 @@ define <vscale x 32 x i1> @fcmp_ole_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm 
v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3479,12 +3373,10 @@ define <vscale x 32 x i1> @fcmp_ole_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3496,14 +3388,13 @@ define <vscale x 32 x i1> @fcmp_one_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_one_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v24 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3514,14 +3405,13 @@ define <vscale x 32 x i1> @fcmp_one_vf_nxv32f16(<vscale x 32 x half> %va, 
half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3534,14 +3424,13 @@ define <vscale x 32 x i1> @fcmp_one_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3553,9 +3442,9 @@ define <vscale x 32 x i1> @fcmp_ord_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_ord_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: 
vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3566,9 +3455,9 @@ define <vscale x 32 x i1> @fcmp_ord_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3581,9 +3470,9 @@ define <vscale x 32 x i1> @fcmp_ord_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3595,14 +3484,13 @@ define <vscale x 32 x i1> @fcmp_ueq_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_ueq_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: 
vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v24 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3613,14 +3501,13 @@ define <vscale x 32 x i1> @fcmp_ueq_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3633,14 +3520,13 @@ define <vscale x 32 x i1> @fcmp_ueq_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector 
<vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3652,12 +3538,11 @@ define <vscale x 32 x i1> @fcmp_ugt_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_ugt_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3668,12 +3553,11 @@ define <vscale x 32 x i1> @fcmp_ugt_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3686,12 +3570,11 @@ define <vscale x 32 x i1> @fcmp_ugt_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 
-; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3703,12 +3586,11 @@ define <vscale x 32 x i1> @fcmp_uge_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_uge_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3719,12 +3601,11 @@ define <vscale x 32 x i1> @fcmp_uge_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> 
poison, <vscale x 32 x i32> zeroinitializer @@ -3737,12 +3618,11 @@ define <vscale x 32 x i1> @fcmp_uge_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3754,12 +3634,11 @@ define <vscale x 32 x i1> @fcmp_ult_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_ult_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3770,12 +3649,11 @@ define <vscale x 32 x i1> @fcmp_ult_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; 
CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3788,12 +3666,11 @@ define <vscale x 32 x i1> @fcmp_ult_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3805,12 +3682,11 @@ define <vscale x 32 x i1> @fcmp_ule_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_ule_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> 
%1 @@ -3821,12 +3697,11 @@ define <vscale x 32 x i1> @fcmp_ule_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3839,12 +3714,11 @@ define <vscale x 32 x i1> @fcmp_ule_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3890,9 +3764,9 @@ define <vscale x 32 x i1> @fcmp_uno_vv_nxv32f16(<vscale x 32 x half> %va, <vscal ; CHECK-LABEL: fcmp_uno_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v16, v16 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vv v0, v16, v16 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> 
@llvm.experimental.constrained.fcmp.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3903,9 +3777,9 @@ define <vscale x 32 x i1> @fcmp_uno_vf_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3918,9 +3792,9 @@ define <vscale x 32 x i1> @fcmp_uno_fv_nxv32f16(<vscale x 32 x half> %va, half % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v24, v16 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -5233,12 +5107,10 @@ define <vscale x 4 x i1> @fcmp_ogt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_ogt_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t ; CHECK-NEXT: 
ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5249,12 +5121,10 @@ define <vscale x 4 x i1> @fcmp_ogt_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5267,12 +5137,10 @@ define <vscale x 4 x i1> @fcmp_ogt_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5284,12 +5152,10 @@ define <vscale x 4 x i1> @fcmp_oge_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_oge_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; 
CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5300,12 +5166,10 @@ define <vscale x 4 x i1> @fcmp_oge_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5318,12 +5182,10 @@ define <vscale x 4 x i1> @fcmp_oge_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale 
x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5335,12 +5197,10 @@ define <vscale x 4 x i1> @fcmp_olt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_olt_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5351,12 +5211,10 @@ define <vscale x 4 x i1> @fcmp_olt_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5369,12 +5227,10 @@ define <vscale x 4 x i1> @fcmp_olt_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; 
CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5386,12 +5242,10 @@ define <vscale x 4 x i1> @fcmp_ole_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_ole_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5402,12 +5256,10 @@ define <vscale x 4 x i1> @fcmp_ole_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5420,12 
+5272,10 @@ define <vscale x 4 x i1> @fcmp_ole_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5437,14 +5287,13 @@ define <vscale x 4 x i1> @fcmp_one_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_one_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5455,14 +5304,13 @@ define <vscale x 4 x i1> @fcmp_one_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv 
v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5475,14 +5323,13 @@ define <vscale x 4 x i1> @fcmp_one_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5494,9 +5341,9 @@ define <vscale x 4 x i1> @fcmp_ord_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; 
CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5507,9 +5354,9 @@ define <vscale x 4 x i1> @fcmp_ord_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5522,9 +5369,9 @@ define <vscale x 4 x i1> @fcmp_ord_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5536,14 +5383,13 @@ define <vscale x 4 x i1> @fcmp_ueq_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_ueq_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; 
CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5554,14 +5400,13 @@ define <vscale x 4 x i1> @fcmp_ueq_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5574,14 +5419,13 @@ define <vscale x 4 x i1> @fcmp_ueq_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, 
fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5593,12 +5437,11 @@ define <vscale x 4 x i1> @fcmp_ugt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5609,12 +5452,11 @@ define <vscale x 4 x i1> @fcmp_ugt_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x 
float> poison, <vscale x 4 x i32> zeroinitializer @@ -5627,12 +5469,11 @@ define <vscale x 4 x i1> @fcmp_ugt_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5644,12 +5485,11 @@ define <vscale x 4 x i1> @fcmp_uge_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5660,12 +5500,11 @@ define <vscale x 4 x i1> @fcmp_uge_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: 
vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5678,12 +5517,11 @@ define <vscale x 4 x i1> @fcmp_uge_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5695,12 +5533,11 @@ define <vscale x 4 x i1> @fcmp_ult_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x 
float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5711,12 +5548,11 @@ define <vscale x 4 x i1> @fcmp_ult_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5729,12 +5565,11 @@ define <vscale x 4 x i1> @fcmp_ult_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5746,12 +5581,11 @@ define <vscale x 4 x i1> @fcmp_ule_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, 
v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5762,12 +5596,11 @@ define <vscale x 4 x i1> @fcmp_ule_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5780,12 +5613,11 @@ define <vscale x 4 x i1> @fcmp_ule_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 
%splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5831,9 +5663,9 @@ define <vscale x 4 x i1> @fcmp_uno_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v10, v10 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vv v0, v10, v10 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -5844,9 +5676,9 @@ define <vscale x 4 x i1> @fcmp_uno_vf_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -5859,9 +5691,9 @@ define <vscale x 4 x i1> @fcmp_uno_fv_nxv4f32(<vscale x 4 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v10 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ 
-5908,12 +5740,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ogt_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5924,12 +5754,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5942,12 +5770,10 @@ define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: 
vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5959,12 +5785,10 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_oge_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5975,12 +5799,10 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5993,12 +5815,10 @@ define <vscale x 8 x i1> @fcmp_oge_fv_nxv8f32(<vscale x 8 x float> %va, float %b 
; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6010,12 +5830,10 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_olt_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6026,12 +5844,10 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: 
vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6044,12 +5860,10 @@ define <vscale x 8 x i1> @fcmp_olt_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6061,12 +5875,10 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ole_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6077,12 +5889,10 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; 
CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6095,12 +5905,10 @@ define <vscale x 8 x i1> @fcmp_ole_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6112,14 +5920,13 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_one_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, 
v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6130,14 +5937,13 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6150,14 +5956,13 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: 
vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6169,9 +5974,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6182,9 +5987,9 @@ define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6197,9 +6002,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = 
shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6211,14 +6016,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ueq_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6229,14 +6033,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> 
poison, <vscale x 8 x i32> zeroinitializer @@ -6249,14 +6052,13 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6268,12 +6070,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6284,12 +6085,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; 
CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6302,12 +6102,11 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6319,12 +6118,11 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t +; 
CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6335,12 +6133,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6353,12 +6150,11 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6370,12 +6166,11 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv8f32: ; CHECK: # 
%bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6386,12 +6181,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6404,12 +6198,11 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; 
CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6421,12 +6214,11 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6437,12 +6229,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6455,12 +6246,11 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; 
CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6506,9 +6296,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -6519,9 +6309,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6534,9 +6324,9 @@ define <vscale x 8 x i1> 
@fcmp_uno_fv_nxv8f32(<vscale x 8 x float> %va, float %b ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -6583,12 +6373,10 @@ define <vscale x 16 x i1> @fcmp_ogt_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_ogt_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6599,12 +6387,10 @@ define <vscale x 16 x i1> @fcmp_ogt_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = 
shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6617,12 +6403,10 @@ define <vscale x 16 x i1> @fcmp_ogt_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6634,12 +6418,10 @@ define <vscale x 16 x i1> @fcmp_oge_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_oge_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmfle.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6650,12 +6432,10 @@ define <vscale x 16 x i1> @fcmp_oge_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 
-; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6668,12 +6448,10 @@ define <vscale x 16 x i1> @fcmp_oge_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6685,12 +6463,10 @@ define <vscale x 16 x i1> @fcmp_olt_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_olt_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6701,12 +6477,10 @@ define <vscale x 16 x i1> 
@fcmp_olt_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6719,12 +6493,10 @@ define <vscale x 16 x i1> @fcmp_olt_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6736,12 +6508,10 @@ define <vscale x 16 x i1> @fcmp_ole_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_ole_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmfle.vv v25, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t 
; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6752,12 +6522,10 @@ define <vscale x 16 x i1> @fcmp_ole_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6770,12 +6538,10 @@ define <vscale x 16 x i1> @fcmp_ole_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6787,14 +6553,13 @@ define <vscale x 16 x i1> @fcmp_one_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_one_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: 
vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v24 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6805,14 +6570,13 @@ define <vscale x 16 x i1> @fcmp_one_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6825,14 +6589,13 @@ define <vscale x 16 x i1> @fcmp_one_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t 
-; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6844,9 +6607,9 @@ define <vscale x 16 x i1> @fcmp_ord_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_ord_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6857,9 +6620,9 @@ define <vscale x 16 x i1> @fcmp_ord_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6872,9 +6635,9 @@ define <vscale x 16 x i1> @fcmp_ord_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm 
v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6886,14 +6649,13 @@ define <vscale x 16 x i1> @fcmp_ueq_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_ueq_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v24 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6904,14 +6666,13 @@ define <vscale x 16 x i1> @fcmp_ueq_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6924,14 
+6685,13 @@ define <vscale x 16 x i1> @fcmp_ueq_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6943,12 +6703,11 @@ define <vscale x 16 x i1> @fcmp_ugt_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_ugt_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -6959,12 +6718,11 @@ define <vscale x 16 x i1> @fcmp_ugt_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: 
vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6977,12 +6735,11 @@ define <vscale x 16 x i1> @fcmp_ugt_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6994,12 +6751,11 @@ define <vscale x 16 x i1> @fcmp_uge_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_uge_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uge", metadata 
!"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -7010,12 +6766,11 @@ define <vscale x 16 x i1> @fcmp_uge_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7028,12 +6783,11 @@ define <vscale x 16 x i1> @fcmp_uge_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7045,12 +6799,11 @@ define <vscale x 16 x i1> @fcmp_ult_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_ult_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t -; CHECK-NEXT: 
vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -7061,12 +6814,11 @@ define <vscale x 16 x i1> @fcmp_ult_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7079,12 +6831,11 @@ define <vscale x 16 x i1> @fcmp_ult_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7096,12 +6847,11 @@ define <vscale x 
16 x i1> @fcmp_ule_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_ule_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -7112,12 +6862,11 @@ define <vscale x 16 x i1> @fcmp_ule_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7130,12 +6879,11 @@ define <vscale x 16 x i1> @fcmp_ule_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: 
vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7181,9 +6929,9 @@ define <vscale x 16 x i1> @fcmp_uno_vv_nxv16f32(<vscale x 16 x float> %va, <vsca ; CHECK-LABEL: fcmp_uno_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v16, v16 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vv v0, v16, v16 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmp.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -7194,9 +6942,9 @@ define <vscale x 16 x i1> @fcmp_uno_vf_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7209,9 +6957,9 @@ define <vscale x 16 x i1> @fcmp_uno_fv_nxv16f32(<vscale x 16 x float> %va, float ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v24, v16 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: 
vmor.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -7891,12 +7639,10 @@ define <vscale x 2 x i1> @fcmp_ogt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_ogt_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -7907,12 +7653,10 @@ define <vscale x 2 x i1> @fcmp_ogt_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -7925,12 +7669,10 @@ define <vscale x 2 x i1> @fcmp_ogt_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; 
CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -7942,12 +7684,10 @@ define <vscale x 2 x i1> @fcmp_oge_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_oge_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -7958,12 +7698,10 @@ define <vscale x 2 x i1> @fcmp_oge_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, 
double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -7976,12 +7714,10 @@ define <vscale x 2 x i1> @fcmp_oge_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -7993,12 +7729,10 @@ define <vscale x 2 x i1> @fcmp_olt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_olt_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8009,12 +7743,10 @@ define <vscale x 2 x i1> @fcmp_olt_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: 
vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8027,12 +7759,10 @@ define <vscale x 2 x i1> @fcmp_olt_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8044,12 +7774,10 @@ define <vscale x 2 x i1> @fcmp_ole_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_ole_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vv v13, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ole", metadata 
!"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8060,12 +7788,10 @@ define <vscale x 2 x i1> @fcmp_ole_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8078,12 +7804,10 @@ define <vscale x 2 x i1> @fcmp_ole_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8095,14 +7819,13 @@ define <vscale x 2 x i1> @fcmp_one_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_one_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; 
CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8113,14 +7836,13 @@ define <vscale x 2 x i1> @fcmp_one_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8133,14 +7855,13 @@ define <vscale x 2 x i1> @fcmp_one_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, 
v0.t -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8152,9 +7873,9 @@ define <vscale x 2 x i1> @fcmp_ord_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8165,9 +7886,9 @@ define <vscale x 2 x i1> @fcmp_ord_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8180,9 +7901,9 @@ define <vscale x 2 x i1> @fcmp_ord_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, 
v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8194,14 +7915,13 @@ define <vscale x 2 x i1> @fcmp_ueq_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_ueq_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vv v13, v10, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vv v2, v8, v10, v0.t +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8212,14 +7932,13 @@ define <vscale x 2 x i1> @fcmp_ueq_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: 
vmv1r.v v2, v0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8232,14 +7951,13 @@ define <vscale x 2 x i1> @fcmp_ueq_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8251,12 +7969,11 @@ define <vscale x 2 x i1> @fcmp_ugt_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 
x double> %va, <vscale x 2 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8267,12 +7984,11 @@ define <vscale x 2 x i1> @fcmp_ugt_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8285,12 +8001,11 @@ define <vscale x 2 x i1> @fcmp_ugt_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8302,12 +8017,11 @@ define <vscale x 2 x i1> @fcmp_uge_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v10, v10 -; CHECK-NEXT: vmfeq.vv v13, 
v8, v8 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v8, v10, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8318,12 +8032,11 @@ define <vscale x 2 x i1> @fcmp_uge_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8336,12 +8049,11 @@ define <vscale x 2 x i1> @fcmp_uge_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 
2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8353,12 +8065,11 @@ define <vscale x 2 x i1> @fcmp_ult_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8369,12 +8080,11 @@ define <vscale x 2 x i1> @fcmp_ult_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8387,12 +8097,11 @@ define <vscale x 2 x i1> @fcmp_ult_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: 
vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8404,12 +8113,11 @@ define <vscale x 2 x i1> @fcmp_ule_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmfeq.vv v13, v10, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vv v12, v10, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v2, v10, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8420,12 +8128,11 @@ define <vscale x 2 x i1> @fcmp_ule_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m 
v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8438,12 +8145,11 @@ define <vscale x 2 x i1> @fcmp_ule_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfeq.vf v12, v10, fa0 -; CHECK-NEXT: vmfeq.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 -; CHECK-NEXT: vmv1r.v v10, v0 -; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfeq.vf v0, v10, fa0 +; CHECK-NEXT: vmfeq.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8489,9 +8195,9 @@ define <vscale x 2 x i1> @fcmp_uno_vv_nxv2f64(<vscale x 2 x double> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfne.vv v12, v10, v10 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vv v0, v10, v10 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmp.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -8502,9 +8208,9 @@ define <vscale x 2 x i1> @fcmp_uno_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v10, v12 +; CHECK-NEXT: vmfne.vf v0, v10, 
fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8517,9 +8223,9 @@ define <vscale x 2 x i1> @fcmp_uno_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfne.vf v12, v10, fa0 -; CHECK-NEXT: vmfne.vv v10, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v10 +; CHECK-NEXT: vmfne.vf v0, v10, fa0 +; CHECK-NEXT: vmfne.vv v2, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -8566,12 +8272,10 @@ define <vscale x 4 x i1> @fcmp_ogt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_ogt_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8582,12 +8286,10 @@ define <vscale x 4 x i1> @fcmp_ogt_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, 
v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8600,12 +8302,10 @@ define <vscale x 4 x i1> @fcmp_ogt_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8617,12 +8317,10 @@ define <vscale x 4 x i1> @fcmp_oge_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_oge_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") 
strictfp ret <vscale x 4 x i1> %1 @@ -8633,12 +8331,10 @@ define <vscale x 4 x i1> @fcmp_oge_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8651,12 +8347,10 @@ define <vscale x 4 x i1> @fcmp_oge_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8668,12 +8362,10 @@ define <vscale x 4 x i1> @fcmp_olt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_olt_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 
+; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8684,12 +8376,10 @@ define <vscale x 4 x i1> @fcmp_olt_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8702,12 +8392,10 @@ define <vscale x 4 x i1> @fcmp_olt_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8719,12 +8407,10 @@ define <vscale x 4 x i1> @fcmp_ole_vv_nxv4f64(<vscale x 4 x 
double> %va, <vscale ; CHECK-LABEL: fcmp_ole_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vv v17, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8735,12 +8421,10 @@ define <vscale x 4 x i1> @fcmp_ole_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8753,12 +8437,10 @@ define <vscale x 4 x i1> @fcmp_ole_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm 
v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8770,14 +8452,13 @@ define <vscale x 4 x i1> @fcmp_one_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_one_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8788,14 +8469,13 @@ define <vscale x 4 x i1> @fcmp_one_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; 
CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8808,14 +8488,13 @@ define <vscale x 4 x i1> @fcmp_one_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8827,9 +8506,9 @@ define <vscale x 4 x i1> @fcmp_ord_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8840,9 +8519,9 @@ define <vscale x 4 x i1> @fcmp_ord_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f 
v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8855,9 +8534,9 @@ define <vscale x 4 x i1> @fcmp_ord_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8869,14 +8548,13 @@ define <vscale x 4 x i1> @fcmp_ueq_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_ueq_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vv v17, v12, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vv v4, v8, v12, v0.t +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ueq", metadata 
!"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8887,14 +8565,13 @@ define <vscale x 4 x i1> @fcmp_ueq_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8907,14 +8584,13 @@ define <vscale x 4 x i1> @fcmp_ueq_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v13, v0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmv1r.v v4, v0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8926,12 +8602,11 @@ 
define <vscale x 4 x i1> @fcmp_ugt_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8942,12 +8617,11 @@ define <vscale x 4 x i1> @fcmp_ugt_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8960,12 +8634,11 @@ define <vscale x 4 x i1> @fcmp_ugt_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; 
CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -8977,12 +8650,11 @@ define <vscale x 4 x i1> @fcmp_uge_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v12, v12 -; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v8, v12, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -8993,12 +8665,11 @@ define <vscale x 4 x i1> @fcmp_uge_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> 
poison, <vscale x 4 x i32> zeroinitializer @@ -9011,12 +8682,11 @@ define <vscale x 4 x i1> @fcmp_uge_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -9028,12 +8698,11 @@ define <vscale x 4 x i1> @fcmp_ult_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -9044,12 +8713,11 @@ define <vscale x 4 x i1> @fcmp_ult_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: 
vmv1r.v v12, v0 -; CHECK-NEXT: vmfge.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -9062,12 +8730,11 @@ define <vscale x 4 x i1> @fcmp_ult_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfle.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -9079,12 +8746,11 @@ define <vscale x 4 x i1> @fcmp_ule_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmfeq.vv v17, v12, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vv v16, v12, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v4, v12, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, 
<vscale x 4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -9095,12 +8761,11 @@ define <vscale x 4 x i1> @fcmp_ule_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmfgt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -9113,12 +8778,11 @@ define <vscale x 4 x i1> @fcmp_ule_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfeq.vf v16, v12, fa0 -; CHECK-NEXT: vmfeq.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 -; CHECK-NEXT: vmv1r.v v12, v0 -; CHECK-NEXT: vmflt.vf v12, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfeq.vf v0, v12, fa0 +; CHECK-NEXT: vmfeq.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -9164,9 +8828,9 @@ define <vscale x 4 x i1> @fcmp_uno_vv_nxv4f64(<vscale x 4 x double> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfne.vv v16, v12, v12 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; 
CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vv v0, v12, v12 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmp.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -9177,9 +8841,9 @@ define <vscale x 4 x i1> @fcmp_uno_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v12, v16 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -9192,9 +8856,9 @@ define <vscale x 4 x i1> @fcmp_uno_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfne.vf v16, v12, fa0 -; CHECK-NEXT: vmfne.vv v12, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v12 +; CHECK-NEXT: vmfne.vf v0, v12, fa0 +; CHECK-NEXT: vmfne.vv v4, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -9241,12 +8905,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ogt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; 
CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9257,12 +8919,10 @@ define <vscale x 8 x i1> @fcmp_ogt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9275,12 +8935,10 @@ define <vscale x 8 x i1> @fcmp_ogt_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9292,12 +8950,10 @@ define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: 
fcmp_oge_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmfle.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9308,12 +8964,10 @@ define <vscale x 8 x i1> @fcmp_oge_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9326,12 +8980,10 @@ define <vscale x 8 x i1> @fcmp_oge_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x 
double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9343,12 +8995,10 @@ define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_olt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9359,12 +9009,10 @@ define <vscale x 8 x i1> @fcmp_olt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9377,12 +9025,10 @@ define <vscale x 8 x i1> @fcmp_olt_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; 
CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9394,12 +9040,10 @@ define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ole_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmfle.vv v25, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v25 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9410,12 +9054,10 @@ define <vscale x 8 x i1> @fcmp_ole_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9428,12 +9070,10 @@ define <vscale x 8 x 
i1> @fcmp_ole_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v17 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9445,14 +9085,13 @@ define <vscale x 8 x i1> @fcmp_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_one_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v24 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9463,14 +9102,13 @@ define <vscale x 8 x i1> @fcmp_one_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: 
vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9483,14 +9121,13 @@ define <vscale x 8 x i1> @fcmp_one_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9502,9 +9139,9 @@ define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ord_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9515,9 +9152,9 @@ define <vscale x 8 x i1> 
@fcmp_ord_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9530,9 +9167,9 @@ define <vscale x 8 x i1> @fcmp_ord_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 -; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 +; CHECK-NEXT: vmfeq.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9544,14 +9181,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ueq_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmv1r.v v25, v0 -; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v24 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> 
%va, <vscale x 8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9562,14 +9198,13 @@ define <vscale x 8 x i1> @fcmp_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9582,14 +9217,13 @@ define <vscale x 8 x i1> @fcmp_ueq_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v17, v0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0, v0.t -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9601,12 +9235,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ugt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, 
zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9617,12 +9250,11 @@ define <vscale x 8 x i1> @fcmp_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9635,12 +9267,11 @@ define <vscale x 8 x i1> @fcmp_ugt_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = 
insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9652,12 +9283,11 @@ define <vscale x 8 x i1> @fcmp_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_uge_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v16, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9668,12 +9298,11 @@ define <vscale x 8 x i1> @fcmp_uge_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9686,12 +9315,11 @@ define <vscale x 8 x i1> @fcmp_uge_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf 
v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9703,12 +9331,11 @@ define <vscale x 8 x i1> @fcmp_ult_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ult_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmfle.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9719,12 +9346,11 @@ define <vscale x 8 x i1> @fcmp_ult_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, 
double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9737,12 +9363,11 @@ define <vscale x 8 x i1> @fcmp_ult_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmfle.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9754,12 +9379,11 @@ define <vscale x 8 x i1> @fcmp_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_ule_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: vmflt.vv v0, v16, v8, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9770,12 +9394,11 @@ define <vscale x 8 x i1> @fcmp_ule_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, 
v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9788,12 +9411,11 @@ define <vscale x 8 x i1> @fcmp_ule_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 -; CHECK-NEXT: vmv1r.v v16, v0 -; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmand.mm v0, v16, v0 +; CHECK-NEXT: vmflt.vf v0, v8, fa0, v0.t +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9839,9 +9461,9 @@ define <vscale x 8 x i1> @fcmp_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscale ; CHECK-LABEL: fcmp_uno_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v16, v16 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vv v0, v16, v16 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -9852,9 +9474,9 @@ define <vscale x 8 x i1> @fcmp_uno_vf_nxv8f64(<vscale x 8 x double> 
%va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -9867,9 +9489,9 @@ define <vscale x 8 x i1> @fcmp_uno_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 -; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v24, v16 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 +; CHECK-NEXT: vmfne.vv v8, v8, v8 +; CHECK-NEXT: vmor.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll index 9a10359..32e925f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll @@ -1605,9 +1605,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_oeq_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, 
metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1617,9 +1617,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_oeq_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1631,9 +1631,9 @@ define <vscale x 8 x i1> @fcmps_oeq_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_oeq_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1781,9 +1781,9 @@ define <vscale x 8 x i1> @fcmps_one_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_one_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1793,9 +1793,9 @@ define <vscale 
x 8 x i1> @fcmps_one_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_one_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1807,9 +1807,9 @@ define <vscale x 8 x i1> @fcmps_one_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_one_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1821,9 +1821,9 @@ define <vscale x 8 x i1> @fcmps_ord_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_ord_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1834,9 +1834,9 @@ define <vscale x 8 x i1> @fcmps_ord_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, 
e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1849,9 +1849,9 @@ define <vscale x 8 x i1> @fcmps_ord_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1863,9 +1863,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_ueq_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1875,9 +1875,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ueq_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, 
fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1889,9 +1889,9 @@ define <vscale x 8 x i1> @fcmps_ueq_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ueq_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1903,8 +1903,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_ugt_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1914,8 +1914,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ugt_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, 
<vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1927,8 +1927,8 @@ define <vscale x 8 x i1> @fcmps_ugt_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ugt_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1940,8 +1940,8 @@ define <vscale x 8 x i1> @fcmps_uge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_uge_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1951,8 +1951,8 @@ define <vscale x 8 x i1> @fcmps_uge_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_uge_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1964,8 +1964,8 @@ define <vscale x 8 x i1> @fcmps_uge_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_uge_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, 
v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -1977,8 +1977,8 @@ define <vscale x 8 x i1> @fcmps_ult_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_ult_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -1988,8 +1988,8 @@ define <vscale x 8 x i1> @fcmps_ult_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ult_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2001,8 +2001,8 @@ define <vscale x 8 x i1> @fcmps_ult_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ult_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2014,8 +2014,8 @@ define <vscale x 8 x i1> @fcmps_ule_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: 
fcmps_ule_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2025,8 +2025,8 @@ define <vscale x 8 x i1> @fcmps_ule_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ule_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2038,8 +2038,8 @@ define <vscale x 8 x i1> @fcmps_ule_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_ule_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2051,9 +2051,9 @@ define <vscale x 8 x i1> @fcmps_une_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_une_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> 
@llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2063,9 +2063,9 @@ define <vscale x 8 x i1> @fcmps_une_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_une_vf_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2077,9 +2077,9 @@ define <vscale x 8 x i1> @fcmps_une_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK-LABEL: fcmps_une_fv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2091,10 +2091,10 @@ define <vscale x 8 x i1> @fcmps_uno_vv_nxv8f16(<vscale x 8 x half> %va, <vscale ; CHECK-LABEL: fcmps_uno_vv_nxv8f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> 
@llvm.experimental.constrained.fcmps.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -2105,10 +2105,10 @@ define <vscale x 8 x i1> @fcmps_uno_vf_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2121,10 +2121,10 @@ define <vscale x 8 x i1> @fcmps_uno_fv_nxv8f16(<vscale x 8 x half> %va, half %b) ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmnot.m v10, v12 -; CHECK-NEXT: vmfle.vv v11, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v10, v11 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmnot.m v10, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v10, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer @@ -2137,9 +2137,9 @@ define <vscale x 16 x i1> @fcmps_oeq_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_oeq_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 
x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2149,9 +2149,9 @@ define <vscale x 16 x i1> @fcmps_oeq_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_oeq_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2163,9 +2163,9 @@ define <vscale x 16 x i1> @fcmps_oeq_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_oeq_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2313,9 +2313,9 @@ define <vscale x 16 x i1> @fcmps_one_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_one_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> 
%vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2325,9 +2325,9 @@ define <vscale x 16 x i1> @fcmps_one_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_one_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2339,9 +2339,9 @@ define <vscale x 16 x i1> @fcmps_one_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_one_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2353,9 +2353,9 @@ define <vscale x 16 x i1> @fcmps_ord_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_ord_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2366,9 +2366,9 
@@ define <vscale x 16 x i1> @fcmps_ord_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2381,9 +2381,9 @@ define <vscale x 16 x i1> @fcmps_ord_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2395,9 +2395,9 @@ define <vscale x 16 x i1> @fcmps_ueq_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_ueq_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2407,9 +2407,9 @@ define <vscale x 16 x i1> @fcmps_ueq_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: 
fcmps_ueq_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2421,9 +2421,9 @@ define <vscale x 16 x i1> @fcmps_ueq_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_ueq_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2435,8 +2435,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_ugt_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2446,8 +2446,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_ugt_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, 
v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2459,8 +2459,8 @@ define <vscale x 16 x i1> @fcmps_ugt_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_ugt_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2472,8 +2472,8 @@ define <vscale x 16 x i1> @fcmps_uge_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_uge_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2483,8 +2483,8 @@ define <vscale x 16 x i1> @fcmps_uge_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_uge_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2496,8 +2496,8 @@ define <vscale x 16 x i1> @fcmps_uge_fv_nxv16f16(<vscale x 16 x half> %va, half ; 
CHECK-LABEL: fcmps_uge_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2509,8 +2509,8 @@ define <vscale x 16 x i1> @fcmps_ult_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_ult_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2520,8 +2520,8 @@ define <vscale x 16 x i1> @fcmps_ult_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_ult_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2533,8 +2533,8 @@ define <vscale x 16 x i1> @fcmps_ult_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_ult_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 
16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2546,8 +2546,8 @@ define <vscale x 16 x i1> @fcmps_ule_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_ule_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2557,8 +2557,8 @@ define <vscale x 16 x i1> @fcmps_ule_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_ule_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2570,8 +2570,8 @@ define <vscale x 16 x i1> @fcmps_ule_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_ule_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2583,9 +2583,9 @@ define <vscale x 16 x i1> @fcmps_une_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_une_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv 
v17, v8, v12 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2595,9 +2595,9 @@ define <vscale x 16 x i1> @fcmps_une_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_une_vf_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2609,9 +2609,9 @@ define <vscale x 16 x i1> @fcmps_une_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK-LABEL: fcmps_une_fv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2623,10 +2623,10 @@ define <vscale x 16 x i1> @fcmps_uno_vv_nxv16f16(<vscale x 16 x half> %va, <vsca ; CHECK-LABEL: fcmps_uno_vv_nxv16f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; 
CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -2637,10 +2637,10 @@ define <vscale x 16 x i1> @fcmps_uno_vf_nxv16f16(<vscale x 16 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2653,10 +2653,10 @@ define <vscale x 16 x i1> @fcmps_uno_fv_nxv16f16(<vscale x 16 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmnot.m v12, v16 -; CHECK-NEXT: vmfle.vv v13, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v12, v13 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmnot.m v12, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v12, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer @@ -2669,9 +2669,9 @@ define <vscale x 32 x i1> @fcmps_oeq_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_oeq_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, 
v8, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v8, v8, v16 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -2681,9 +2681,9 @@ define <vscale x 32 x i1> @fcmps_oeq_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_oeq_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v8, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2695,9 +2695,9 @@ define <vscale x 32 x i1> @fcmps_oeq_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_oeq_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v8, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2845,9 +2845,9 @@ define <vscale x 32 x i1> @fcmps_one_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_one_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: 
vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -2857,9 +2857,9 @@ define <vscale x 32 x i1> @fcmps_one_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_one_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2871,9 +2871,9 @@ define <vscale x 32 x i1> @fcmps_one_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_one_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2885,9 +2885,9 @@ define <vscale x 32 x i1> @fcmps_ord_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_ord_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v16 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> 
@llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -2898,9 +2898,9 @@ define <vscale x 32 x i1> @fcmps_ord_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2913,9 +2913,9 @@ define <vscale x 32 x i1> @fcmps_ord_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2927,9 +2927,9 @@ define <vscale x 32 x i1> @fcmps_ueq_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_ueq_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata 
!"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -2939,9 +2939,9 @@ define <vscale x 32 x i1> @fcmps_ueq_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_ueq_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2953,9 +2953,9 @@ define <vscale x 32 x i1> @fcmps_ueq_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_ueq_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2967,8 +2967,8 @@ define <vscale x 32 x i1> @fcmps_ugt_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_ugt_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -2978,8 +2978,8 @@ define <vscale x 32 x i1> @fcmps_ugt_vf_nxv32f16(<vscale x 32 x half> %va, half ; 
CHECK-LABEL: fcmps_ugt_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -2991,8 +2991,8 @@ define <vscale x 32 x i1> @fcmps_ugt_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_ugt_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3004,8 +3004,8 @@ define <vscale x 32 x i1> @fcmps_uge_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_uge_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3015,8 +3015,8 @@ define <vscale x 32 x i1> @fcmps_uge_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_uge_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 
32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3028,8 +3028,8 @@ define <vscale x 32 x i1> @fcmps_uge_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_uge_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3041,8 +3041,8 @@ define <vscale x 32 x i1> @fcmps_ult_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_ult_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3052,8 +3052,8 @@ define <vscale x 32 x i1> @fcmps_ult_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_ult_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3065,8 +3065,8 @@ define <vscale x 32 x i1> @fcmps_ult_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_ult_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, 
v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3078,8 +3078,8 @@ define <vscale x 32 x i1> @fcmps_ule_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_ule_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3089,8 +3089,8 @@ define <vscale x 32 x i1> @fcmps_ule_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_ule_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3102,8 +3102,8 @@ define <vscale x 32 x i1> @fcmps_ule_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_ule_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3115,9 +3115,9 @@ define <vscale x 32 x i1> @fcmps_une_vv_nxv32f16(<vscale x 
32 x half> %va, <vsca ; CHECK-LABEL: fcmps_une_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmnand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v8, v8, v16 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3127,9 +3127,9 @@ define <vscale x 32 x i1> @fcmps_une_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_une_vf_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v8, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3141,9 +3141,9 @@ define <vscale x 32 x i1> @fcmps_une_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK-LABEL: fcmps_une_fv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v8, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3155,10 +3155,10 @@ define <vscale x 32 x i1> @fcmps_uno_vv_nxv32f16(<vscale x 32 x half> %va, <vsca ; CHECK-LABEL: fcmps_uno_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v16 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 32 x i1> @llvm.experimental.constrained.fcmps.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 32 x i1> %1 @@ -3169,10 +3169,10 @@ define <vscale x 32 x i1> @fcmps_uno_vf_nxv32f16(<vscale x 32 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -3185,10 +3185,10 @@ define <vscale x 32 x i1> @fcmps_uno_fv_nxv32f16(<vscale x 32 x half> %va, half ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmnot.m v16, v24 -; CHECK-NEXT: vmfle.vv v17, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v16, v17 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmnot.m v16, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v16, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 32 x half> poison, half %b, i32 0 %splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer @@ -4265,9 +4265,9 @@ define <vscale x 4 x i1> @fcmps_oeq_vv_nxv4f32(<vscale x 4 x float> %va, 
<vscale ; CHECK-LABEL: fcmps_oeq_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4277,9 +4277,9 @@ define <vscale x 4 x i1> @fcmps_oeq_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_oeq_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4291,9 +4291,9 @@ define <vscale x 4 x i1> @fcmps_oeq_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_oeq_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4441,9 +4441,9 @@ define <vscale x 4 x i1> @fcmps_one_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_one_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, 
ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4453,9 +4453,9 @@ define <vscale x 4 x i1> @fcmps_one_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_one_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4467,9 +4467,9 @@ define <vscale x 4 x i1> @fcmps_one_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_one_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4481,9 +4481,9 @@ define <vscale x 4 x i1> @fcmps_ord_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_ord_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, 
v10, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4494,9 +4494,9 @@ define <vscale x 4 x i1> @fcmps_ord_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4509,9 +4509,9 @@ define <vscale x 4 x i1> @fcmps_ord_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4523,9 +4523,9 @@ define <vscale x 4 x i1> @fcmps_ueq_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_ueq_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmnor.mm v0, v2, 
v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4535,9 +4535,9 @@ define <vscale x 4 x i1> @fcmps_ueq_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_ueq_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4549,9 +4549,9 @@ define <vscale x 4 x i1> @fcmps_ueq_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_ueq_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4563,8 +4563,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_ugt_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ugt", metadata 
!"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4574,8 +4574,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_ugt_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4587,8 +4587,8 @@ define <vscale x 4 x i1> @fcmps_ugt_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_ugt_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4600,8 +4600,8 @@ define <vscale x 4 x i1> @fcmps_uge_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_uge_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4611,8 +4611,8 @@ define <vscale x 4 x i1> @fcmps_uge_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_uge_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, 
v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4624,8 +4624,8 @@ define <vscale x 4 x i1> @fcmps_uge_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_uge_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4637,8 +4637,8 @@ define <vscale x 4 x i1> @fcmps_ult_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_ult_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4648,8 +4648,8 @@ define <vscale x 4 x i1> @fcmps_ult_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_ult_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4661,8 +4661,8 @@ define <vscale x 4 x i1> @fcmps_ult_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: 
fcmps_ult_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4674,8 +4674,8 @@ define <vscale x 4 x i1> @fcmps_ule_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_ule_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4685,8 +4685,8 @@ define <vscale x 4 x i1> @fcmps_ule_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_ule_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4698,8 +4698,8 @@ define <vscale x 4 x i1> @fcmps_ule_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_ule_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> 
%head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4711,9 +4711,9 @@ define <vscale x 4 x i1> @fcmps_une_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_une_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4723,9 +4723,9 @@ define <vscale x 4 x i1> @fcmps_une_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_une_vf_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4737,9 +4737,9 @@ define <vscale x 4 x i1> @fcmps_une_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK-LABEL: fcmps_une_fv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4751,10 +4751,10 @@ 
define <vscale x 4 x i1> @fcmps_uno_vv_nxv4f32(<vscale x 4 x float> %va, <vscale ; CHECK-LABEL: fcmps_uno_vv_nxv4f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -4765,10 +4765,10 @@ define <vscale x 4 x i1> @fcmps_uno_vf_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4781,10 +4781,10 @@ define <vscale x 4 x i1> @fcmps_uno_fv_nxv4f32(<vscale x 4 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmnot.m v10, v12 -; CHECK-NEXT: vmfle.vv v11, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v10, v11 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmnot.m v10, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v10, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 4 x float> %head, <vscale x 4 
x float> poison, <vscale x 4 x i32> zeroinitializer @@ -4797,9 +4797,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_oeq_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -4809,9 +4809,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_oeq_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -4823,9 +4823,9 @@ define <vscale x 8 x i1> @fcmps_oeq_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_oeq_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -4973,9 +4973,9 @@ define <vscale x 8 x i1> 
@fcmps_one_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_one_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -4985,9 +4985,9 @@ define <vscale x 8 x i1> @fcmps_one_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_one_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -4999,9 +4999,9 @@ define <vscale x 8 x i1> @fcmps_one_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_one_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5013,9 +5013,9 @@ define <vscale x 8 x i1> @fcmps_ord_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_ord_vv_nxv8f32: ; CHECK: # %bb.0: 
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5026,9 +5026,9 @@ define <vscale x 8 x i1> @fcmps_ord_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5041,9 +5041,9 @@ define <vscale x 8 x i1> @fcmps_ord_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5055,9 +5055,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_ueq_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, 
v12, v8 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5067,9 +5067,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ueq_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5081,9 +5081,9 @@ define <vscale x 8 x i1> @fcmps_ueq_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ueq_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5095,8 +5095,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_ugt_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale 
x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5106,8 +5106,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ugt_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5119,8 +5119,8 @@ define <vscale x 8 x i1> @fcmps_ugt_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ugt_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5132,8 +5132,8 @@ define <vscale x 8 x i1> @fcmps_uge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_uge_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5143,8 +5143,8 @@ define <vscale x 8 x i1> @fcmps_uge_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_uge_vf_nxv8f32: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5156,8 +5156,8 @@ define <vscale x 8 x i1> @fcmps_uge_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_uge_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5169,8 +5169,8 @@ define <vscale x 8 x i1> @fcmps_ult_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_ult_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5180,8 +5180,8 @@ define <vscale x 8 x i1> @fcmps_ult_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ult_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 
x i32> zeroinitializer @@ -5193,8 +5193,8 @@ define <vscale x 8 x i1> @fcmps_ult_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ult_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5206,8 +5206,8 @@ define <vscale x 8 x i1> @fcmps_ule_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_ule_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5217,8 +5217,8 @@ define <vscale x 8 x i1> @fcmps_ule_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ule_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5230,8 +5230,8 @@ define <vscale x 8 x i1> @fcmps_ule_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_ule_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m 
v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5243,9 +5243,9 @@ define <vscale x 8 x i1> @fcmps_une_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_une_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5255,9 +5255,9 @@ define <vscale x 8 x i1> @fcmps_une_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_une_vf_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5269,9 +5269,9 @@ define <vscale x 8 x i1> @fcmps_une_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK-LABEL: fcmps_une_fv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat 
= shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5283,10 +5283,10 @@ define <vscale x 8 x i1> @fcmps_uno_vv_nxv8f32(<vscale x 8 x float> %va, <vscale ; CHECK-LABEL: fcmps_uno_vv_nxv8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -5297,10 +5297,10 @@ define <vscale x 8 x i1> @fcmps_uno_vf_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5313,10 +5313,10 @@ define <vscale x 8 x i1> @fcmps_uno_fv_nxv8f32(<vscale x 8 x float> %va, float % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmnot.m v12, v16 -; CHECK-NEXT: vmfle.vv v13, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v12, v13 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmnot.m v12, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v12, v0 ; CHECK-NEXT: 
ret %head = insertelement <vscale x 8 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer @@ -5329,9 +5329,9 @@ define <vscale x 16 x i1> @fcmps_oeq_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_oeq_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v8, v8, v16 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5341,9 +5341,9 @@ define <vscale x 16 x i1> @fcmps_oeq_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_oeq_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v8, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5355,9 +5355,9 @@ define <vscale x 16 x i1> @fcmps_oeq_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_oeq_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v8, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = 
shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5505,9 +5505,9 @@ define <vscale x 16 x i1> @fcmps_one_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_one_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5517,9 +5517,9 @@ define <vscale x 16 x i1> @fcmps_one_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_one_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5531,9 +5531,9 @@ define <vscale x 16 x i1> @fcmps_one_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_one_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> 
zeroinitializer @@ -5545,9 +5545,9 @@ define <vscale x 16 x i1> @fcmps_ord_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_ord_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v16 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5558,9 +5558,9 @@ define <vscale x 16 x i1> @fcmps_ord_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5573,9 +5573,9 @@ define <vscale x 16 x i1> @fcmps_ord_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5587,9 +5587,9 @@ define <vscale x 16 x i1> @fcmps_ueq_vv_nxv16f32(<vscale x 16 
x float> %va, <vsc ; CHECK-LABEL: fcmps_ueq_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5599,9 +5599,9 @@ define <vscale x 16 x i1> @fcmps_ueq_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ueq_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5613,9 +5613,9 @@ define <vscale x 16 x i1> @fcmps_ueq_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ueq_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5627,8 +5627,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_ugt_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5638,8 +5638,8 @@ define <vscale x 16 x i1> @fcmps_ugt_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ugt_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5651,8 +5651,8 @@ define <vscale x 16 x i1> @fcmps_ugt_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ugt_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5664,8 +5664,8 @@ define <vscale x 16 x i1> @fcmps_uge_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_uge_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uge", metadata 
!"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5675,8 +5675,8 @@ define <vscale x 16 x i1> @fcmps_uge_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_uge_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5688,8 +5688,8 @@ define <vscale x 16 x i1> @fcmps_uge_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_uge_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5701,8 +5701,8 @@ define <vscale x 16 x i1> @fcmps_ult_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_ult_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5712,8 +5712,8 @@ define <vscale x 16 x i1> @fcmps_ult_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ult_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; 
CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5725,8 +5725,8 @@ define <vscale x 16 x i1> @fcmps_ult_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ult_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5738,8 +5738,8 @@ define <vscale x 16 x i1> @fcmps_ule_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_ule_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5749,8 +5749,8 @@ define <vscale x 16 x i1> @fcmps_ule_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ule_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5762,8 +5762,8 @@ define <vscale x 16 x i1> 
@fcmps_ule_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_ule_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5775,9 +5775,9 @@ define <vscale x 16 x i1> @fcmps_une_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_une_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmnand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v8, v8, v16 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5787,9 +5787,9 @@ define <vscale x 16 x i1> @fcmps_une_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_une_vf_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v8, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5801,9 +5801,9 @@ define <vscale x 16 x i1> @fcmps_une_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK-LABEL: fcmps_une_fv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; 
CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v8, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5815,10 +5815,10 @@ define <vscale x 16 x i1> @fcmps_uno_vv_nxv16f32(<vscale x 16 x float> %va, <vsc ; CHECK-LABEL: fcmps_uno_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v16 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 16 x i1> @llvm.experimental.constrained.fcmps.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 16 x i1> %1 @@ -5829,10 +5829,10 @@ define <vscale x 16 x i1> @fcmps_uno_vf_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -5845,10 +5845,10 @@ define <vscale x 16 x i1> @fcmps_uno_fv_nxv16f32(<vscale x 16 x float> %va, floa ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; 
CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmnot.m v16, v24 -; CHECK-NEXT: vmfle.vv v17, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v16, v17 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmnot.m v16, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v16, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x float> poison, float %b, i32 0 %splat = shufflevector <vscale x 16 x float> %head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer @@ -6393,9 +6393,9 @@ define <vscale x 2 x i1> @fcmps_oeq_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_oeq_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6405,9 +6405,9 @@ define <vscale x 2 x i1> @fcmps_oeq_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_oeq_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6419,9 +6419,9 @@ define <vscale x 2 x i1> @fcmps_oeq_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_oeq_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma 
-; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6569,9 +6569,9 @@ define <vscale x 2 x i1> @fcmps_one_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_one_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6581,9 +6581,9 @@ define <vscale x 2 x i1> @fcmps_one_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_one_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6595,9 +6595,9 @@ define <vscale x 2 x i1> @fcmps_one_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_one_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmor.mm 
v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6609,9 +6609,9 @@ define <vscale x 2 x i1> @fcmps_ord_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_ord_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6622,9 +6622,9 @@ define <vscale x 2 x i1> @fcmps_ord_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v10, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6637,9 +6637,9 @@ define <vscale x 2 x i1> @fcmps_ord_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v10 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: 
vmand.mm v0, v0, v2 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6651,9 +6651,9 @@ define <vscale x 2 x i1> @fcmps_ueq_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_ueq_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmflt.vv v13, v10, v8 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v2, v10, v8 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6663,9 +6663,9 @@ define <vscale x 2 x i1> @fcmps_ueq_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ueq_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6677,9 +6677,9 @@ define <vscale x 2 x i1> @fcmps_ueq_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ueq_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmflt.vf v11, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v11, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v2, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, 
double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6691,8 +6691,8 @@ define <vscale x 2 x i1> @fcmps_ugt_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_ugt_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6702,8 +6702,8 @@ define <vscale x 2 x i1> @fcmps_ugt_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ugt_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6715,8 +6715,8 @@ define <vscale x 2 x i1> @fcmps_ugt_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ugt_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6728,8 +6728,8 @@ define <vscale x 2 x i1> @fcmps_uge_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_uge_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; 
CHECK-NEXT: vmflt.vv v12, v8, v10 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6739,8 +6739,8 @@ define <vscale x 2 x i1> @fcmps_uge_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_uge_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6752,8 +6752,8 @@ define <vscale x 2 x i1> @fcmps_uge_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_uge_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6765,8 +6765,8 @@ define <vscale x 2 x i1> @fcmps_ult_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_ult_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 2 
x i1> %1 @@ -6776,8 +6776,8 @@ define <vscale x 2 x i1> @fcmps_ult_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ult_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6789,8 +6789,8 @@ define <vscale x 2 x i1> @fcmps_ult_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ult_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6802,8 +6802,8 @@ define <vscale x 2 x i1> @fcmps_ule_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_ule_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vv v12, v10, v8 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6813,8 +6813,8 @@ define <vscale x 2 x i1> @fcmps_ule_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ule_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfgt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, 
v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6826,8 +6826,8 @@ define <vscale x 2 x i1> @fcmps_ule_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_ule_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmflt.vf v10, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6839,9 +6839,9 @@ define <vscale x 2 x i1> @fcmps_une_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_une_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v8 -; CHECK-NEXT: vmfle.vv v13, v8, v10 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v2, v8, v10 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6851,9 +6851,9 @@ define <vscale x 2 x i1> @fcmps_une_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_une_vf_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfge.vf v10, v8, fa0 -; CHECK-NEXT: vmfle.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, 
<vscale x 2 x i32> zeroinitializer @@ -6865,9 +6865,9 @@ define <vscale x 2 x i1> @fcmps_une_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK-LABEL: fcmps_une_fv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vf v10, v8, fa0 -; CHECK-NEXT: vmfge.vf v11, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v11, v10 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v2, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v2, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6879,10 +6879,10 @@ define <vscale x 2 x i1> @fcmps_uno_vv_nxv2f64(<vscale x 2 x double> %va, <vscal ; CHECK-LABEL: fcmps_uno_vv_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; CHECK-NEXT: vmfle.vv v12, v10, v10 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vv v0, v10, v10 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 2 x i1> @llvm.experimental.constrained.fcmps.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 2 x i1> %1 @@ -6893,10 +6893,10 @@ define <vscale x 2 x i1> @fcmps_uno_vf_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmfle.vv v10, v8, v8 -; CHECK-NEXT: vmnot.m v8, v10 -; CHECK-NEXT: vmorn.mm v0, v8, v12 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmfle.vv v2, v8, v8 +; CHECK-NEXT: vmnot.m v8, v2 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, 
<vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6909,10 +6909,10 @@ define <vscale x 2 x i1> @fcmps_uno_fv_nxv2f64(<vscale x 2 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa0 -; CHECK-NEXT: vmfle.vf v12, v10, fa0 -; CHECK-NEXT: vmnot.m v10, v12 -; CHECK-NEXT: vmfle.vv v11, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v10, v11 +; CHECK-NEXT: vmfle.vf v0, v10, fa0 +; CHECK-NEXT: vmnot.m v10, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v10, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 2 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 2 x double> %head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer @@ -6925,9 +6925,9 @@ define <vscale x 4 x i1> @fcmps_oeq_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_oeq_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -6937,9 +6937,9 @@ define <vscale x 4 x i1> @fcmps_oeq_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_oeq_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, 
<vscale x 4 x i32> zeroinitializer @@ -6951,9 +6951,9 @@ define <vscale x 4 x i1> @fcmps_oeq_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_oeq_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7101,9 +7101,9 @@ define <vscale x 4 x i1> @fcmps_one_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_one_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7113,9 +7113,9 @@ define <vscale x 4 x i1> @fcmps_one_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_one_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7127,9 +7127,9 @@ define <vscale x 4 x i1> 
@fcmps_one_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_one_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7141,9 +7141,9 @@ define <vscale x 4 x i1> @fcmps_ord_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_ord_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7154,9 +7154,9 @@ define <vscale x 4 x i1> @fcmps_ord_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v12, v16 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7169,9 +7169,9 @@ define <vscale x 4 x i1> @fcmps_ord_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, 
zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v12 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v4 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7183,9 +7183,9 @@ define <vscale x 4 x i1> @fcmps_ueq_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_ueq_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmflt.vv v17, v12, v8 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmflt.vv v4, v12, v8 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7195,9 +7195,9 @@ define <vscale x 4 x i1> @fcmps_ueq_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_ueq_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7209,9 +7209,9 @@ define <vscale x 4 x i1> @fcmps_ueq_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_ueq_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; 
CHECK-NEXT: vmflt.vf v13, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v13, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v4, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7223,8 +7223,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_ugt_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7234,8 +7234,8 @@ define <vscale x 4 x i1> @fcmps_ugt_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_ugt_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7247,8 +7247,8 @@ define <vscale x 4 x i1> @fcmps_ugt_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_ugt_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, 
<vscale x 4 x i32> zeroinitializer @@ -7260,8 +7260,8 @@ define <vscale x 4 x i1> @fcmps_uge_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_uge_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v8, v12 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7271,8 +7271,8 @@ define <vscale x 4 x i1> @fcmps_uge_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_uge_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7284,8 +7284,8 @@ define <vscale x 4 x i1> @fcmps_uge_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_uge_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7297,8 +7297,8 @@ define <vscale x 4 x i1> @fcmps_ult_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_ult_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 
+; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7308,8 +7308,8 @@ define <vscale x 4 x i1> @fcmps_ult_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_ult_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7321,8 +7321,8 @@ define <vscale x 4 x i1> @fcmps_ult_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_ult_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7334,8 +7334,8 @@ define <vscale x 4 x i1> @fcmps_ule_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_ule_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vv v16, v12, v8 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7345,8 +7345,8 @@ define <vscale x 4 x i1> @fcmps_ule_vf_nxv4f64(<vscale x 4 x double> 
%va, double ; CHECK-LABEL: fcmps_ule_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfgt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7358,8 +7358,8 @@ define <vscale x 4 x i1> @fcmps_ule_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_ule_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmflt.vf v12, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7371,9 +7371,9 @@ define <vscale x 4 x i1> @fcmps_une_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_une_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v8 -; CHECK-NEXT: vmfle.vv v17, v8, v12 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v4, v8, v12 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7383,9 +7383,9 @@ define <vscale x 4 x i1> @fcmps_une_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_une_vf_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfge.vf v12, v8, fa0 -; CHECK-NEXT: vmfle.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfge.vf v0, v8, 
fa0 +; CHECK-NEXT: vmfle.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7397,9 +7397,9 @@ define <vscale x 4 x i1> @fcmps_une_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK-LABEL: fcmps_une_fv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vf v12, v8, fa0 -; CHECK-NEXT: vmfge.vf v13, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v13, v12 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v4, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v4, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7411,10 +7411,10 @@ define <vscale x 4 x i1> @fcmps_uno_vv_nxv4f64(<vscale x 4 x double> %va, <vscal ; CHECK-LABEL: fcmps_uno_vv_nxv4f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; CHECK-NEXT: vmfle.vv v16, v12, v12 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vv v0, v12, v12 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 4 x i1> @llvm.experimental.constrained.fcmps.nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 4 x i1> %1 @@ -7425,10 +7425,10 @@ define <vscale x 4 x i1> @fcmps_uno_vf_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmfle.vv v12, v8, v8 -; CHECK-NEXT: vmnot.m v8, v12 -; CHECK-NEXT: vmorn.mm v0, v8, v16 +; CHECK-NEXT: vmfle.vf v0, v12, 
fa0 +; CHECK-NEXT: vmfle.vv v4, v8, v8 +; CHECK-NEXT: vmnot.m v8, v4 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7441,10 +7441,10 @@ define <vscale x 4 x i1> @fcmps_uno_fv_nxv4f64(<vscale x 4 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfmv.v.f v12, fa0 -; CHECK-NEXT: vmfle.vf v16, v12, fa0 -; CHECK-NEXT: vmnot.m v12, v16 -; CHECK-NEXT: vmfle.vv v13, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v12, v13 +; CHECK-NEXT: vmfle.vf v0, v12, fa0 +; CHECK-NEXT: vmnot.m v12, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v12, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 4 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 4 x double> %head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer @@ -7457,9 +7457,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_oeq_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v8, v8, v16 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7469,9 +7469,9 @@ define <vscale x 8 x i1> @fcmps_oeq_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_oeq_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 
+; CHECK-NEXT: vmfle.vf v8, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7483,9 +7483,9 @@ define <vscale x 8 x i1> @fcmps_oeq_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_oeq_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0 -; CHECK-NEXT: vmand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v8, v8, fa0 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7633,9 +7633,9 @@ define <vscale x 8 x i1> @fcmps_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_one_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7645,9 +7645,9 @@ define <vscale x 8 x i1> @fcmps_one_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_one_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = 
insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7659,9 +7659,9 @@ define <vscale x 8 x i1> @fcmps_one_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_one_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7673,9 +7673,9 @@ define <vscale x 8 x i1> @fcmps_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_ord_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v16 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7686,9 +7686,9 @@ define <vscale x 8 x i1> @fcmps_ord_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x 
double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7701,9 +7701,9 @@ define <vscale x 8 x i1> @fcmps_ord_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmand.mm v0, v0, v8 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7715,9 +7715,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_ueq_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v8, v16, v8 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7727,9 +7727,9 @@ define <vscale x 8 x i1> @fcmps_ueq_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ueq_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmfgt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmfgt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7741,9 +7741,9 
@@ define <vscale x 8 x i1> @fcmps_ueq_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ueq_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmflt.vf v17, v8, fa0 -; CHECK-NEXT: vmnor.mm v0, v17, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmflt.vf v8, v8, fa0 +; CHECK-NEXT: vmnor.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7755,8 +7755,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_ugt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7766,8 +7766,8 @@ define <vscale x 8 x i1> @fcmps_ugt_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ugt_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7779,8 +7779,8 @@ define <vscale x 8 x i1> @fcmps_ugt_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ugt_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; 
CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7792,8 +7792,8 @@ define <vscale x 8 x i1> @fcmps_uge_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_uge_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7803,8 +7803,8 @@ define <vscale x 8 x i1> @fcmps_uge_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_uge_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7816,8 +7816,8 @@ define <vscale x 8 x i1> @fcmps_uge_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_uge_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7829,8 +7829,8 @@ define <vscale x 8 x i1> @fcmps_ult_vv_nxv8f64(<vscale x 
8 x double> %va, <vscal ; CHECK-LABEL: fcmps_ult_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7840,8 +7840,8 @@ define <vscale x 8 x i1> @fcmps_ult_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ult_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7853,8 +7853,8 @@ define <vscale x 8 x i1> @fcmps_ult_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ult_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7866,8 +7866,8 @@ define <vscale x 8 x i1> @fcmps_ule_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_ule_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> 
@llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7877,8 +7877,8 @@ define <vscale x 8 x i1> @fcmps_ule_vf_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ule_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfgt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7890,8 +7890,8 @@ define <vscale x 8 x i1> @fcmps_ule_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_ule_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vf v16, v8, fa0 -; CHECK-NEXT: vmnot.m v0, v16 +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7903,9 +7903,9 @@ define <vscale x 8 x i1> @fcmps_une_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_une_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmnand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v8, v8, v16 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7915,9 +7915,9 @@ define <vscale x 8 x i1> @fcmps_une_vf_nxv8f64(<vscale x 8 
x double> %va, double ; CHECK-LABEL: fcmps_une_vf_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfge.vf v16, v8, fa0 -; CHECK-NEXT: vmfle.vf v17, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: vmfle.vf v8, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7929,9 +7929,9 @@ define <vscale x 8 x i1> @fcmps_une_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK-LABEL: fcmps_une_fv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vf v16, v8, fa0 -; CHECK-NEXT: vmfge.vf v17, v8, fa0 -; CHECK-NEXT: vmnand.mm v0, v17, v16 +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: vmfge.vf v8, v8, fa0 +; CHECK-NEXT: vmnand.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7943,10 +7943,10 @@ define <vscale x 8 x i1> @fcmps_uno_vv_nxv8f64(<vscale x 8 x double> %va, <vscal ; CHECK-LABEL: fcmps_uno_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v16 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call <vscale x 8 x i1> @llvm.experimental.constrained.fcmps.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret <vscale x 8 x i1> %1 @@ -7957,10 +7957,10 @@ define <vscale x 8 x i1> @fcmps_uno_vf_nxv8f64(<vscale x 8 x double> %va, double ; 
CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmfle.vv v8, v8, v8 +; CHECK-NEXT: vmnot.m v8, v8 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer @@ -7973,10 +7973,10 @@ define <vscale x 8 x i1> @fcmps_uno_fv_nxv8f64(<vscale x 8 x double> %va, double ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmnot.m v16, v24 -; CHECK-NEXT: vmfle.vv v17, v8, v8 -; CHECK-NEXT: vmorn.mm v0, v16, v17 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmnot.m v16, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v8 +; CHECK-NEXT: vmorn.mm v0, v16, v0 ; CHECK-NEXT: ret %head = insertelement <vscale x 8 x double> poison, double %b, i32 0 %splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll index 6d24bc2..eadb28f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll @@ -1108,95 +1108,69 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 50 +; CHECK-NEXT: li a3, 42 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x32, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 50 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 
0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: li a3, 24 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 34 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: srli a3, a1, 3 +; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 40 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs1r.v v0, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a5, a2, a3 ; CHECK-NEXT: vl8re64.v v8, (a5) ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 26 -; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: slli a5, a5, 4 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: sub a5, a4, a1 ; CHECK-NEXT: sltu a6, a4, a5 ; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: srli a6, a1, 3 -; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v8, (a3) -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: li a7, 42 -; CHECK-NEXT: mul a3, a3, a7 -; CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (a2) +; CHECK-NEXT: vl8re64.v v16, (a2) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 3 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded 
Spill -; CHECK-NEXT: vl8re64.v v8, (a0) +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v16, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a2, a0, 4 -; CHECK-NEXT: add a0, a2, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v16, v0, a6 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 26 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: and a0, a6, a5 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 42 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 42 +; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 @@ -1207,34 +1181,33 @@ define 
<vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: .LBB92_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 34 +; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v24, v16, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 42 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 50 +; CHECK-NEXT: li a1, 42 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll index f4ab947..84c4417 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll @@ -1108,95 +1108,69 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, 
<vsc ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 50 +; CHECK-NEXT: li a3, 42 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x32, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 50 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: li a3, 24 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 34 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: srli a3, a1, 3 +; CHECK-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 40 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs1r.v v0, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a5, a2, a3 ; CHECK-NEXT: vl8re64.v v8, (a5) ; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 26 -; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: slli a5, a5, 4 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: sub a5, a4, a1 ; CHECK-NEXT: sltu a6, a4, a5 ; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: srli a6, a1, 3 -; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v8, (a3) -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: li a7, 42 -; CHECK-NEXT: mul a3, a3, a7 -; 
CHECK-NEXT: add a3, sp, a3 -; CHECK-NEXT: addi a3, a3, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (a2) +; CHECK-NEXT: vl8re64.v v16, (a2) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a3, a2, 3 -; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v8, (a0) +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v16, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a2, a0, 4 -; CHECK-NEXT: add a0, a2, a0 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v16, v0, a6 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs1r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 26 -; CHECK-NEXT: mul a0, a0, a2 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: and a0, a6, a5 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 42 -; CHECK-NEXT: mul a0, a0, a2 +; CHECK-NEXT: slli a0, a0, 4 ; 
CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a2, 42 +; CHECK-NEXT: li a2, 24 ; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 @@ -1207,34 +1181,33 @@ define <vscale x 16 x double> @vfma_vv_nxv16f64(<vscale x 16 x double> %va, <vsc ; CHECK-NEXT: .LBB92_2: ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 34 +; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 4 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a1, a0, 3 -; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v24, v16, v8, v0.t +; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 42 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 50 +; CHECK-NEXT: li a1, 42 ; 
CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll index b888fde..63fa6e0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll @@ -932,13 +932,13 @@ define <vscale x 32 x i1> @vfptosi_nxv32f16_nxv32i1(<vscale x 32 x half> %va) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v12, v16 ; ZVFHMIN-NEXT: vand.vi v12, v12, 1 -; ZVFHMIN-NEXT: vmsne.vi v16, v12, 0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v24 +; ZVFHMIN-NEXT: vmsne.vi v4, v12, 0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfncvt.rtz.x.f.w v8, v16 ; ZVFHMIN-NEXT: vand.vi v8, v8, 1 ; ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 ; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0 +; ZVFHMIN-NEXT: vslideup.vx v0, v4, a0 ; ZVFHMIN-NEXT: ret %evec = fptosi <vscale x 32 x half> %va to <vscale x 32 x i1> ret <vscale x 32 x i1> %evec @@ -962,13 +962,13 @@ define <vscale x 32 x i1> @vfptoui_nxv32f16_nxv32i1(<vscale x 32 x half> %va) { ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 ; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v12, v16 ; ZVFHMIN-NEXT: vand.vi v12, v12, 1 -; ZVFHMIN-NEXT: vmsne.vi v16, v12, 0 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 -; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v24 +; ZVFHMIN-NEXT: vmsne.vi v4, v12, 0 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vfncvt.rtz.xu.f.w v8, v16 ; ZVFHMIN-NEXT: vand.vi v8, v8, 1 ; ZVFHMIN-NEXT: vmsne.vi v0, v8, 0 ; ZVFHMIN-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslideup.vx v0, v16, a0 +; ZVFHMIN-NEXT: vslideup.vx v0, v4, a0 ; ZVFHMIN-NEXT: ret %evec = fptoui <vscale x 32 x half> %va to <vscale x 32 x i1> ret <vscale x 32 x i1> %evec diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll index e5048ea..9fc87b5 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll @@ -76,9 +76,8 @@ define <vscale x 2 x i1> @vfptosi_nxv2i1_nxv2f64(<vscale x 2 x double> %va, <vsc ; CHECK-LABEL: vfptosi_nxv2i1_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.rtz.x.f.v v10, v8, v0.t -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %v = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll index 4b609d0..f9ffcdb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp-mask.ll @@ -76,9 +76,8 @@ define <vscale x 2 x i1> @vfptoui_nxv2i1_nxv2f64(<vscale x 2 x double> %va, <vsc ; CHECK-LABEL: vfptoui_nxv2i1_nxv2f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vfcvt.rtz.xu.f.v v10, v8, v0.t -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %v = call <vscale x 2 x i1> @llvm.vp.fptoui.nxv2i1.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl) ret <vscale x 2 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll index b85197d..685653c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll @@ -139,29 +139,29 @@ define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double> ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a5, a1, 3 -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, 
ta, ma -; CHECK-NEXT: vslidedown.vx v6, v0, a5 -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: srli a3, a1, 3 +; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: vslidedown.vx v25, v0, a3 -; CHECK-NEXT: slli a3, a1, 3 -; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vl8re64.v v16, (a3) -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v0, a4 +; CHECK-NEXT: slli a4, a1, 3 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vl8re64.v v16, (a4) +; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v25, v8 +; CHECK-NEXT: vslidedown.vx v0, v8, a3 ; CHECK-NEXT: slli a3, a1, 1 ; CHECK-NEXT: sub a4, a2, a3 -; CHECK-NEXT: sltu a6, a2, a4 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a4, a6, a4 -; CHECK-NEXT: sub a6, a4, a1 -; CHECK-NEXT: sltu a7, a4, a6 +; CHECK-NEXT: sltu a5, a2, a4 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a4, a5, a4 +; CHECK-NEXT: sub a5, a4, a1 ; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: addi a7, a7, -1 -; CHECK-NEXT: and a0, a7, a6 -; CHECK-NEXT: vmv1r.v v24, v25 -; CHECK-NEXT: vslidedown.vx v0, v25, a5 +; CHECK-NEXT: sltu a0, a4, a5 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a5 ; CHECK-NEXT: bltu a4, a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 @@ -169,7 +169,7 @@ define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double> ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v28, v16, v0.t ; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB8_4 ; CHECK-NEXT: # %bb.3: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll index e718492..dd1547f 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq.ll @@ -187,11 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f16( define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v0, v8, v10 -; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f16( @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16f16( define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v0, v8, v12 -; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16f16( @@ -391,11 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f32( define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; 
CHECK-NEXT: vmfeq.vv v0, v8, v10 -; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f32( @@ -442,11 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f32( define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v0, v8, v12 -; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8f32( @@ -544,11 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f64( define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfeq.vv v0, v8, v10 -; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfeq.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2f64( @@ -595,11 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f64( define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmfeq_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfeq.vv v0, v8, v12 -; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfeq.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4f64( @@ -787,11 +787,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f16.f16( define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f16.f16( @@ -834,11 +834,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16f16.f16( define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16f16.f16( @@ -975,11 +975,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f32.f32( define <vscale x 4 x i1> 
@intrinsic_vmfeq_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f32.f32( @@ -1022,11 +1022,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f32.f32( define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8f32.f32( @@ -1116,11 +1116,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f64.f64( define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfeq.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2f64.f64( @@ 
-1163,11 +1163,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f64.f64( define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfeq.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll index a6dad9e..c78e8d7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfge.ll @@ -187,11 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f16( define <vscale x 8 x i1> @intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfle.vv v0, v10, v8 -; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f16( @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16f16( define <vscale x 16 x i1> @intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfle.vv v0, v12, v8 -; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16f16( @@ -391,11 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f32( define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmfle.vv v0, v10, v8 -; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f32( @@ -442,11 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f32( define <vscale x 8 x i1> @intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfle.vv v0, v12, v8 -; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8f32( @@ -544,11 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f64( define <vscale x 2 x i1> @intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, 
<vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfle.vv v0, v10, v8 -; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2f64( @@ -595,11 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f64( define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfle.vv v0, v12, v8 -; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4f64( @@ -787,11 +787,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f16.f16( define <vscale x 8 x i1> @intrinsic_vmfge_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfge.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f16.f16( @@ -834,11 +834,11 @@ declare <vscale x 16 x 
i1> @llvm.riscv.vmfge.mask.nxv16f16.f16( define <vscale x 16 x i1> @intrinsic_vmfge_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfge.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16f16.f16( @@ -975,11 +975,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f32.f32( define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfge.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f32.f32( @@ -1022,11 +1022,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f32.f32( define <vscale x 8 x i1> @intrinsic_vmfge_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfge.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: 
%a = call <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8f32.f32( @@ -1116,11 +1116,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f64.f64( define <vscale x 2 x i1> @intrinsic_vmfge_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfge.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2f64.f64( @@ -1163,11 +1163,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f64.f64( define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfge.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll index f643a40..b5299faf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt.ll @@ -187,11 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f16( define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f16_nxv8f16: ; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v10, v8 -; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f16( @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16f16( define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v12, v8 -; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16f16( @@ -391,11 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f32( define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v10, v8 -; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f32( @@ -442,11 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f32( define <vscale x 8 x i1> 
@intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v12, v8 -; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8f32( @@ -544,11 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64( define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v10, v8 -; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2f64( @@ -595,11 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f64( define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v12, v8 -; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: 
ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4f64( @@ -787,11 +787,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f16.f16( define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f16.f16( @@ -834,11 +834,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16f16.f16( define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16f16.f16( @@ -975,11 +975,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f32.f32( define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: 
vmv1r.v v0, v11 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f32.f32( @@ -1022,11 +1022,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f32.f32( define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8f32.f32( @@ -1116,11 +1116,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64.f64( define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfgt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2f64.f64( @@ -1163,11 +1163,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f64.f64( define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, 
a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfgt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll index 6c52364..383b175 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmfle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfle.ll @@ -187,11 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f16( define <vscale x 8 x i1> @intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v10 -; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f16( @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16f16( define <vscale x 16 x i1> @intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v12 -; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16f16( @@ -391,11 +391,11 @@ declare <vscale x 4 x i1> 
@llvm.riscv.vmfle.mask.nxv4f32( define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v10 -; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f32( @@ -442,11 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f32( define <vscale x 8 x i1> @intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v12 -; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8f32( @@ -544,11 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f64( define <vscale x 2 x i1> @intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v10 -; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfle.vv v2, v10, 
v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2f64( @@ -595,11 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f64( define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfle.vv v0, v8, v12 -; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfle.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4f64( @@ -787,11 +787,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f16.f16( define <vscale x 8 x i1> @intrinsic_vmfle_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfle.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f16.f16( @@ -834,11 +834,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16f16.f16( define <vscale x 16 x i1> @intrinsic_vmfle_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: 
vmv1r.v v0, v12 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfle.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16f16.f16( @@ -975,11 +975,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f32.f32( define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfle.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f32.f32( @@ -1022,11 +1022,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f32.f32( define <vscale x 8 x i1> @intrinsic_vmfle_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfle.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8f32.f32( @@ -1116,11 +1116,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f64.f64( define <vscale x 2 x i1> @intrinsic_vmfle_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfle.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2f64.f64( @@ -1163,11 +1163,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f64.f64( define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfle.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll index 37a9c6b..7d0abe3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmflt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmflt.ll @@ -187,11 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f16( define <vscale x 8 x i1> @intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v10 -; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f16( @@ -238,11 
+238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16f16( define <vscale x 16 x i1> @intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v12 -; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16f16( @@ -391,11 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f32( define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v10 -; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f32( @@ -442,11 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f32( define <vscale x 8 x i1> @intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v12 -; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t -; CHECK-NEXT: 
vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8f32( @@ -544,11 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f64( define <vscale x 2 x i1> @intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v10 -; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmflt.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2f64( @@ -595,11 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f64( define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vv v0, v8, v12 -; CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmflt.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4f64( @@ -787,11 +787,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f16.f16( define <vscale x 8 x i1> @intrinsic_vmflt_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v 
v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f16.f16( @@ -834,11 +834,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16f16.f16( define <vscale x 16 x i1> @intrinsic_vmflt_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16f16.f16( @@ -975,11 +975,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f32.f32( define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f32.f32( @@ -1022,11 +1022,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f32.f32( define <vscale x 8 x i1> @intrinsic_vmflt_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmflt_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8f32.f32( @@ -1116,11 +1116,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f64.f64( define <vscale x 2 x i1> @intrinsic_vmflt_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmflt.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2f64.f64( @@ -1163,11 +1163,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f64.f64( define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmflt.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll index 5defce4..db077b1 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vmfne.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmfne.ll @@ -187,11 +187,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f16( define <vscale x 8 x i1> @intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f16_nxv8f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v10 -; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfne.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f16( @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16f16( define <vscale x 16 x i1> @intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x half> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16f16_nxv16f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v12 -; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfne.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16f16( @@ -391,11 +391,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f32( define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4f32_nxv4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; 
CHECK-NEXT: vmfne.vv v0, v8, v10 -; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfne.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f32( @@ -442,11 +442,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f32( define <vscale x 8 x i1> @intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x float> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8f32_nxv8f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v12 -; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfne.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8f32( @@ -544,11 +544,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f64( define <vscale x 2 x i1> @intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv2f64_nxv2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v10 -; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmfne.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2f64( @@ -595,11 +595,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f64( define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x double> %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmfne_mask_vv_nxv4f64_nxv4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v12 -; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmfne.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4f64( @@ -787,11 +787,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f16.f16( define <vscale x 8 x i1> @intrinsic_vmfne_mask_vf_nxv8f16_f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfne.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f16.f16( @@ -834,11 +834,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16f16.f16( define <vscale x 16 x i1> @intrinsic_vmfne_mask_vf_nxv16f16_f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv16f16_f16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfne.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16f16.f16( @@ -975,11 +975,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f32.f32( define <vscale x 4 x i1> 
@intrinsic_vmfne_mask_vf_nxv4f32_f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfne.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f32.f32( @@ -1022,11 +1022,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f32.f32( define <vscale x 8 x i1> @intrinsic_vmfne_mask_vf_nxv8f32_f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8f32_f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfne.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8f32.f32( @@ -1116,11 +1116,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f64.f64( define <vscale x 2 x i1> @intrinsic_vmfne_mask_vf_nxv2f64_f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmfne.vf v2, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2f64.f64( @@ 
-1163,11 +1163,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f64.f64( define <vscale x 4 x i1> @intrinsic_vmfne_mask_vf_nxv4f64_f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4f64_f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmfne.vf v4, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4f64.f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll index e2f5381..f96ab33 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 
+; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> 
%2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i32( @@ -850,11 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v10 -; CHECK-NEXT: vmseq.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmseq.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> 
@llvm.riscv.vmseq.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmseq.vv v0, v8, v12 -; CHECK-NEXT: vmseq.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmseq.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmseq_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmseq.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmseq_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmseq.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = 
call <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmseq_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmseq.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmseq_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmseq.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32( define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; 
CHECK-NEXT: vmseq.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmseq_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmseq.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmseq_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmseq.vv v11, v8, v12, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmseq.vv v2, v8, v12, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmseq.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmseq.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), 
zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmseq.vv v13, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmseq.vv v4, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmseq.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmseq.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmseq_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmseq.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmseq_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmseq.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8.i8( @@ -2182,11 
+2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmseq_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmseq.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmseq_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmseq.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmseq_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmseq.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32( @@ -2357,11 +2357,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmseq_mask_vi_nxv8i32_i32(<vscale x 8 x i1> 
%0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmseq.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmseq_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmseq.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmseq_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmseq.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll index da1c751..133ae98 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll @@ -4,19 +4,19 
@@ ; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 -declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i8( <vscale x 1 x i8>, <vscale x 1 x i8>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vv_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv1i8_nxv1i8: +define <vscale x 1 x i1> @intrinsic_vmsge_vv_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i8( <vscale x 1 x i8> %0, <vscale x 1 x i8> %1, iXLen %2) @@ -24,29 +24,28 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i8( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i8( <vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i8_nxv1i8: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; 
CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8( + %mask = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i8( <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, iXLen %4) - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i8( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i8( <vscale x 1 x i1> %0, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, @@ -56,19 +55,19 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i8( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i8( <vscale x 2 x i8>, <vscale x 2 x i8>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vv_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv2i8_nxv2i8: +define <vscale x 2 x i1> @intrinsic_vmsge_vv_nxv2i8_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i8( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i8( <vscale x 2 x i8> %0, <vscale x 2 x i8> %1, iXLen %2) @@ -76,29 +75,28 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i8( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i8( <vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i8_nxv2i8: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) 
nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i8( + %mask = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i8( <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, iXLen %4) - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i8( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i8( <vscale x 2 x i1> %0, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, @@ -108,19 +106,19 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i8( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i8( <vscale x 4 x i8>, <vscale x 4 x i8>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vv_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv4i8_nxv4i8: +define <vscale x 4 x i1> @intrinsic_vmsge_vv_nxv4i8_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i8( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i8( <vscale x 4 x i8> %0, <vscale x 4 x i8> %1, iXLen %2) @@ -128,29 +126,28 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i8( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i8( <vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> 
@intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i8_nxv4i8: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i8( + %mask = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i8( <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, iXLen %4) - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i8( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i8( <vscale x 4 x i1> %0, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, @@ -160,19 +157,19 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i8( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i8( <vscale x 8 x i8>, <vscale x 8 x i8>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_vv_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv8i8_nxv8i8: +define <vscale x 8 x i1> @intrinsic_vmsge_vv_nxv8i8_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i8( + %a = call <vscale x 8 x 
i1> @llvm.riscv.vmsge.nxv8i8( <vscale x 8 x i8> %0, <vscale x 8 x i8> %1, iXLen %2) @@ -180,29 +177,28 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i8( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i8( <vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i8_nxv8i8: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i8( + %mask = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i8( <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, iXLen %4) - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i8( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i8( <vscale x 8 x i1> %0, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, @@ -212,19 +208,19 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8( <vscale x 16 x i8>, <vscale x 16 x i8>, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_vv_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv16i8_nxv16i8: +define <vscale x 16 x i1> 
@intrinsic_vmsge_vv_nxv16i8_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v10 +; CHECK-NEXT: vmsle.vv v0, v10, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8( <vscale x 16 x i8> %0, <vscale x 16 x i8> %1, iXLen %2) @@ -232,29 +228,28 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8( <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i8_nxv16i8: +define <vscale x 16 x i1> @intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmseq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8( + %mask = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8( <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, iXLen %4) - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8( <vscale x 16 x i1> %0, <vscale x 16 x i8> %2, 
<vscale x 16 x i8> %3, @@ -264,19 +259,19 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8( +declare <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8( <vscale x 32 x i8>, <vscale x 32 x i8>, iXLen); -define <vscale x 32 x i1> @intrinsic_vmseq_vv_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv32i8_nxv32i8: +define <vscale x 32 x i1> @intrinsic_vmsge_vv_nxv32i8_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i8> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmsle.vv v0, v12, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8( + %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8( <vscale x 32 x i8> %0, <vscale x 32 x i8> %1, iXLen %2) @@ -284,29 +279,28 @@ entry: ret <vscale x 32 x i1> %a } -declare <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8( +declare <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8( <vscale x 32 x i1>, <vscale x 32 x i8>, <vscale x 32 x i8>, <vscale x 32 x i1>, iXLen); -define <vscale x 32 x i1> @intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv32i8_nxv32i8: +define <vscale x 32 x i1> @intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmseq.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; 
CHECK-NEXT: vmsle.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8( + %mask = call <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8( <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, iXLen %4) - %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8( + %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8( <vscale x 32 x i1> %0, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, @@ -316,19 +310,19 @@ entry: ret <vscale x 32 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i16( <vscale x 1 x i16>, <vscale x 1 x i16>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vv_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv1i16_nxv1i16: +define <vscale x 1 x i1> @intrinsic_vmsge_vv_nxv1i16_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i16( <vscale x 1 x i16> %0, <vscale x 1 x i16> %1, iXLen %2) @@ -336,29 +330,28 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i16( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i16( <vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i16_nxv1i16: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale 
x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16( + %mask = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i16( <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, iXLen %4) - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i16( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i16( <vscale x 1 x i1> %0, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, @@ -368,19 +361,19 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i16( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i16( <vscale x 2 x i16>, <vscale x 2 x i16>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vv_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv2i16_nxv2i16: +define <vscale x 2 x i1> @intrinsic_vmsge_vv_nxv2i16_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i16( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i16( <vscale x 2 x i16> %0, <vscale x 2 x i16> %1, iXLen %2) @@ -388,29 +381,28 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i16( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i16( <vscale x 2 x i1>, <vscale x 
2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i16_nxv2i16: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i16( + %mask = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i16( <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, iXLen %4) - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i16( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i16( <vscale x 2 x i1> %0, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, @@ -420,19 +412,19 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i16( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i16( <vscale x 4 x i16>, <vscale x 4 x i16>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vv_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv4i16_nxv4i16: +define <vscale x 4 x i1> @intrinsic_vmsge_vv_nxv4i16_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: 
vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i16( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i16( <vscale x 4 x i16> %0, <vscale x 4 x i16> %1, iXLen %2) @@ -440,29 +432,28 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i16( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i16( <vscale x 4 x i1>, <vscale x 4 x i16>, <vscale x 4 x i16>, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i16_nxv4i16: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i16( + %mask = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i16( <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, iXLen %4) - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i16( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i16( <vscale x 4 x i1> %0, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, @@ -472,19 +463,19 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16( <vscale x 8 x i16>, <vscale x 8 x i16>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_vv_nxv8i16_nxv8i16(<vscale x 8 x 
i16> %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv8i16_nxv8i16: +define <vscale x 8 x i1> @intrinsic_vmsge_vv_nxv8i16_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v10 +; CHECK-NEXT: vmsle.vv v0, v10, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16( <vscale x 8 x i16> %0, <vscale x 8 x i16> %1, iXLen %2) @@ -492,29 +483,28 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16( <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i16_nxv8i16: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmseq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16( + %mask = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16( <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, iXLen %4) - %a = call <vscale x 8 x i1> 
@llvm.riscv.vmseq.mask.nxv8i16( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16( <vscale x 8 x i1> %0, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, @@ -524,19 +514,19 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16( <vscale x 16 x i16>, <vscale x 16 x i16>, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_vv_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv16i16_nxv16i16: +define <vscale x 16 x i1> @intrinsic_vmsge_vv_nxv16i16_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i16> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmsle.vv v0, v12, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16( <vscale x 16 x i16> %0, <vscale x 16 x i16> %1, iXLen %2) @@ -544,29 +534,28 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16( <vscale x 16 x i1>, <vscale x 16 x i16>, <vscale x 16 x i16>, <vscale x 16 x i1>, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv16i16_nxv16i16: +define <vscale x 16 x i1> @intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmseq.vv v20, 
v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; CHECK-NEXT: vmsle.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16( + %mask = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16( <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, iXLen %4) - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16( <vscale x 16 x i1> %0, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, @@ -576,19 +565,19 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i32( <vscale x 1 x i32>, <vscale x 1 x i32>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vv_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv1i32_nxv1i32: +define <vscale x 1 x i1> @intrinsic_vmsge_vv_nxv1i32_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i32( <vscale x 1 x i32> %0, <vscale x 1 x i32> %1, iXLen %2) @@ -596,29 +585,28 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i32( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i32( <vscale x 1 x i1>, <vscale x 1 x i32>, <vscale x 1 x i32>, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen 
%4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i32_nxv1i32: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32( + %mask = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i32( <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, iXLen %4) - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i32( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i32( <vscale x 1 x i1> %0, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, @@ -628,19 +616,19 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i32( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i32( <vscale x 2 x i32>, <vscale x 2 x i32>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vv_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv2i32_nxv2i32: +define <vscale x 2 x i1> @intrinsic_vmsge_vv_nxv2i32_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i32( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i32( <vscale x 2 x i32> %0, <vscale x 2 x i32> %1, iXLen %2) @@ -648,29 
+636,28 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i32( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i32( <vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i32_nxv2i32: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i32( + %mask = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i32( <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, iXLen %4) - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i32( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i32( <vscale x 2 x i1> %0, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, @@ -680,19 +667,19 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32( <vscale x 4 x i32>, <vscale x 4 x i32>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vv_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv4i32_nxv4i32: +define <vscale x 4 x i1> @intrinsic_vmsge_vv_nxv4i32_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, iXLen %2) 
nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v10 +; CHECK-NEXT: vmsle.vv v0, v10, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32( <vscale x 4 x i32> %0, <vscale x 4 x i32> %1, iXLen %2) @@ -700,29 +687,28 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32( <vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i32_nxv4i32: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmseq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32( + %mask = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32( <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, iXLen %4) - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32( <vscale x 4 x i1> %0, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, @@ -732,19 +718,19 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 8 x i1> 
@llvm.riscv.vmseq.nxv8i32( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32( <vscale x 8 x i32>, <vscale x 8 x i32>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_vv_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv8i32_nxv8i32: +define <vscale x 8 x i1> @intrinsic_vmsge_vv_nxv8i32_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i32> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmsle.vv v0, v12, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i32( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32( <vscale x 8 x i32> %0, <vscale x 8 x i32> %1, iXLen %2) @@ -752,29 +738,28 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32( <vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, <vscale x 8 x i1>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv8i32_nxv8i32: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmseq.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; CHECK-NEXT: vmsle.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 
8 x i1> @llvm.riscv.vmseq.nxv8i32( + %mask = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32( <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, iXLen %4) - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32( <vscale x 8 x i1> %0, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, @@ -784,19 +769,19 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i64( <vscale x 1 x i64>, <vscale x 1 x i64>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vv_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv1i64_nxv1i64: +define <vscale x 1 x i1> @intrinsic_vmsge_vv_nxv1i64_nxv1i64(<vscale x 1 x i64> %0, <vscale x 1 x i64> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v9 +; CHECK-NEXT: vmsle.vv v0, v9, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i64( <vscale x 1 x i64> %0, <vscale x 1 x i64> %1, iXLen %2) @@ -804,29 +789,28 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i64( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64( <vscale x 1 x i1>, <vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv1i64_nxv1i64: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv1i64_nxv1i64: ; CHECK: # 
%bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmseq.vv v8, v8, v9 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmseq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsle.vv v0, v9, v8 +; CHECK-NEXT: vmsle.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64( + %mask = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i64( <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, iXLen %4) - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i64( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64( <vscale x 1 x i1> %0, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, @@ -836,19 +820,19 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64( <vscale x 2 x i64>, <vscale x 2 x i64>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vv_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv2i64_nxv2i64: +define <vscale x 2 x i1> @intrinsic_vmsge_vv_nxv2i64_nxv2i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v10 +; CHECK-NEXT: vmsle.vv v0, v10, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64( <vscale x 2 x i64> %0, <vscale x 2 x i64> %1, iXLen %2) @@ -856,29 +840,28 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64( <vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> 
@intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv2i64_nxv2i64: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmseq.vv v14, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmseq.vv v8, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v10, v8 +; CHECK-NEXT: vmsle.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64( + %mask = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64( <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, iXLen %4) - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64( <vscale x 2 x i1> %0, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, @@ -888,19 +871,19 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i64( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64( <vscale x 4 x i64>, <vscale x 4 x i64>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vv_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vv_nxv4i64_nxv4i64: +define <vscale x 4 x i1> @intrinsic_vmsge_vv_nxv4i64_nxv4i64(<vscale x 4 x i64> %0, <vscale x 4 x i64> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmseq.vv v0, v8, v12 +; CHECK-NEXT: vmsle.vv v0, v12, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> 
@llvm.riscv.vmseq.nxv4i64( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64( <vscale x 4 x i64> %0, <vscale x 4 x i64> %1, iXLen %2) @@ -908,29 +891,28 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64( <vscale x 4 x i1>, <vscale x 4 x i64>, <vscale x 4 x i64>, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vv_nxv4i64_nxv4i64: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmseq.vv v20, v8, v12 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmseq.vv v8, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsle.vv v0, v12, v8 +; CHECK-NEXT: vmsle.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %mask = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i64( + %mask = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64( <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, iXLen %4) - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64( <vscale x 4 x i1> %0, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, @@ -940,19 +922,20 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8.i8( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i8.i8( <vscale x 1 x i8>, i8, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vx_nxv1i8_i8(<vscale x 1 x i8> %0, i8 %1, iXLen %2) nounwind { -; CHECK-LABEL: 
intrinsic_vmseq_vx_nxv1i8_i8: +define <vscale x 1 x i1> @intrinsic_vmsge_vx_nxv1i8_i8(<vscale x 1 x i8> %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8.i8( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i8.i8( <vscale x 1 x i8> %0, i8 %1, iXLen %2) @@ -960,24 +943,24 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i8.i8( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i8.i8( <vscale x 1 x i1>, <vscale x 1 x i8>, i8, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vx_nxv1i8_i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, i8 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i8_i8: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vx_nxv1i8_i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, i8 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i8.i8( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i8.i8( <vscale x 1 x i1> %0, <vscale x 1 x i8> %1, i8 %2, @@ -987,19 +970,20 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i8.i8( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i8.i8( <vscale x 2 x i8>, i8, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vx_nxv2i8_i8(<vscale x 2 x i8> %0, i8 %1, iXLen %2) nounwind { -; 
CHECK-LABEL: intrinsic_vmseq_vx_nxv2i8_i8: +define <vscale x 2 x i1> @intrinsic_vmsge_vx_nxv2i8_i8(<vscale x 2 x i8> %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i8.i8( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i8.i8( <vscale x 2 x i8> %0, i8 %1, iXLen %2) @@ -1007,24 +991,24 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i8.i8( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i8.i8( <vscale x 2 x i1>, <vscale x 2 x i8>, i8, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vx_nxv2i8_i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, i8 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i8_i8: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vx_nxv2i8_i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, i8 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i8.i8( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i8.i8( <vscale x 2 x i1> %0, <vscale x 2 x i8> %1, i8 %2, @@ -1034,19 +1018,20 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i8.i8( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i8.i8( <vscale x 4 x i8>, i8, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vx_nxv4i8_i8(<vscale x 4 x i8> %0, i8 %1, iXLen %2) 
nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv4i8_i8: +define <vscale x 4 x i1> @intrinsic_vmsge_vx_nxv4i8_i8(<vscale x 4 x i8> %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i8.i8( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i8.i8( <vscale x 4 x i8> %0, i8 %1, iXLen %2) @@ -1054,24 +1039,24 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i8.i8( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i8.i8( <vscale x 4 x i1>, <vscale x 4 x i8>, i8, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i8_i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, i8 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i8_i8: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vx_nxv4i8_i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, i8 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i8.i8( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i8.i8( <vscale x 4 x i1> %0, <vscale x 4 x i8> %1, i8 %2, @@ -1081,19 +1066,20 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i8.i8( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i8.i8( <vscale x 8 x i8>, i8, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_vx_nxv8i8_i8(<vscale x 8 x i8> %0, i8 
%1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv8i8_i8: +define <vscale x 8 x i1> @intrinsic_vmsge_vx_nxv8i8_i8(<vscale x 8 x i8> %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i8.i8( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i8.i8( <vscale x 8 x i8> %0, i8 %1, iXLen %2) @@ -1101,24 +1087,24 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i8.i8( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i8.i8( <vscale x 8 x i1>, <vscale x 8 x i8>, i8, <vscale x 8 x i1>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vx_nxv8i8_i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, i8 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i8_i8: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vx_nxv8i8_i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, i8 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i8.i8( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i8.i8( <vscale x 8 x i1> %0, <vscale x 8 x i8> %1, i8 %2, @@ -1128,19 +1114,20 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8.i8( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8.i8( <vscale x 16 x i8>, i8, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_vx_nxv16i8_i8(<vscale 
x 16 x i8> %0, i8 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv16i8_i8: +define <vscale x 16 x i1> @intrinsic_vmsge_vx_nxv16i8_i8(<vscale x 16 x i8> %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8.i8( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8.i8( <vscale x 16 x i8> %0, i8 %1, iXLen %2) @@ -1148,24 +1135,24 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8.i8( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8.i8( <vscale x 16 x i1>, <vscale x 16 x i8>, i8, <vscale x 16 x i1>, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i8_i8: +define <vscale x 16 x i1> @intrinsic_vmsge_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmslt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v2, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8.i8( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8.i8( <vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, @@ -1175,19 +1162,20 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.i8( +declare <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8.i8( <vscale x 
32 x i8>, i8, iXLen); -define <vscale x 32 x i1> @intrinsic_vmseq_vx_nxv32i8_i8(<vscale x 32 x i8> %0, i8 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv32i8_i8: +define <vscale x 32 x i1> @intrinsic_vmsge_vx_nxv32i8_i8(<vscale x 32 x i8> %0, i8 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: - %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.i8( + %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8.i8( <vscale x 32 x i8> %0, i8 %1, iXLen %2) @@ -1195,24 +1183,24 @@ entry: ret <vscale x 32 x i1> %a } -declare <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8.i8( +declare <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8.i8( <vscale x 32 x i1>, <vscale x 32 x i8>, i8, <vscale x 32 x i1>, iXLen); -define <vscale x 32 x i1> @intrinsic_vmseq_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv32i8_i8: +define <vscale x 32 x i1> @intrinsic_vmsge_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmslt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v4, v12 ; CHECK-NEXT: ret entry: - %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8.i8( + %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8.i8( <vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, @@ -1222,19 +1210,20 @@ entry: ret <vscale x 32 x i1> %a } -declare <vscale x 1 x i1> 
@llvm.riscv.vmseq.nxv1i16.i16( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i16.i16( <vscale x 1 x i16>, i16, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vx_nxv1i16_i16(<vscale x 1 x i16> %0, i16 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv1i16_i16: +define <vscale x 1 x i1> @intrinsic_vmsge_vx_nxv1i16_i16(<vscale x 1 x i16> %0, i16 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16.i16( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i16.i16( <vscale x 1 x i16> %0, i16 %1, iXLen %2) @@ -1242,24 +1231,24 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i16.i16( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i16.i16( <vscale x 1 x i1>, <vscale x 1 x i16>, i16, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vx_nxv1i16_i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, i16 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i16_i16: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vx_nxv1i16_i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, i16 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i16.i16( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i16.i16( <vscale x 1 x i1> %0, <vscale x 1 x i16> %1, i16 %2, @@ -1269,19 +1258,20 
@@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i16.i16( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i16.i16( <vscale x 2 x i16>, i16, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vx_nxv2i16_i16(<vscale x 2 x i16> %0, i16 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv2i16_i16: +define <vscale x 2 x i1> @intrinsic_vmsge_vx_nxv2i16_i16(<vscale x 2 x i16> %0, i16 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i16.i16( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i16.i16( <vscale x 2 x i16> %0, i16 %1, iXLen %2) @@ -1289,24 +1279,24 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i16.i16( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i16.i16( <vscale x 2 x i1>, <vscale x 2 x i16>, i16, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vx_nxv2i16_i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, i16 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i16_i16: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vx_nxv2i16_i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, i16 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i16.i16( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i16.i16( <vscale x 2 
x i1> %0, <vscale x 2 x i16> %1, i16 %2, @@ -1316,19 +1306,20 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i16.i16( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i16.i16( <vscale x 4 x i16>, i16, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vx_nxv4i16_i16(<vscale x 4 x i16> %0, i16 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv4i16_i16: +define <vscale x 4 x i1> @intrinsic_vmsge_vx_nxv4i16_i16(<vscale x 4 x i16> %0, i16 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i16.i16( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i16.i16( <vscale x 4 x i16> %0, i16 %1, iXLen %2) @@ -1336,24 +1327,24 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i16.i16( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i16.i16( <vscale x 4 x i1>, <vscale x 4 x i16>, i16, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i16_i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, i16 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i16_i16: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vx_nxv4i16_i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, i16 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i16.i16( + %a = call 
<vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i16.i16( <vscale x 4 x i1> %0, <vscale x 4 x i16> %1, i16 %2, @@ -1363,19 +1354,20 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16.i16( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16.i16( <vscale x 8 x i16>, i16, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_vx_nxv8i16_i16(<vscale x 8 x i16> %0, i16 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv8i16_i16: +define <vscale x 8 x i1> @intrinsic_vmsge_vx_nxv8i16_i16(<vscale x 8 x i16> %0, i16 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16.i16( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16.i16( <vscale x 8 x i16> %0, i16 %1, iXLen %2) @@ -1383,24 +1375,24 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16.i16( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16.i16( <vscale x 8 x i1>, <vscale x 8 x i16>, i16, <vscale x 8 x i1>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i16_i16: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmslt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v2, v10 ; 
CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16.i16( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16.i16( <vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, @@ -1410,19 +1402,20 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16.i16( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16.i16( <vscale x 16 x i16>, i16, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_vx_nxv16i16_i16(<vscale x 16 x i16> %0, i16 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv16i16_i16: +define <vscale x 16 x i1> @intrinsic_vmsge_vx_nxv16i16_i16(<vscale x 16 x i16> %0, i16 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16.i16( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16.i16( <vscale x 16 x i16> %0, i16 %1, iXLen %2) @@ -1430,24 +1423,24 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16.i16( +declare <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16.i16( <vscale x 16 x i1>, <vscale x 16 x i16>, i16, <vscale x 16 x i1>, iXLen); -define <vscale x 16 x i1> @intrinsic_vmseq_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv16i16_i16: +define <vscale x 16 x i1> @intrinsic_vmsge_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: 
vmseq.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmslt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v4, v12 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16.i16( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16.i16( <vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, @@ -1457,19 +1450,20 @@ entry: ret <vscale x 16 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32.i32( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i32.i32( <vscale x 1 x i32>, i32, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vx_nxv1i32_i32(<vscale x 1 x i32> %0, i32 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv1i32_i32: +define <vscale x 1 x i1> @intrinsic_vmsge_vx_nxv1i32_i32(<vscale x 1 x i32> %0, i32 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32.i32( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i32.i32( <vscale x 1 x i32> %0, i32 %1, iXLen %2) @@ -1477,24 +1471,24 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i32.i32( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i32.i32( <vscale x 1 x i1>, <vscale x 1 x i32>, i32, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vx_nxv1i32_i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, i32 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv1i32_i32: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vx_nxv1i32_i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, i32 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli 
zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i32.i32( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i32.i32( <vscale x 1 x i1> %0, <vscale x 1 x i32> %1, i32 %2, @@ -1504,19 +1498,20 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i32.i32( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i32.i32( <vscale x 2 x i32>, i32, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vx_nxv2i32_i32(<vscale x 2 x i32> %0, i32 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv2i32_i32: +define <vscale x 2 x i1> @intrinsic_vmsge_vx_nxv2i32_i32(<vscale x 2 x i32> %0, i32 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmnot.m v0, v8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i32.i32( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i32.i32( <vscale x 2 x i32> %0, i32 %1, iXLen %2) @@ -1524,24 +1519,24 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i32.i32( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i32.i32( <vscale x 2 x i1>, <vscale x 2 x i32>, i32, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vx_nxv2i32_i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, i32 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv2i32_i32: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vx_nxv2i32_i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, i32 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv2i32_i32: ; CHECK: # %bb.0: 
# %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vx v10, v8, a0, v0.t -; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: vmslt.vx v10, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v10, v9 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i32.i32( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i32.i32( <vscale x 2 x i1> %0, <vscale x 2 x i32> %1, i32 %2, @@ -1551,19 +1546,20 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32.i32( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32.i32( <vscale x 4 x i32>, i32, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vx_nxv4i32_i32(<vscale x 4 x i32> %0, i32 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv4i32_i32: +define <vscale x 4 x i1> @intrinsic_vmsge_vx_nxv4i32_i32(<vscale x 4 x i32> %0, i32 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32.i32( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32.i32( <vscale x 4 x i32> %0, i32 %1, iXLen %2) @@ -1571,24 +1567,24 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32.i32( <vscale x 4 x i1>, <vscale x 4 x i32>, i32, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv4i32_i32: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; 
CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmslt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v2, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32.i32( <vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, @@ -1598,19 +1594,20 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i32.i32( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32.i32( <vscale x 8 x i32>, i32, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_vx_nxv8i32_i32(<vscale x 8 x i32> %0, i32 %1, iXLen %2) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vx_nxv8i32_i32: +define <vscale x 8 x i1> @intrinsic_vmsge_vx_nxv8i32_i32(<vscale x 8 x i32> %0, i32 %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vx v0, v8, a0 +; CHECK-NEXT: vmslt.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i32.i32( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32.i32( <vscale x 8 x i32> %0, i32 %1, iXLen %2) @@ -1618,24 +1615,24 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32( +declare <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32.i32( <vscale x 8 x i1>, <vscale x 8 x i32>, i32, <vscale x 8 x i1>, iXLen); -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vx_nxv8i32_i32: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vx_nxv8i32_i32(<vscale 
x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmslt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v4, v12 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32.i32( <vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, @@ -1645,13 +1642,13 @@ entry: ret <vscale x 8 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64.i64( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i64.i64( <vscale x 1 x i64>, i64, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_vx_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, iXLen %2) nounwind { -; RV32-LABEL: intrinsic_vmseq_vx_nxv1i64_i64: +define <vscale x 1 x i1> @intrinsic_vmsge_vx_nxv1i64_i64(<vscale x 1 x i64> %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vmsge_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw a1, 12(sp) @@ -1659,17 +1656,18 @@ define <vscale x 1 x i1> @intrinsic_vmseq_vx_nxv1i64_i64(<vscale x 1 x i64> %0, ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; RV32-NEXT: vlse64.v v9, (a0), zero -; RV32-NEXT: vmseq.vv v0, v8, v9 +; RV32-NEXT: vmsle.vv v0, v9, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vmseq_vx_nxv1i64_i64: +; RV64-LABEL: intrinsic_vmsge_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; RV64-NEXT: vmseq.vx v0, v8, a0 +; RV64-NEXT: vmslt.vx v8, v8, a0 +; RV64-NEXT: vmnot.m v0, v8 ; RV64-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64.i64( + %a = call <vscale x 1 x i1> 
@llvm.riscv.vmsge.nxv1i64.i64( <vscale x 1 x i64> %0, i64 %1, iXLen %2) @@ -1677,39 +1675,39 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i64.i64( +declare <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64.i64( <vscale x 1 x i1>, <vscale x 1 x i64>, i64, <vscale x 1 x i1>, iXLen); -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { -; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv1i64_i64: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vmseq.vv v10, v8, v11, v0.t -; RV32-NEXT: vmv.v.v v0, v10 +; RV32-NEXT: vmsle.vv v11, v10, v8, v0.t +; RV32-NEXT: vmv.v.v v0, v11 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv1i64_i64: +; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vmv1r.v v10, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; RV64-NEXT: vmv1r.v v0, v9 -; RV64-NEXT: vmseq.vx v10, v8, a0, v0.t -; RV64-NEXT: vmv.v.v v0, v10 +; RV64-NEXT: vmslt.vx v10, v8, a0, v0.t +; RV64-NEXT: vmxor.mm v0, v10, v9 ; RV64-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i64.i64( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64.i64( <vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, @@ -1719,13 +1717,13 @@ entry: ret <vscale x 1 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64.i64( 
+declare <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64.i64( <vscale x 2 x i64>, i64, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_vx_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, iXLen %2) nounwind { -; RV32-LABEL: intrinsic_vmseq_vx_nxv2i64_i64: +define <vscale x 2 x i1> @intrinsic_vmsge_vx_nxv2i64_i64(<vscale x 2 x i64> %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vmsge_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw a1, 12(sp) @@ -1733,17 +1731,18 @@ define <vscale x 2 x i1> @intrinsic_vmseq_vx_nxv2i64_i64(<vscale x 2 x i64> %0, ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vmseq.vv v0, v8, v10 +; RV32-NEXT: vmsle.vv v0, v10, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vmseq_vx_nxv2i64_i64: +; RV64-LABEL: intrinsic_vmsge_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; RV64-NEXT: vmseq.vx v0, v8, a0 +; RV64-NEXT: vmslt.vx v0, v8, a0 +; RV64-NEXT: vmnot.m v0, v0 ; RV64-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64.i64( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64.i64( <vscale x 2 x i64> %0, i64 %1, iXLen %2) @@ -1751,15 +1750,15 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64.i64( +declare <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64.i64( <vscale x 2 x i1>, <vscale x 2 x i64>, i64, <vscale x 2 x i1>, iXLen); -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { -; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv2i64_i64: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, 
sp, -16 ; RV32-NEXT: sw a1, 12(sp) @@ -1767,23 +1766,23 @@ define <vscale x 2 x i1> @intrinsic_vmseq_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmseq.vv v11, v8, v12, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmsle.vv v2, v12, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv2i64_i64: +; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmseq.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmslt.vx v2, v8, a0, v0.t +; RV64-NEXT: vmxor.mm v0, v2, v10 ; RV64-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64.i64( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64.i64( <vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, @@ -1793,13 +1792,13 @@ entry: ret <vscale x 2 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i64.i64( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64.i64( <vscale x 4 x i64>, i64, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_vx_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, iXLen %2) nounwind { -; RV32-LABEL: intrinsic_vmseq_vx_nxv4i64_i64: +define <vscale x 4 x i1> @intrinsic_vmsge_vx_nxv4i64_i64(<vscale x 4 x i64> %0, i64 %1, iXLen %2) nounwind { +; RV32-LABEL: intrinsic_vmsge_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw a1, 12(sp) @@ -1807,17 +1806,18 @@ define <vscale x 4 x i1> @intrinsic_vmseq_vx_nxv4i64_i64(<vscale x 4 x i64> %0, ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmseq.vv v0, 
v8, v12 +; RV32-NEXT: vmsle.vv v0, v12, v8 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vmseq_vx_nxv4i64_i64: +; RV64-LABEL: intrinsic_vmsge_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmseq.vx v0, v8, a0 +; RV64-NEXT: vmslt.vx v0, v8, a0 +; RV64-NEXT: vmnot.m v0, v0 ; RV64-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i64.i64( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64.i64( <vscale x 4 x i64> %0, i64 %1, iXLen %2) @@ -1825,15 +1825,15 @@ entry: ret <vscale x 4 x i1> %a } -declare <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64.i64( +declare <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64.i64( <vscale x 4 x i1>, <vscale x 4 x i64>, i64, <vscale x 4 x i1>, iXLen); -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { -; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw a1, 12(sp) @@ -1841,23 +1841,23 @@ define <vscale x 4 x i1> @intrinsic_vmseq_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmseq.vv v13, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmsle.vv v4, v16, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; -; RV64-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64: +; RV64-LABEL: intrinsic_vmsge_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: 
vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmseq.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmslt.vx v4, v8, a0, v0.t +; RV64-NEXT: vmxor.mm v0, v4, v12 ; RV64-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64.i64( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64.i64( <vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, @@ -1867,452 +1867,452 @@ entry: ret <vscale x 4 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_vi_nxv1i8_i8(<vscale x 1 x i8> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv1i8_i8: +define <vscale x 1 x i1> @intrinsic_vmsge_vi_nxv1i8_i8(<vscale x 1 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -16 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8.i8( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i8.i8( <vscale x 1 x i8> %0, - i8 9, + i8 -15, iXLen %1) ret <vscale x 1 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vi_nxv1i8_i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i8_i8: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vi_nxv1i8_i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, -15, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i8.i8( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i8.i8( <vscale x 1 x i1> %0, <vscale x 1 x i8> %1, - i8 9, + i8 -14, 
<vscale x 1 x i1> %2, iXLen %3) ret <vscale x 1 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_vi_nxv2i8_i8(<vscale x 2 x i8> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv2i8_i8: +define <vscale x 2 x i1> @intrinsic_vmsge_vi_nxv2i8_i8(<vscale x 2 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -14 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i8.i8( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i8.i8( <vscale x 2 x i8> %0, - i8 9, + i8 -13, iXLen %1) ret <vscale x 2 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vi_nxv2i8_i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i8_i8: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vi_nxv2i8_i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, -13, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i8.i8( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i8.i8( <vscale x 2 x i1> %0, <vscale x 2 x i8> %1, - i8 9, + i8 -12, <vscale x 2 x i1> %2, iXLen %3) ret <vscale x 2 x i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_vi_nxv4i8_i8(<vscale x 4 x i8> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv4i8_i8: +define <vscale x 4 x i1> @intrinsic_vmsge_vi_nxv4i8_i8(<vscale x 4 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma 
-; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -12 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i8.i8( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i8.i8( <vscale x 4 x i8> %0, - i8 9, + i8 -11, iXLen %1) ret <vscale x 4 x i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vi_nxv4i8_i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i8_i8: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vi_nxv4i8_i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, -11, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i8.i8( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i8.i8( <vscale x 4 x i1> %0, <vscale x 4 x i8> %1, - i8 9, + i8 -10, <vscale x 4 x i1> %2, iXLen %3) ret <vscale x 4 x i1> %a } -define <vscale x 8 x i1> @intrinsic_vmseq_vi_nxv8i8_i8(<vscale x 8 x i8> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv8i8_i8: +define <vscale x 8 x i1> @intrinsic_vmsge_vi_nxv8i8_i8(<vscale x 8 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i8.i8( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i8.i8( <vscale x 8 x i8> %0, - i8 9, + i8 -9, iXLen %1) ret <vscale x 8 x i1> %a } -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vi_nxv8i8_i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, iXLen %3) 
nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i8_i8: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vi_nxv8i8_i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv8i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, -9, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i8.i8( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i8.i8( <vscale x 8 x i1> %0, <vscale x 8 x i8> %1, - i8 9, + i8 -8, <vscale x 8 x i1> %2, iXLen %3) ret <vscale x 8 x i1> %a } -define <vscale x 16 x i1> @intrinsic_vmseq_vi_nxv16i8_i8(<vscale x 16 x i8> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv16i8_i8: +define <vscale x 16 x i1> @intrinsic_vmsge_vi_nxv16i8_i8(<vscale x 16 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i8.i8( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i8.i8( <vscale x 16 x i8> %0, - i8 9, + i8 -7, iXLen %1) ret <vscale x 16 x i1> %a } -define <vscale x 16 x i1> @intrinsic_vmseq_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i8_i8: +define <vscale x 16 x i1> @intrinsic_vmsge_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; 
CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, -7, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i8.i8( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8.i8( <vscale x 16 x i1> %0, <vscale x 16 x i8> %1, - i8 9, + i8 -6, <vscale x 16 x i1> %2, iXLen %3) ret <vscale x 16 x i1> %a } -define <vscale x 32 x i1> @intrinsic_vmseq_vi_nxv32i8_i8(<vscale x 32 x i8> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv32i8_i8: +define <vscale x 32 x i1> @intrinsic_vmsge_vi_nxv32i8_i8(<vscale x 32 x i8> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -6 ; CHECK-NEXT: ret entry: - %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.nxv32i8.i8( + %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.nxv32i8.i8( <vscale x 32 x i8> %0, - i8 9, + i8 -5, iXLen %1) ret <vscale x 32 x i1> %a } -define <vscale x 32 x i1> @intrinsic_vmseq_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv32i8_i8: +define <vscale x 32 x i1> @intrinsic_vmsge_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, -5, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %a = call <vscale x 32 x i1> @llvm.riscv.vmseq.mask.nxv32i8.i8( + %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8.i8( 
<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, - i8 9, + i8 -4, <vscale x 32 x i1> %2, iXLen %3) ret <vscale x 32 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_vi_nxv1i16_i16(<vscale x 1 x i16> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv1i16_i16: +define <vscale x 1 x i1> @intrinsic_vmsge_vi_nxv1i16_i16(<vscale x 1 x i16> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -4 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16.i16( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i16.i16( <vscale x 1 x i16> %0, - i16 9, + i16 -3, iXLen %1) ret <vscale x 1 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vi_nxv1i16_i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i16_i16: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vi_nxv1i16_i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, -3, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i16.i16( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i16.i16( <vscale x 1 x i1> %0, <vscale x 1 x i16> %1, - i16 9, + i16 -2, <vscale x 1 x i1> %2, iXLen %3) ret <vscale x 1 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_vi_nxv2i16_i16(<vscale x 2 x i16> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv2i16_i16: +define <vscale x 2 x i1> @intrinsic_vmsge_vi_nxv2i16_i16(<vscale x 2 x i16> %0, iXLen %1) nounwind { +; CHECK-LABEL: 
intrinsic_vmsge_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -2 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i16.i16( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i16.i16( <vscale x 2 x i16> %0, - i16 9, + i16 -1, iXLen %1) ret <vscale x 2 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vi_nxv2i16_i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i16_i16: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vi_nxv2i16_i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, -1, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i16.i16( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i16.i16( <vscale x 2 x i1> %0, <vscale x 2 x i16> %1, - i16 9, + i16 0, <vscale x 2 x i1> %2, iXLen %3) ret <vscale x 2 x i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_vi_nxv4i16_i16(<vscale x 4 x i16> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv4i16_i16: +define <vscale x 4 x i1> @intrinsic_vmsge_vi_nxv4i16_i16(<vscale x 4 x i16> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, -1 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i16.i16( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i16.i16( <vscale x 4 x i16> %0, - i16 9, + i16 0, iXLen %1) ret <vscale x 4 x 
i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vi_nxv4i16_i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i16_i16: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vi_nxv4i16_i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, 0, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i16.i16( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i16.i16( <vscale x 4 x i1> %0, <vscale x 4 x i16> %1, - i16 9, + i16 1, <vscale x 4 x i1> %2, iXLen %3) ret <vscale x 4 x i1> %a } -define <vscale x 8 x i1> @intrinsic_vmseq_vi_nxv8i16_i16(<vscale x 8 x i16> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv8i16_i16: +define <vscale x 8 x i1> @intrinsic_vmsge_vi_nxv8i16_i16(<vscale x 8 x i16> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 1 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i16.i16( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i16.i16( <vscale x 8 x i16> %0, - i16 9, + i16 2, iXLen %1) ret <vscale x 8 x i1> %a } -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i16_i16: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: 
intrinsic_vmsge_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i16.i16( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16.i16( <vscale x 8 x i1> %0, <vscale x 8 x i16> %1, - i16 9, + i16 3, <vscale x 8 x i1> %2, iXLen %3) ret <vscale x 8 x i1> %a } -define <vscale x 16 x i1> @intrinsic_vmseq_vi_nxv16i16_i16(<vscale x 16 x i16> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv16i16_i16: +define <vscale x 16 x i1> @intrinsic_vmsge_vi_nxv16i16_i16(<vscale x 16 x i16> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 3 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.nxv16i16.i16( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.nxv16i16.i16( <vscale x 16 x i16> %0, - i16 9, + i16 4, iXLen %1) ret <vscale x 16 x i1> %a } -define <vscale x 16 x i1> @intrinsic_vmseq_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv16i16_i16: +define <vscale x 16 x i1> @intrinsic_vmsge_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, 4, 
v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %a = call <vscale x 16 x i1> @llvm.riscv.vmseq.mask.nxv16i16.i16( + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16.i16( <vscale x 16 x i1> %0, <vscale x 16 x i16> %1, - i16 9, + i16 5, <vscale x 16 x i1> %2, iXLen %3) ret <vscale x 16 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_vi_nxv1i32_i32(<vscale x 1 x i32> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv1i32_i32: +define <vscale x 1 x i1> @intrinsic_vmsge_vi_nxv1i32_i32(<vscale x 1 x i32> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 5 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32.i32( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i32.i32( <vscale x 1 x i32> %0, - i32 9, + i32 6, iXLen %1) ret <vscale x 1 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vi_nxv1i32_i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i32_i32: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vi_nxv1i32_i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, 6, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i32.i32( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i32.i32( <vscale x 1 x i1> %0, <vscale x 1 x i32> %1, - i32 9, + i32 7, <vscale x 1 x i1> %2, iXLen %3) ret <vscale x 1 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_vi_nxv2i32_i32(<vscale x 2 x 
i32> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv2i32_i32: +define <vscale x 2 x i1> @intrinsic_vmsge_vi_nxv2i32_i32(<vscale x 2 x i32> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 7 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i32.i32( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i32.i32( <vscale x 2 x i32> %0, - i32 9, + i32 8, iXLen %1) ret <vscale x 2 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vi_nxv2i32_i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i32_i32: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vi_nxv2i32_i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i32.i32( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i32.i32( <vscale x 2 x i1> %0, <vscale x 2 x i32> %1, i32 9, @@ -2322,84 +2322,84 @@ entry: ret <vscale x 2 x i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_vi_nxv4i32_i32(<vscale x 4 x i32> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv4i32_i32: +define <vscale x 4 x i1> @intrinsic_vmsge_vi_nxv4i32_i32(<vscale x 4 x i32> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 9 ; CHECK-NEXT: ret entry: - %a = call 
<vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i32.i32( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i32.i32( <vscale x 4 x i32> %0, - i32 9, + i32 10, iXLen %1) ret <vscale x 4 x i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i32_i32: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i32.i32( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32.i32( <vscale x 4 x i1> %0, <vscale x 4 x i32> %1, - i32 9, + i32 11, <vscale x 4 x i1> %2, iXLen %3) ret <vscale x 4 x i1> %a } -define <vscale x 8 x i1> @intrinsic_vmseq_vi_nxv8i32_i32(<vscale x 8 x i32> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv8i32_i32: +define <vscale x 8 x i1> @intrinsic_vmsge_vi_nxv8i32_i32(<vscale x 8 x i32> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 11 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.nxv8i32.i32( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.nxv8i32.i32( <vscale x 8 x i32> %0, - i32 9, + i32 12, iXLen %1) ret <vscale x 8 x i1> %a } -define <vscale x 8 x i1> @intrinsic_vmseq_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) 
nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv8i32_i32: +define <vscale x 8 x i1> @intrinsic_vmsge_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %a = call <vscale x 8 x i1> @llvm.riscv.vmseq.mask.nxv8i32.i32( + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32.i32( <vscale x 8 x i1> %0, <vscale x 8 x i32> %1, - i32 9, + i32 13, <vscale x 8 x i1> %2, iXLen %3) ret <vscale x 8 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_vi_nxv1i64_i64(<vscale x 1 x i64> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv1i64_i64: +define <vscale x 1 x i1> @intrinsic_vmsge_vi_nxv1i64_i64(<vscale x 1 x i64> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i64.i64( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i64.i64( <vscale x 1 x i64> %0, i64 9, iXLen %1) @@ -2407,17 +2407,17 @@ entry: ret <vscale x 1 x i1> %a } -define <vscale x 1 x i1> @intrinsic_vmseq_mask_vi_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv1i64_i64: +define <vscale x 1 x i1> @intrinsic_vmsge_mask_vi_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv1i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vmv1r.v v10, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmseq.vi v10, v8, 9, v0.t +; CHECK-NEXT: vmsgt.vi v10, v8, 8, v0.t ; CHECK-NEXT: vmv.v.v v0, v10 ; CHECK-NEXT: ret entry: - %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.mask.nxv1i64.i64( + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64.i64( <vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 9, @@ -2427,14 +2427,14 @@ entry: ret <vscale x 1 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_vi_nxv2i64_i64(<vscale x 2 x i64> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv2i64_i64: +define <vscale x 2 x i1> @intrinsic_vmsge_vi_nxv2i64_i64(<vscale x 2 x i64> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.nxv2i64.i64( + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.nxv2i64.i64( <vscale x 2 x i64> %0, i64 9, iXLen %1) @@ -2442,17 +2442,17 @@ entry: ret <vscale x 2 x i1> %a } -define <vscale x 2 x i1> @intrinsic_vmseq_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv2i64_i64: +define <vscale x 2 x i1> @intrinsic_vmsge_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmseq.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: - %a = call <vscale x 2 x i1> @llvm.riscv.vmseq.mask.nxv2i64.i64( + %a = call <vscale x 2 x i1> 
@llvm.riscv.vmsge.mask.nxv2i64.i64( <vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 9, @@ -2462,14 +2462,14 @@ entry: ret <vscale x 2 x i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_vi_nxv4i64_i64(<vscale x 4 x i64> %0, iXLen %1) nounwind { -; CHECK-LABEL: intrinsic_vmseq_vi_nxv4i64_i64: +define <vscale x 4 x i1> @intrinsic_vmsge_vi_nxv4i64_i64(<vscale x 4 x i64> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vmsge_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; CHECK-NEXT: vmseq.vi v0, v8, 9 +; CHECK-NEXT: vmsgt.vi v0, v8, 8 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.nxv4i64.i64( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.nxv4i64.i64( <vscale x 4 x i64> %0, i64 9, iXLen %1) @@ -2477,17 +2477,17 @@ entry: ret <vscale x 4 x i1> %a } -define <vscale x 4 x i1> @intrinsic_vmseq_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { -; CHECK-LABEL: intrinsic_vmseq_mask_vi_nxv4i64_i64: +define <vscale x 4 x i1> @intrinsic_vmsge_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmseq.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: - %a = call <vscale x 4 x i1> @llvm.riscv.vmseq.mask.nxv4i64.i64( + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64.i64( <vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 9, @@ -2496,3 +2496,364 @@ entry: ret <vscale x 4 x i1> %a } + +; Test cases where the mask and maskedoff are the same value. 
+define <vscale x 1 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv1i8_i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, i8 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv1i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i8.i8( + <vscale x 1 x i1> %0, + <vscale x 1 x i8> %1, + i8 %2, + <vscale x 1 x i1> %0, + iXLen %3) + + ret <vscale x 1 x i1> %a +} + +define <vscale x 2 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv2i8_i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, i8 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv2i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i8.i8( + <vscale x 2 x i1> %0, + <vscale x 2 x i8> %1, + i8 %2, + <vscale x 2 x i1> %0, + iXLen %3) + + ret <vscale x 2 x i1> %a +} + +define <vscale x 4 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv4i8_i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, i8 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv4i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i8.i8( + <vscale x 4 x i1> %0, + <vscale x 4 x i8> %1, + i8 %2, + <vscale x 4 x i1> %0, + iXLen %3) + + ret <vscale x 4 x i1> %a +} + +define <vscale x 8 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv8i8_i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, i8 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv8i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; 
CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i8.i8( + <vscale x 8 x i1> %0, + <vscale x 8 x i8> %1, + i8 %2, + <vscale x 8 x i1> %0, + iXLen %3) + + ret <vscale x 8 x i1> %a +} + +define <vscale x 16 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv16i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vmslt.vx v2, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v2 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i8.i8( + <vscale x 16 x i1> %0, + <vscale x 16 x i8> %1, + i8 %2, + <vscale x 16 x i1> %0, + iXLen %3) + + ret <vscale x 16 x i1> %a +} + +define <vscale x 32 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv32i8_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmslt.vx v4, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v4 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x i1> @llvm.riscv.vmsge.mask.nxv32i8.i8( + <vscale x 32 x i1> %0, + <vscale x 32 x i8> %1, + i8 %2, + <vscale x 32 x i1> %0, + iXLen %3) + + ret <vscale x 32 x i1> %a +} + +define <vscale x 1 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv1i16_i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, i16 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv1i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i16.i16( + <vscale x 1 x i1> %0, + <vscale x 1 x i16> %1, + i16 %2, + <vscale x 1 x i1> %0, + iXLen %3) + + 
ret <vscale x 1 x i1> %a +} + +define <vscale x 2 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv2i16_i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, i16 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv2i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i16.i16( + <vscale x 2 x i1> %0, + <vscale x 2 x i16> %1, + i16 %2, + <vscale x 2 x i1> %0, + iXLen %3) + + ret <vscale x 2 x i1> %a +} + +define <vscale x 4 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv4i16_i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, i16 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv4i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i16.i16( + <vscale x 4 x i1> %0, + <vscale x 4 x i16> %1, + i16 %2, + <vscale x 4 x i1> %0, + iXLen %3) + + ret <vscale x 4 x i1> %a +} + +define <vscale x 8 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv8i16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vmslt.vx v2, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v2 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i16.i16( + <vscale x 8 x i1> %0, + <vscale x 8 x i16> %1, + i16 %2, + <vscale x 8 x i1> %0, + iXLen %3) + + ret <vscale x 8 x i1> %a +} + +define <vscale x 16 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv16i16_i16: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vmslt.vx v4, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v4 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmsge.mask.nxv16i16.i16( + <vscale x 16 x i1> %0, + <vscale x 16 x i16> %1, + i16 %2, + <vscale x 16 x i1> %0, + iXLen %3) + + ret <vscale x 16 x i1> %a +} + +define <vscale x 1 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv1i32_i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, i32 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv1i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i32.i32( + <vscale x 1 x i1> %0, + <vscale x 1 x i32> %1, + i32 %2, + <vscale x 1 x i1> %0, + iXLen %3) + + ret <vscale x 1 x i1> %a +} + +define <vscale x 2 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv2i32_i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, i32 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv2i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vmslt.vx v8, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i32.i32( + <vscale x 2 x i1> %0, + <vscale x 2 x i32> %1, + i32 %2, + <vscale x 2 x i1> %0, + iXLen %3) + + ret <vscale x 2 x i1> %a +} + +define <vscale x 4 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv4i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vmslt.vx v2, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v2 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i32.i32( + <vscale x 4 x i1> %0, 
+ <vscale x 4 x i32> %1, + i32 %2, + <vscale x 4 x i1> %0, + iXLen %3) + + ret <vscale x 4 x i1> %a +} + +define <vscale x 8 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv8i32_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vmslt.vx v4, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v4 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmsge.mask.nxv8i32.i32( + <vscale x 8 x i1> %0, + <vscale x 8 x i32> %1, + i32 %2, + <vscale x 8 x i1> %0, + iXLen %3) + + ret <vscale x 8 x i1> %a +} + +define <vscale x 1 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv1i64_i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, i64 %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv1i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vlse64.v v9, (a0), zero +; RV32-NEXT: vmsle.vv v0, v9, v8, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv1i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; RV64-NEXT: vmslt.vx v8, v8, a0 +; RV64-NEXT: vmandn.mm v0, v0, v8 +; RV64-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmsge.mask.nxv1i64.i64( + <vscale x 1 x i1> %0, + <vscale x 1 x i64> %1, + i64 %2, + <vscale x 1 x i1> %0, + iXLen %3) + + ret <vscale x 1 x i1> %a +} + +define <vscale x 2 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, i64 %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; 
RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmsle.vv v0, v10, v8, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; RV64-NEXT: vmslt.vx v2, v8, a0 +; RV64-NEXT: vmandn.mm v0, v0, v2 +; RV64-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmsge.mask.nxv2i64.i64( + <vscale x 2 x i1> %0, + <vscale x 2 x i64> %1, + i64 %2, + <vscale x 2 x i1> %0, + iXLen %3) + + ret <vscale x 2 x i1> %a +} + +define <vscale x 4 x i1> @intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, i64 %2, iXLen %3) nounwind { +; RV32-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vmsle.vv v0, v12, v8, v0.t +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; RV64-NEXT: vmslt.vx v4, v8, a0 +; RV64-NEXT: vmandn.mm v0, v0, v4 +; RV64-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmsge.mask.nxv4i64.i64( + <vscale x 4 x i1> %0, + <vscale x 4 x i64> %1, + i64 %2, + <vscale x 4 x i1> %0, + iXLen %3) + + ret <vscale x 4 x i1> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll index 9410a99..c9abf43 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu.ll @@ -34,10 +34,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i8( define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8(<vscale x 1 x i1> %0, <vscale x 1 x i8> %1, <vscale x 1 x i8> %2, <vscale x 1 x i8> %3, 
iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -86,10 +85,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i8( define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8(<vscale x 2 x i1> %0, <vscale x 2 x i8> %1, <vscale x 2 x i8> %2, <vscale x 2 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -138,10 +136,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i8( define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8(<vscale x 4 x i1> %0, <vscale x 4 x i8> %1, <vscale x 4 x i8> %2, <vscale x 4 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -190,10 +187,9 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i8( define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8(<vscale x 8 x i1> %0, <vscale x 8 x i8> %1, <vscale x 8 x i8> %2, <vscale x 8 x i8> %3, iXLen %4) nounwind { ; 
CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -242,12 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i8( @@ -294,12 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.nxv32i8( @@ -346,10 
+340,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i16( define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16(<vscale x 1 x i1> %0, <vscale x 1 x i16> %1, <vscale x 1 x i16> %2, <vscale x 1 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -398,10 +391,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i16( define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16(<vscale x 2 x i1> %0, <vscale x 2 x i16> %1, <vscale x 2 x i16> %2, <vscale x 2 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -450,10 +442,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i16( define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16(<vscale x 4 x i1> %0, <vscale x 4 x i16> %1, <vscale x 4 x i16> %2, <vscale x 4 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -502,12 
+493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i16( @@ -554,12 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i16( @@ -606,10 +595,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i32( define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32(<vscale x 1 x i1> %0, <vscale x 1 x i32> %1, <vscale x 1 x i32> %2, <vscale x 1 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu -; CHECK-NEXT: vmsleu.vv 
v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv1r.v v0, v11 ; CHECK-NEXT: ret @@ -658,10 +646,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i32( define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32(<vscale x 2 x i1> %0, <vscale x 2 x i32> %1, <vscale x 2 x i32> %2, <vscale x 2 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -710,12 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i32( @@ -762,12 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmsgeu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i32( @@ -814,10 +799,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.mask.nxv1i64( define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64(<vscale x 1 x i1> %0, <vscale x 1 x i64> %1, <vscale x 1 x i64> %2, <vscale x 1 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv1i64_nxv1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu -; CHECK-NEXT: vmsleu.vv v8, v9, v8 ; CHECK-NEXT: vmv1r.v v11, v0 -; CHECK-NEXT: vmv.v.v v0, v8 +; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu +; CHECK-NEXT: vmsleu.vv v0, v9, v8 ; CHECK-NEXT: vmsleu.vv v11, v10, v9, v0.t ; CHECK-NEXT: vmv.v.v v0, v11 ; CHECK-NEXT: ret @@ -866,12 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu -; CHECK-NEXT: vmsleu.vv v14, v10, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v14 -; CHECK-NEXT: vmsleu.vv v8, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v10, v8 +; CHECK-NEXT: vmsleu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.nxv2i64( @@ -918,12 +901,11 @@ declare 
<vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu -; CHECK-NEXT: vmsleu.vv v20, v12, v8 -; CHECK-NEXT: vmv1r.v v8, v0 -; CHECK-NEXT: vmv1r.v v0, v20 -; CHECK-NEXT: vmsleu.vv v8, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsleu.vv v0, v12, v8 +; CHECK-NEXT: vmsleu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i64( @@ -1141,8 +1123,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_vx_nxv16i8_i8(<vscale x 16 x i8> %0, ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmsltu.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i8.i8( @@ -1163,11 +1145,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmxor.mm v0, v11, v10 +; CHECK-NEXT: vmsltu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v2, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8.i8( @@ -1189,8 +1171,8 @@ define <vscale x 32 x i1> 
@intrinsic_vmsgeu_vx_nxv32i8_i8(<vscale x 32 x i8> %0, ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmsltu.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.nxv32i8.i8( @@ -1211,11 +1193,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmsgeu_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmxor.mm v0, v13, v12 +; CHECK-NEXT: vmsltu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v4, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8.i8( @@ -1381,8 +1363,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_vx_nxv8i16_i16(<vscale x 8 x i16> %0, ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmsltu.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i16.i16( @@ -1403,11 +1385,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; 
CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmxor.mm v0, v11, v10 +; CHECK-NEXT: vmsltu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v2, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16.i16( @@ -1429,8 +1411,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_vx_nxv16i16_i16(<vscale x 16 x i16> ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmsltu.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.nxv16i16.i16( @@ -1451,11 +1433,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmxor.mm v0, v13, v12 +; CHECK-NEXT: vmsltu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v4, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16.i16( @@ -1573,8 +1555,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_vx_nxv4i32_i32(<vscale x 4 x i32> %0, ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0 -; CHECK-NEXT: vmnot.m v0, v10 +; CHECK-NEXT: vmsltu.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i32.i32( @@ -1595,11 +1577,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32.i32( define <vscale x 4 x 
i1> @intrinsic_vmsgeu_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmxor.mm v0, v11, v10 +; CHECK-NEXT: vmsltu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v2, v10 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32.i32( @@ -1621,8 +1603,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_vx_nxv8i32_i32(<vscale x 8 x i32> %0, ; CHECK-LABEL: intrinsic_vmsgeu_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0 -; CHECK-NEXT: vmnot.m v0, v12 +; CHECK-NEXT: vmsltu.vx v0, v8, a0 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.nxv8i32.i32( @@ -1643,11 +1625,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmxor.mm v0, v13, v12 +; CHECK-NEXT: vmsltu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmxor.mm v0, v4, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32.i32( @@ -1708,11 +1690,11 @@ define <vscale x 1 x i1> @intrinsic_vmsgeu_mask_vx_nxv1i64_i64(<vscale x 1 x i1> ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu -; RV32-NEXT: vlse64.v v11, 
(a0), zero -; RV32-NEXT: vmv1r.v v10, v0 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v9 -; RV32-NEXT: vmsleu.vv v10, v11, v8, v0.t -; RV32-NEXT: vmv.v.v v0, v10 +; RV32-NEXT: vmsleu.vv v11, v10, v8, v0.t +; RV32-NEXT: vmv.v.v v0, v11 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -1756,8 +1738,8 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_vx_nxv2i64_i64(<vscale x 2 x i64> %0, ; RV64-LABEL: intrinsic_vmsgeu_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; RV64-NEXT: vmsltu.vx v10, v8, a0 -; RV64-NEXT: vmnot.m v0, v10 +; RV64-NEXT: vmsltu.vx v0, v8, a0 +; RV64-NEXT: vmnot.m v0, v0 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.nxv2i64.i64( @@ -1784,20 +1766,20 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmsleu.vv v11, v12, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmsleu.vv v2, v12, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmsltu.vx v11, v8, a0, v0.t -; RV64-NEXT: vmxor.mm v0, v11, v10 +; RV64-NEXT: vmsltu.vx v2, v8, a0, v0.t +; RV64-NEXT: vmxor.mm v0, v2, v10 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i64.i64( @@ -1831,8 +1813,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_vx_nxv4i64_i64(<vscale x 4 x i64> %0, ; RV64-LABEL: intrinsic_vmsgeu_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsltu.vx v12, v8, a0 -; RV64-NEXT: 
vmnot.m v0, v12 +; RV64-NEXT: vmsltu.vx v0, v8, a0 +; RV64-NEXT: vmnot.m v0, v0 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.nxv4i64.i64( @@ -1859,20 +1841,20 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmsleu.vv v13, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmsleu.vv v4, v16, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmsltu.vx v13, v8, a0, v0.t -; RV64-NEXT: vmxor.mm v0, v13, v12 +; RV64-NEXT: vmsltu.vx v4, v8, a0, v0.t +; RV64-NEXT: vmxor.mm v0, v4, v12 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i64.i64( @@ -2043,11 +2025,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, -7, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, -7, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8.i8( @@ -2078,11 +2060,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmsgeu_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv32i8_i8: ; CHECK: 
# %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vi v13, v8, -5, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, -5, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8.i8( @@ -2230,11 +2212,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, 2, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16.i16( @@ -2265,11 +2247,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsgeu_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vi v13, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, 4, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16.i16( @@ -2370,11 +2352,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; 
CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, 10, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32.i32( @@ -2405,11 +2387,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsgeu_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vi v13, v8, 12, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32.i32( @@ -2475,11 +2457,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmsgeu_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, -16, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, -16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i64.i64( @@ -2510,11 +2492,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsgeu_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; 
CHECK-NEXT: vmsgtu.vi v13, v8, -14, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, -14, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i64.i64( @@ -2604,8 +2586,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i8_i8(<vscale ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0 -; CHECK-NEXT: vmandn.mm v0, v0, v10 +; CHECK-NEXT: vmsltu.vx v2, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i8.i8( @@ -2622,8 +2604,8 @@ define <vscale x 32 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv32i8_i8(<vscale ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0 -; CHECK-NEXT: vmandn.mm v0, v0, v12 +; CHECK-NEXT: vmsltu.vx v4, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgeu.mask.nxv32i8.i8( @@ -2694,8 +2676,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i16_i16(<vscale ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0 -; CHECK-NEXT: vmandn.mm v0, v0, v10 +; CHECK-NEXT: vmsltu.vx v2, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i16.i16( @@ -2712,8 +2694,8 @@ define <vscale x 16 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i16_i16(<vsca ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0 -; CHECK-NEXT: vmandn.mm v0, v0, v12 +; CHECK-NEXT: 
vmsltu.vx v4, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgeu.mask.nxv16i16.i16( @@ -2766,8 +2748,8 @@ define <vscale x 4 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i32_i32(<vscale ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vmsltu.vx v10, v8, a0 -; CHECK-NEXT: vmandn.mm v0, v0, v10 +; CHECK-NEXT: vmsltu.vx v2, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i32.i32( @@ -2784,8 +2766,8 @@ define <vscale x 8 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i32_i32(<vscale ; CHECK-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vmsltu.vx v12, v8, a0 -; CHECK-NEXT: vmandn.mm v0, v0, v12 +; CHECK-NEXT: vmsltu.vx v4, v8, a0 +; CHECK-NEXT: vmandn.mm v0, v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgeu.mask.nxv8i32.i32( @@ -2836,18 +2818,16 @@ define <vscale x 2 x i1> @intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i64_i64(<vscale ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 -; RV32-NEXT: vmsleu.vv v10, v12, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vlse64.v v10, (a0), zero +; RV32-NEXT: vmsleu.vv v0, v10, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; RV64-NEXT: vmsltu.vx v10, v8, a0 -; RV64-NEXT: vmandn.mm v0, v0, v10 +; RV64-NEXT: vmsltu.vx v2, v8, a0 +; RV64-NEXT: vmandn.mm v0, v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgeu.mask.nxv2i64.i64( @@ -2868,18 +2848,16 @@ define <vscale x 4 x i1> 
@intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i64_i64(<vscale ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v12, v0 -; RV32-NEXT: vmsleu.vv v12, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vlse64.v v12, (a0), zero +; RV32-NEXT: vmsleu.vv v0, v12, v8, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; RV64-NEXT: vmsltu.vx v12, v8, a0 -; RV64-NEXT: vmandn.mm v0, v0, v12 +; RV64-NEXT: vmsltu.vx v4, v8, a0 +; RV64-NEXT: vmandn.mm v0, v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgeu.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll index bd64093..7d544af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsgt.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgt.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmsgt_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmsgt.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsgt.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsgt.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i32( define <vscale x 4 x i1> 
@intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsgt.nxv8i32( @@ -850,11 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v10, v8 -; CHECK-NEXT: vmslt.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call 
<vscale x 2 x i1> @llvm.riscv.vmsgt.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v12, v8 -; CHECK-NEXT: vmslt.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsgt.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgt.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgt.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmsgt_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgt.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; 
CHECK-NEXT: vmsgt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgt.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgt.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgt.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i32.i32( define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; 
CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgt.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgt.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmslt.vv v11, v12, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmslt.vv v2, v12, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgt_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmsgt.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmsgt.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; 
RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmslt.vv v13, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmslt.vv v4, v16, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgt_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmsgt.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmsgt.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmsgt_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgt.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 
; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgt.mask.nxv32i8.i8( @@ -2182,11 +2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsgt_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgt.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgt.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i32.i32( @@ -2357,11 
+2357,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsgt_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgt.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgt.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmsgt_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgt.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgt.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgt.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsgt_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgt_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgt.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgt.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgt.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll index 37a022e..65a9ab5 100644 
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgtu.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmsgtu.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; 
CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; 
CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.nxv8i32( @@ -850,11 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v10, v8 -; CHECK-NEXT: vmsltu.vv v14, v12, v10, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v12, v8 -; CHECK-NEXT: vmsltu.vv v20, v16, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i8.i8( 
define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsgtu.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmsgtu_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgtu.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> 
@llvm.riscv.vmsgtu.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i32.i32( define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; 
CHECK-NEXT: vmsgtu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmsltu.vv v11, v12, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmsltu.vv v2, v12, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmsgtu.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmsgtu.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmsltu.vv v13, v16, v8, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmsltu.vv v4, v16, v8, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmsgtu.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmsgtu.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 
x i1> @llvm.riscv.vmsgtu.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsgtu_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmsgtu_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsgtu.mask.nxv32i8.i8( @@ -2182,11 +2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 
x i1> @intrinsic_vmsgtu_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsgtu.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i32.i32( @@ -2357,11 +2357,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsgtu_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsgtu.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmsgtu_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> 
%1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsgtu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsgtu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsgtu.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsgtu_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgtu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsgtu.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsgtu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsgtu.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll index cac4cbe..93d3852 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: 
%mask = call <vscale x 16 x i1> @llvm.riscv.vmsle.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsle.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v12 -; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmsle.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsle.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v12 -; 
CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsle.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v12 -; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsle.nxv8i32( @@ -850,11 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # 
%entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v10 -; CHECK-NEXT: vmsle.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsle.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmsle.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsle.vv v0, v8, v12 -; CHECK-NEXT: vmsle.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsle.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsle.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmsle_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsle.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmsle_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, 
<vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsle.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmsle_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmsle_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i32.i32( define <vscale x 4 x 
i1> @intrinsic_vmsle_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmsle_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmsle_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmsle.vv v11, v8, v12, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmsle.vv v2, v8, v12, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsle_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; 
RV64-NEXT: vmsle.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmsle.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmsle_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmsle.vv v13, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmsle.vv v4, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsle_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmsle.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmsle.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsle_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmsle_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: 
intrinsic_vmsle_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsle.mask.nxv32i8.i8( @@ -2182,11 +2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsle_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsle_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsle.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsle_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; 
CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i32.i32( @@ -2357,11 +2357,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsle_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsle.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmsle_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsle.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsle_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsle_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; 
CHECK-NEXT: vmsle.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsle.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll index 75dc38f..8345365 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsleu.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsleu.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmsleu.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i16( define <vscale x 8 x i1> 
@intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsleu.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsleu.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; 
CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsleu.nxv8i32( @@ -850,11 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsleu.vv v0, v8, v10 -; CHECK-NEXT: vmsleu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsleu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmsleu.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: 
vmsleu.vv v0, v8, v12 -; CHECK-NEXT: vmsleu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsleu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsleu.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsleu.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmsleu_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsleu.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: 
vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i32.i32( define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, 
iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmsleu.vv v11, v8, v12, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmsleu.vv v2, v8, v12, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsleu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmsleu.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmsleu.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmsleu.vv v13, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmsleu.vv v4, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsleu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; 
RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmsleu.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmsleu.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmsleu_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsleu.mask.nxv32i8.i8( @@ -2182,11 +2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, 
ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsleu_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsleu.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i32.i32( @@ -2357,11 +2357,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsleu_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, 9, v0.t -; 
CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsleu.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmsleu_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsleu.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsleu_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsleu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsleu.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll index 2efbe46..5bf07a0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: 
# %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmslt.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmslt.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmslt.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmslt.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, 
<vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmslt.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmslt.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmslt.nxv8i32( @@ -850,11 
+850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v10 -; CHECK-NEXT: vmslt.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmslt.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmslt.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmslt.vv v0, v8, v12 -; CHECK-NEXT: vmslt.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmslt.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmslt.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmslt_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmslt.vx v2, v8, a0, v0.t +; 
CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmslt.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmslt_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmslt.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmslt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmslt.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmslt_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmslt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmslt_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmslt.vx 
v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmslt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i32.i32( define <vscale x 4 x i1> @intrinsic_vmslt_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmslt.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmslt.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmslt_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmslt.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmslt.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmslt_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmslt.vv v11, v8, v12, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: 
vmslt.vv v2, v8, v12, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmslt_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmslt.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmslt.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmslt_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmslt.vv v13, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmslt.vv v4, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmslt_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmslt.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmslt.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmslt_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, -7, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, -7, v0.t +; 
CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmslt.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmslt_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vi v13, v8, -5, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, -5, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmslt.mask.nxv32i8.i8( @@ -2182,11 +2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmslt_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, 2, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmslt_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vi v13, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, 4, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> 
@llvm.riscv.vmslt.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmslt_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, 10, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i32.i32( @@ -2357,11 +2357,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmslt_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vi v13, v8, 12, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmslt.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmslt_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsle.vi v11, v8, 8, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsle.vi v2, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmslt.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> 
@intrinsic_vmslt_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmslt_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsle.vi v13, v8, 8, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsle.vi v4, v8, 8, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmslt.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll index c344dff..2068ec2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsltu.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsltu.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v12 
-; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmsltu.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsltu.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v12 -; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsltu.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vmsltu_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v12 -; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsltu.nxv8i32( @@ -850,11 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v10 -; CHECK-NEXT: vmsltu.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsltu.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmsltu.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i64( define <vscale x 4 x i1> 
@intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsltu.vv v0, v8, v12 -; CHECK-NEXT: vmsltu.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsltu.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsltu.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vx_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsltu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsltu.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmsltu_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsltu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> 
@llvm.riscv.vmsltu.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsltu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsltu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i32.i32( define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsltu.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; 
CHECK-NEXT: vmsltu.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsltu.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsltu.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmsltu.vv v11, v8, v12, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmsltu.vv v2, v8, v12, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsltu_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmsltu.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmsltu.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: 
vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmsltu.vv v13, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmsltu.vv v4, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsltu_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmsltu.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmsltu.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, -7, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, -7, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmsltu_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, -5, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, -5, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> 
@llvm.riscv.vmsltu.mask.nxv32i8.i8( @@ -2182,11 +2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsltu_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, 2, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, 2, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsltu_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, 4, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, 4, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsltu.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, 10, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, 10, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i32.i32( @@ -2357,11 +2357,11 @@ entry: define <vscale x 
8 x i1> @intrinsic_vmsltu_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, 12, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, 12, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsltu.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmsltu_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsleu.vi v11, v8, -16, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsleu.vi v2, v8, -16, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsltu.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsltu_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsltu_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsleu.vi v13, v8, -14, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsleu.vi v4, v8, -14, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsltu.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll index 0c34dd7..88a09e0 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll @@ -238,11 +238,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i8( define <vscale x 16 x i1> @intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsne.nxv16i8( @@ -289,11 +289,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsne.mask.nxv32i8( define <vscale x 32 x i1> @intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i8> %2, <vscale x 32 x i8> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 32 x i1> @llvm.riscv.vmsne.nxv32i8( @@ -493,11 +493,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i16( define <vscale x 8 x i1> @intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2, <vscale x 8 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmsne.vv v0, 
v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsne.nxv8i16( @@ -544,11 +544,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i16( define <vscale x 16 x i1> @intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i16> %2, <vscale x 16 x i16> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 16 x i1> @llvm.riscv.vmsne.nxv16i16( @@ -697,11 +697,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i32( define <vscale x 4 x i1> @intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i32( @@ -748,11 +748,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i32( define <vscale x 8 x i1> @intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i32> %2, <vscale x 8 x i32> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv8i32_nxv8i32: ; 
CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 8 x i1> @llvm.riscv.vmsne.nxv8i32( @@ -850,11 +850,11 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i64( define <vscale x 2 x i1> @intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2, <vscale x 2 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v10 -; CHECK-NEXT: vmsne.vv v14, v10, v12, v0.t -; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: vmsne.vv v2, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %mask = call <vscale x 2 x i1> @llvm.riscv.vmsne.nxv2i64( @@ -901,11 +901,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i64( define <vscale x 4 x i1> @intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i64> %2, <vscale x 4 x i64> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmsne.vv v0, v8, v12 -; CHECK-NEXT: vmsne.vv v20, v12, v16, v0.t -; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: vmsne.vv v4, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %mask = call <vscale x 4 x i1> @llvm.riscv.vmsne.nxv4i64( @@ -1140,11 +1140,11 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i8.i8( define <vscale x 16 x i1> @intrinsic_vmsne_mask_vx_nxv16i8_i8(<vscale x 16 x i1> 
%0, <vscale x 16 x i8> %1, i8 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsne.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsne.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i8.i8( @@ -1187,11 +1187,11 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsne.mask.nxv32i8.i8( define <vscale x 32 x i1> @intrinsic_vmsne_mask_vx_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, i8 %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsne.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsne.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsne.mask.nxv32i8.i8( @@ -1375,11 +1375,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i16.i16( define <vscale x 8 x i1> @intrinsic_vmsne_mask_vx_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, i16 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsne.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsne.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i16.i16( @@ -1422,11 +1422,11 @@ declare <vscale x 16 x i1> 
@llvm.riscv.vmsne.mask.nxv16i16.i16( define <vscale x 16 x i1> @intrinsic_vmsne_mask_vx_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, i16 %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsne.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsne.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i16.i16( @@ -1563,11 +1563,11 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i32.i32( define <vscale x 4 x i1> @intrinsic_vmsne_mask_vx_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, i32 %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsne.vx v11, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsne.vx v2, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i32.i32( @@ -1610,11 +1610,11 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i32.i32( define <vscale x 8 x i1> @intrinsic_vmsne_mask_vx_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, i32 %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vx_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsne.vx v13, v8, a0, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsne.vx v4, v8, a0, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 
8 x i1> @llvm.riscv.vmsne.mask.nxv8i32.i32( @@ -1749,20 +1749,20 @@ define <vscale x 2 x i1> @intrinsic_vmsne_mask_vx_nxv2i64_i64(<vscale x 2 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 +; RV32-NEXT: vmv1r.v v2, v0 ; RV32-NEXT: vmv1r.v v0, v10 -; RV32-NEXT: vmsne.vv v11, v8, v12, v0.t -; RV32-NEXT: vmv1r.v v0, v11 +; RV32-NEXT: vmsne.vv v2, v8, v12, v0.t +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsne_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v11, v0 +; RV64-NEXT: vmv1r.v v2, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; RV64-NEXT: vmv1r.v v0, v10 -; RV64-NEXT: vmsne.vx v11, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v11 +; RV64-NEXT: vmsne.vx v2, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v2 ; RV64-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i64.i64( @@ -1823,20 +1823,20 @@ define <vscale x 4 x i1> @intrinsic_vmsne_mask_vx_nxv4i64_i64(<vscale x 4 x i1> ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 +; RV32-NEXT: vmv1r.v v4, v0 ; RV32-NEXT: vmv1r.v v0, v12 -; RV32-NEXT: vmsne.vv v13, v8, v16, v0.t -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmsne.vv v4, v8, v16, v0.t +; RV32-NEXT: vmv1r.v v0, v4 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vmsne_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: vmv1r.v v13, v0 +; RV64-NEXT: vmv1r.v v4, v0 ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vmsne.vx v13, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v13 +; RV64-NEXT: vmsne.vx v4, v8, a0, v0.t +; RV64-NEXT: vmv1r.v v0, v4 ; RV64-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i64.i64( @@ -2007,11 +2007,11 @@ entry: define <vscale x 16 x i1> 
@intrinsic_vmsne_mask_vi_nxv16i8_i8(<vscale x 16 x i1> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv16i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsne.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i8.i8( @@ -2042,11 +2042,11 @@ entry: define <vscale x 32 x i1> @intrinsic_vmsne_mask_vi_nxv32i8_i8(<vscale x 32 x i1> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv32i8_i8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsne.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsne.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 32 x i1> @llvm.riscv.vmsne.mask.nxv32i8.i8( @@ -2182,11 +2182,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsne_mask_vi_nxv8i16_i16(<vscale x 8 x i1> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsne.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i16.i16( @@ -2217,11 +2217,11 @@ entry: define <vscale x 16 x i1> @intrinsic_vmsne_mask_vi_nxv16i16_i16(<vscale x 16 x i1> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, 
iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv16i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsne.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsne.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i1> @llvm.riscv.vmsne.mask.nxv16i16.i16( @@ -2322,11 +2322,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsne_mask_vi_nxv4i32_i32(<vscale x 4 x i1> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsne.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i32.i32( @@ -2357,11 +2357,11 @@ entry: define <vscale x 8 x i1> @intrinsic_vmsne_mask_vi_nxv8i32_i32(<vscale x 8 x i1> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv8i32_i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsne.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsne.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i1> @llvm.riscv.vmsne.mask.nxv8i32.i32( @@ -2427,11 +2427,11 @@ entry: define <vscale x 2 x i1> @intrinsic_vmsne_mask_vi_nxv2i64_i64(<vscale x 2 x i1> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv2i64_i64: ; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu ; CHECK-NEXT: vmv1r.v v0, v10 -; CHECK-NEXT: vmsne.vi v11, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: vmsne.vi v2, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: ret entry: %a = call <vscale x 2 x i1> @llvm.riscv.vmsne.mask.nxv2i64.i64( @@ -2462,11 +2462,11 @@ entry: define <vscale x 4 x i1> @intrinsic_vmsne_mask_vi_nxv4i64_i64(<vscale x 4 x i1> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vmsne_mask_vi_nxv4i64_i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v4, v0 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu ; CHECK-NEXT: vmv1r.v v0, v12 -; CHECK-NEXT: vmsne.vi v13, v8, 9, v0.t -; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: vmsne.vi v4, v8, 9, v0.t +; CHECK-NEXT: vmv1r.v v0, v4 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i1> @llvm.riscv.vmsne.mask.nxv4i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll index c0bba31..b8d6fa4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-reverse-mask.ll @@ -162,8 +162,7 @@ define <vscale x 16 x i1> @test_vp_reverse_nxv16i1_masked(<vscale x 16 x i1> %sr ; CHECK-NEXT: vrsub.vx v12, v12, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vrgatherei16.vv v10, v16, v12, v0.t -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t ; CHECK-NEXT: ret %dst = call <vscale x 16 x i1> @llvm.experimental.vp.reverse.nxv16i1(<vscale x 16 x i1> %src, <vscale x 16 x i1> %mask, i32 %evl) ret <vscale x 16 x i1> %dst @@ -200,8 +199,7 @@ define <vscale x 32 x i1> @test_vp_reverse_nxv32i1_masked(<vscale x 32 x i1> %sr ; CHECK-NEXT: vrsub.vx v16, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, 
v24, v16, v0.t -; CHECK-NEXT: vmsne.vi v8, v12, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsne.vi v0, v12, 0, v0.t ; CHECK-NEXT: ret %dst = call <vscale x 32 x i1> @llvm.experimental.vp.reverse.nxv32i1(<vscale x 32 x i1> %src, <vscale x 32 x i1> %mask, i32 %evl) ret <vscale x 32 x i1> %dst @@ -246,8 +244,7 @@ define <vscale x 64 x i1> @test_vp_reverse_nxv64i1_masked(<vscale x 64 x i1> %sr ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vslidedown.vx v8, v16, a1, v0.t -; CHECK-NEXT: vmsne.vi v16, v8, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %dst = call <vscale x 64 x i1> @llvm.experimental.vp.reverse.nxv64i1(<vscale x 64 x i1> %src, <vscale x 64 x i1> %mask, i32 %evl) ret <vscale x 64 x i1> %dst diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll index 0576255..52e4f11 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll @@ -369,8 +369,7 @@ define <vscale x 16 x i1> @test_vp_splice_nxv16i1_masked(<vscale x 16 x i1> %va, ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vslideup.vx v10, v12, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsne.vi v0, v10, 0, v0.t ; CHECK-NEXT: ret %v = call <vscale x 16 x i1> @llvm.experimental.vp.splice.nxv16i1(<vscale x 16 x i1> %va, <vscale x 16 x i1> %vb, i32 5, <vscale x 16 x i1> %mask, i32 %evla, i32 %evlb) ret <vscale x 16 x i1> %v @@ -443,8 +442,7 @@ define <vscale x 32 x i1> @test_vp_splice_nxv32i1_masked(<vscale x 32 x i1> %va, ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vslideup.vx v12, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma -; CHECK-NEXT: vmsne.vi v8, v12, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsne.vi v0, v12, 
0, v0.t ; CHECK-NEXT: ret %v = call <vscale x 32 x i1> @llvm.experimental.vp.splice.nxv32i1(<vscale x 32 x i1> %va, <vscale x 32 x i1> %vb, i32 5, <vscale x 32 x i1> %mask, i32 %evla, i32 %evlb) ret <vscale x 32 x i1> %v @@ -517,8 +515,7 @@ define <vscale x 64 x i1> @test_vp_splice_nxv64i1_masked(<vscale x 64 x i1> %va, ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vslideup.vx v16, v24, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v8, v16, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmsne.vi v0, v16, 0, v0.t ; CHECK-NEXT: ret %v = call <vscale x 64 x i1> @llvm.experimental.vp.splice.nxv64i1(<vscale x 64 x i1> %va, <vscale x 64 x i1> %vb, i32 5, <vscale x 64 x i1> %mask, i32 %evla, i32 %evlb) ret <vscale x 64 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll index 5ca62be..afa7931 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -375,11 +375,11 @@ define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale ; CHECK-NEXT: vl8r.v v16, (a4) ; CHECK-NEXT: vl8r.v v8, (a0) ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: sub a0, a3, a1 -; CHECK-NEXT: sltu a4, a3, a0 ; CHECK-NEXT: vlm.v v0, (a2) -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a0, a4, a0 +; CHECK-NEXT: sub a0, a3, a1 +; CHECK-NEXT: sltu a2, a3, a0 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a0, a2, a0 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll index eb70f18..b663e3b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll @@ -451,21 +451,22 @@ define <vscale x 16 x double> @vselect_combine_regression(<vscale x 16 x i64> %v ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: sub sp, 
sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v0, v8 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, a0, a1 -; CHECK-NEXT: vl8re64.v v24, (a1) +; CHECK-NEXT: vl8re64.v v8, (a1) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v8, (a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmseq.vi v7, v16, 0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmseq.vi v0, v16, 0 +; CHECK-NEXT: vmseq.vi v24, v16, 0 +; CHECK-NEXT: vmseq.vi v0, v0, 0 ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll index ad80976..cbb7cdec 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp-mask.ll @@ -56,9 +56,8 @@ define <vscale x 2 x i1> @vtrunc_nxv2i1_nxv2i64(<vscale x 2 x i64> %a, <vscale x ; CHECK-LABEL: vtrunc_nxv2i1_nxv2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; CHECK-NEXT: vand.vi v10, v8, 1, v0.t -; CHECK-NEXT: vmsne.vi v8, v10, 0, v0.t -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vand.vi v8, v8, 1, v0.t +; CHECK-NEXT: vmsne.vi v0, v8, 0, v0.t ; CHECK-NEXT: ret %v = call <vscale x 2 x i1> @llvm.vp.trunc.nxv2i1.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %m, i32 %vl) ret <vscale x 2 x i1> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll index 7007b40..f848d9b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ -296,29 +296,29 @@ define <vscale x 32 x i32> @vtrunc_nxv32i64_nxv32i32(<vscale x 32 x i64> %a, <vs ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a5, a1, 3 -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v6, v0, a5 -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; CHECK-NEXT: srli a3, a1, 3 +; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: vslidedown.vx v25, v0, a3 -; CHECK-NEXT: slli a3, a1, 3 -; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: vl8re64.v v16, (a3) -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v8, v0, a4 +; CHECK-NEXT: slli a4, a1, 3 +; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vl8re64.v v16, (a4) +; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv1r.v v25, v8 +; CHECK-NEXT: vslidedown.vx v0, v8, a3 ; CHECK-NEXT: slli a3, a1, 1 ; CHECK-NEXT: sub a4, a2, a3 -; CHECK-NEXT: sltu a6, a2, a4 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a4, a6, a4 -; CHECK-NEXT: sub a6, a4, a1 -; CHECK-NEXT: sltu a7, a4, a6 +; CHECK-NEXT: sltu a5, a2, a4 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a4, a5, a4 +; CHECK-NEXT: sub a5, a4, a1 ; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: addi a7, a7, -1 -; CHECK-NEXT: and a0, a7, a6 -; CHECK-NEXT: vmv1r.v v24, v25 -; CHECK-NEXT: vslidedown.vx v0, v25, a5 +; CHECK-NEXT: sltu a0, a4, a5 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a5 ; CHECK-NEXT: bltu a4, a1, .LBB17_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 @@ -326,7 +326,7 @@ define <vscale x 32 x i32> 
@vtrunc_nxv32i64_nxv32i32(<vscale x 32 x i64> %a, <vs ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v28, v16, 0, v0.t ; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vnsrl.wi v24, v8, 0, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB17_4 ; CHECK-NEXT: # %bb.3: diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm.mir b/llvm/test/CodeGen/RISCV/rvv/vxrm.mir index 64e19188..a588677 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vxrm.mir @@ -11,9 +11,9 @@ body: | ; MIR-LABEL: name: verify_vxrm ; MIR: liveins: $v8, $v9, $x10 ; MIR-NEXT: {{ $}} - ; MIR-NEXT: dead $x0 = PseudoVSETVLI renamable $x10, 197 /* e8, mf8, ta, ma */, implicit-def $vl, implicit-def $vtype + ; MIR-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x10, 197 /* e8, mf8, ta, ma */, implicit-def $vl, implicit-def $vtype ; MIR-NEXT: WriteVXRMImm 0, implicit-def $vxrm - ; MIR-NEXT: renamable $v8 = PseudoVAADD_VV_MF8 undef $v8, renamable $v8, renamable $v9, 0, $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype, implicit $vxrm + ; MIR-NEXT: renamable $v8 = PseudoVAADD_VV_MF8 undef $v8, killed renamable $v8, killed renamable $v9, 0, $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype, implicit $vxrm ; MIR-NEXT: PseudoRET implicit $v8 ; ASM-LABEL: verify_vxrm: ; ASM: # %bb.0: @@ -23,8 +23,8 @@ body: | ; ASM-NEXT: ret %0:vr = COPY $v8 %1:vr = COPY $v9 - dead $x0 = PseudoVSETVLI killed renamable $x10, 197 /* e8, mf8, ta, ma */, implicit-def $vl, implicit-def $vtype + %2:gprnox0 = COPY $x10 %pt:vr = IMPLICIT_DEF - renamable $v8 = PseudoVAADD_VV_MF8 %pt, killed renamable $v8, killed renamable $v9, 0, $noreg, 3 /* e8 */, 0 + renamable $v8 = PseudoVAADD_VV_MF8 %pt, %0, %1, 0, %2, 3 /* e8 */, 0 PseudoRET implicit $v8 ... 
diff --git a/llvm/test/CodeGen/RISCV/strip-w-suffix.ll b/llvm/test/CodeGen/RISCV/strip-w-suffix.ll deleted file mode 100644 index 4124b3d..0000000 --- a/llvm/test/CodeGen/RISCV/strip-w-suffix.ll +++ /dev/null @@ -1,74 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefixes=STRIP %s -; RUN: llc -mtriple=riscv64 -mattr=+m,+no-strip-w-suffix -verify-machineinstrs < %s \ -; RUN: | FileCheck -check-prefixes=NO-STRIP %s - -define i32 @addiw(i32 %a) { -; STRIP-LABEL: addiw: -; STRIP: # %bb.0: -; STRIP-NEXT: lui a1, 1 -; STRIP-NEXT: addi a1, a1, -1 -; STRIP-NEXT: addw a0, a0, a1 -; STRIP-NEXT: ret -; -; NO-STRIP-LABEL: addiw: -; NO-STRIP: # %bb.0: -; NO-STRIP-NEXT: lui a1, 1 -; NO-STRIP-NEXT: addiw a1, a1, -1 -; NO-STRIP-NEXT: addw a0, a0, a1 -; NO-STRIP-NEXT: ret - %ret = add i32 %a, 4095 - ret i32 %ret -} - -define i32 @addw(i32 %a, i32 %b) { -; STRIP-LABEL: addw: -; STRIP: # %bb.0: -; STRIP-NEXT: add a0, a0, a1 -; STRIP-NEXT: addiw a0, a0, 1024 -; STRIP-NEXT: ret -; -; NO-STRIP-LABEL: addw: -; NO-STRIP: # %bb.0: -; NO-STRIP-NEXT: addw a0, a0, a1 -; NO-STRIP-NEXT: addiw a0, a0, 1024 -; NO-STRIP-NEXT: ret - %add = add i32 %a, %b - %ret = add i32 %add, 1024 - ret i32 %ret -} - -define i32 @mulw(i32 %a, i32 %b) { -; STRIP-LABEL: mulw: -; STRIP: # %bb.0: -; STRIP-NEXT: mul a0, a0, a1 -; STRIP-NEXT: addiw a0, a0, 1024 -; STRIP-NEXT: ret -; -; NO-STRIP-LABEL: mulw: -; NO-STRIP: # %bb.0: -; NO-STRIP-NEXT: mulw a0, a0, a1 -; NO-STRIP-NEXT: addiw a0, a0, 1024 -; NO-STRIP-NEXT: ret - %mul = mul i32 %a, %b - %ret = add i32 %mul, 1024 - ret i32 %ret -} - -define i32 @slliw(i32 %a) { -; STRIP-LABEL: slliw: -; STRIP: # %bb.0: -; STRIP-NEXT: slli a0, a0, 1 -; STRIP-NEXT: addiw a0, a0, 1024 -; STRIP-NEXT: ret -; -; NO-STRIP-LABEL: slliw: -; NO-STRIP: # %bb.0: -; NO-STRIP-NEXT: slliw a0, a0, 1 -; NO-STRIP-NEXT: addiw a0, a0, 1024 -; NO-STRIP-NEXT: ret - 
%shl = shl i32 %a, 1 - %ret = add i32 %shl, 1024 - ret i32 %ret -} diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll index 599b0d0..ce0d8fe 100644 --- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll +++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll @@ -3,9 +3,9 @@ ; RUN: | FileCheck -check-prefixes=ALL,SLOW,RV32I %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ALL,SLOW,RV64I %s -; RUN: llc -mtriple=riscv32 -mattr=+fast-unaligned-access -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ALL,FAST,RV32I-FAST %s -; RUN: llc -mtriple=riscv64 -mattr=+fast-unaligned-access -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+unaligned-scalar-mem -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=ALL,FAST,RV64I-FAST %s ; A collection of cases showing codegen for unaligned loads and stores diff --git a/llvm/test/CodeGen/SPIRV/const-composite.ll b/llvm/test/CodeGen/SPIRV/const-composite.ll new file mode 100644 index 0000000..4e304bb --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/const-composite.ll @@ -0,0 +1,26 @@ +; This test is to ensure that OpConstantComposite reuses a constant when it's +; already created and available in the same machine function. In this test case +; it's `1` that is passed implicitly as a part of the `foo` function argument +; and also takes part in a composite constant creation. 
+ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-SPIRV: %[[#type_int32:]] = OpTypeInt 32 0 +; CHECK-SPIRV: %[[#const1:]] = OpConstant %[[#type_int32]] 1 +; CHECK-SPIRV: OpTypeArray %[[#]] %[[#const1:]] +; CHECK-SPIRV: %[[#const0:]] = OpConstant %[[#type_int32]] 0 +; CHECK-SPIRV: OpConstantComposite %[[#]] %[[#const0]] %[[#const1]] + +%struct = type { [1 x i64] } + +define spir_kernel void @foo(ptr noundef byval(%struct) %arg) { +entry: + call spir_func void @bar(<2 x i32> noundef <i32 0, i32 1>) + ret void +} + +define spir_func void @bar(<2 x i32> noundef) { +entry: + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/any.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/any.ll new file mode 100644 index 0000000..b1dd388 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/any.ll @@ -0,0 +1,187 @@ +; RUN: llc -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-HLSL +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-OCL +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; Make sure spirv operation function calls for any are generated. 
+ +; CHECK-HLSL-DAG: OpMemoryModel Logical GLSL450 +; CHECK-OCL-DAG: OpMemoryModel Physical32 OpenCL +; CHECK-DAG: OpName %[[#any_bool_arg:]] "a" +; CHECK-DAG: %[[#int_64:]] = OpTypeInt 64 0 +; CHECK-DAG: %[[#bool:]] = OpTypeBool +; CHECK-DAG: %[[#int_32:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#int_16:]] = OpTypeInt 16 0 +; CHECK-DAG: %[[#float_64:]] = OpTypeFloat 64 +; CHECK-DAG: %[[#float_32:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#float_16:]] = OpTypeFloat 16 +; CHECK-DAG: %[[#vec4_bool:]] = OpTypeVector %[[#bool]] 4 +; CHECK-DAG: %[[#vec4_16:]] = OpTypeVector %[[#int_16]] 4 +; CHECK-DAG: %[[#vec4_32:]] = OpTypeVector %[[#int_32]] 4 +; CHECK-DAG: %[[#vec4_64:]] = OpTypeVector %[[#int_64]] 4 +; CHECK-DAG: %[[#vec4_float_16:]] = OpTypeVector %[[#float_16]] 4 +; CHECK-DAG: %[[#vec4_float_32:]] = OpTypeVector %[[#float_32]] 4 +; CHECK-DAG: %[[#vec4_float_64:]] = OpTypeVector %[[#float_64]] 4 + +; CHECK-HLSL-DAG: %[[#const_i64_0:]] = OpConstant %[[#int_64]] 0 +; CHECK-HLSL-DAG: %[[#const_i32_0:]] = OpConstant %[[#int_32]] 0 +; CHECK-HLSL-DAG: %[[#const_i16_0:]] = OpConstant %[[#int_16]] 0 +; CHECK-HLSL-DAG: %[[#const_f64_0:]] = OpConstant %[[#float_64]] 0 +; CHECK-HLSL-DAG: %[[#const_f32_0:]] = OpConstant %[[#float_32:]] 0 +; CHECK-HLSL-DAG: %[[#const_f16_0:]] = OpConstant %[[#float_16:]] 0 +; CHECK-HLSL-DAG: %[[#vec4_const_zeros_i16:]] = OpConstantComposite %[[#vec4_16:]] %[[#const_i16_0:]] %[[#const_i16_0:]] %[[#const_i16_0:]] %[[#const_i16_0:]] +; CHECK-HLSL-DAG: %[[#vec4_const_zeros_i32:]] = OpConstantComposite %[[#vec4_32:]] %[[#const_i32_0:]] %[[#const_i32_0:]] %[[#const_i32_0:]] %[[#const_i32_0:]] +; CHECK-HLSL-DAG: %[[#vec4_const_zeros_i64:]] = OpConstantComposite %[[#vec4_64:]] %[[#const_i64_0:]] %[[#const_i64_0:]] %[[#const_i64_0:]] %[[#const_i64_0:]] +; CHECK-HLSL-DAG: %[[#vec4_const_zeros_f16:]] = OpConstantComposite %[[#vec4_float_16:]] %[[#const_f16_0:]] %[[#const_f16_0:]] %[[#const_f16_0:]] %[[#const_f16_0:]] +; CHECK-HLSL-DAG: %[[#vec4_const_zeros_f32:]] = 
OpConstantComposite %[[#vec4_float_32:]] %[[#const_f32_0:]] %[[#const_f32_0:]] %[[#const_f32_0:]] %[[#const_f32_0:]] +; CHECK-HLSL-DAG: %[[#vec4_const_zeros_f64:]] = OpConstantComposite %[[#vec4_float_64:]] %[[#const_f64_0:]] %[[#const_f64_0:]] %[[#const_f64_0:]] %[[#const_f64_0:]] + +; CHECK-OCL-DAG: %[[#const_i64_0:]] = OpConstantNull %[[#int_64]] +; CHECK-OCL-DAG: %[[#const_i32_0:]] = OpConstantNull %[[#int_32]] +; CHECK-OCL-DAG: %[[#const_i16_0:]] = OpConstantNull %[[#int_16]] +; CHECK-OCL-DAG: %[[#const_f64_0:]] = OpConstantNull %[[#float_64]] +; CHECK-OCL-DAG: %[[#const_f32_0:]] = OpConstantNull %[[#float_32:]] +; CHECK-OCL-DAG: %[[#const_f16_0:]] = OpConstantNull %[[#float_16:]] +; CHECK-OCL-DAG: %[[#vec4_const_zeros_i16:]] = OpConstantNull %[[#vec4_16:]] +; CHECK-OCL-DAG: %[[#vec4_const_zeros_i32:]] = OpConstantNull %[[#vec4_32:]] +; CHECK-OCL-DAG: %[[#vec4_const_zeros_i64:]] = OpConstantNull %[[#vec4_64:]] +; CHECK-OCL-DAG: %[[#vec4_const_zeros_f16:]] = OpConstantNull %[[#vec4_float_16:]] +; CHECK-OCL-DAG: %[[#vec4_const_zeros_f32:]] = OpConstantNull %[[#vec4_float_32:]] +; CHECK-OCL-DAG: %[[#vec4_const_zeros_f64:]] = OpConstantNull %[[#vec4_float_64:]] + +define noundef i1 @any_int64_t(i64 noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpINotEqual %[[#bool:]] %[[#arg0:]] %[[#const_i64_0:]] + %hlsl.any = call i1 @llvm.spv.any.i64(i64 %p0) + ret i1 %hlsl.any +} + + +define noundef i1 @any_int(i32 noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpINotEqual %[[#bool:]] %[[#arg0:]] %[[#const_i32_0:]] + %hlsl.any = call i1 @llvm.spv.any.i32(i32 %p0) + ret i1 %hlsl.any +} + + +define noundef i1 @any_int16_t(i16 noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpINotEqual %[[#bool:]] %[[#arg0:]] %[[#const_i16_0:]] + %hlsl.any = call i1 @llvm.spv.any.i16(i16 %p0) + ret i1 %hlsl.any +} + +define noundef i1 @any_double(double 
noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpFOrdNotEqual %[[#bool:]] %[[#arg0:]] %[[#const_f64_0:]] + %hlsl.any = call i1 @llvm.spv.any.f64(double %p0) + ret i1 %hlsl.any +} + + +define noundef i1 @any_float(float noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpFOrdNotEqual %[[#bool:]] %[[#arg0:]] %[[#const_f32_0:]] + %hlsl.any = call i1 @llvm.spv.any.f32(float %p0) + ret i1 %hlsl.any +} + + +define noundef i1 @any_half(half noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpFOrdNotEqual %[[#bool:]] %[[#arg0:]] %[[#const_f16_0:]] + %hlsl.any = call i1 @llvm.spv.any.f16(half %p0) + ret i1 %hlsl.any +} + + +define noundef i1 @any_bool4(<4 x i1> noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#]] = OpAny %[[#vec4_bool:]] %[[#arg0:]] + %hlsl.any = call i1 @llvm.spv.any.v4i1(<4 x i1> %p0) + ret i1 %hlsl.any +} + +define noundef i1 @any_short4(<4 x i16> noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#shortVecNotEq:]] = OpINotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_i16:]] + ; CHECK: %[[#]] = OpAny %[[#bool:]] %[[#shortVecNotEq:]] + %hlsl.any = call i1 @llvm.spv.any.v4i16(<4 x i16> %p0) + ret i1 %hlsl.any +} + +define noundef i1 @any_int4(<4 x i32> noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#i32VecNotEq:]] = OpINotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_i32:]] + ; CHECK: %[[#]] = OpAny %[[#bool:]] %[[#i32VecNotEq:]] + %hlsl.any = call i1 @llvm.spv.any.v4i32(<4 x i32> %p0) + ret i1 %hlsl.any +} + +define noundef i1 @any_int64_t4(<4 x i64> noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#i64VecNotEq:]] = OpINotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_i64:]] + ; CHECK: %[[#]] = OpAny %[[#bool:]] %[[#i64VecNotEq]] + 
%hlsl.any = call i1 @llvm.spv.any.v4i64(<4 x i64> %p0) + ret i1 %hlsl.any +} + +define noundef i1 @any_half4(<4 x half> noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#f16VecNotEq:]] = OpFOrdNotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_f16:]] + ; CHECK: %[[#]] = OpAny %[[#bool]] %[[#f16VecNotEq:]] + %hlsl.any = call i1 @llvm.spv.any.v4f16(<4 x half> %p0) + ret i1 %hlsl.any +} + +define noundef i1 @any_float4(<4 x float> noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#f32VecNotEq:]] = OpFOrdNotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_f32:]] + ; CHECK: %[[#]] = OpAny %[[#bool:]] %[[#f32VecNotEq:]] + %hlsl.any = call i1 @llvm.spv.any.v4f32(<4 x float> %p0) + ret i1 %hlsl.any +} + +define noundef i1 @any_double4(<4 x double> noundef %p0) { +entry: + ; CHECK: %[[#arg0:]] = OpFunctionParameter %[[#]] + ; CHECK: %[[#f64VecNotEq:]] = OpFOrdNotEqual %[[#vec4_bool:]] %[[#arg0:]] %[[#vec4_const_zeros_f64:]] + ; CHECK: %[[#]] = OpAny %[[#bool:]] %[[#f64VecNotEq:]] + %hlsl.any = call i1 @llvm.spv.any.v4f64(<4 x double> %p0) + ret i1 %hlsl.any +} + +define noundef i1 @any_bool(i1 noundef %a) { +entry: + ; CHECK: %[[#any_bool_arg:]] = OpFunctionParameter %[[#bool:]] + ; CHECK: OpReturnValue %[[#any_bool_arg:]] + %hlsl.any = call i1 @llvm.spv.any.i1(i1 %a) + ret i1 %hlsl.any +} + +declare i1 @llvm.spv.any.v4f16(<4 x half>) +declare i1 @llvm.spv.any.v4f32(<4 x float>) +declare i1 @llvm.spv.any.v4f64(<4 x double>) +declare i1 @llvm.spv.any.v4i1(<4 x i1>) +declare i1 @llvm.spv.any.v4i16(<4 x i16>) +declare i1 @llvm.spv.any.v4i32(<4 x i32>) +declare i1 @llvm.spv.any.v4i64(<4 x i64>) +declare i1 @llvm.spv.any.i1(i1) +declare i1 @llvm.spv.any.i16(i16) +declare i1 @llvm.spv.any.i32(i32) +declare i1 @llvm.spv.any.i64(i64) +declare i1 @llvm.spv.any.f16(half) +declare i1 @llvm.spv.any.f32(float) +declare i1 @llvm.spv.any.f64(double) diff --git 
a/llvm/test/CodeGen/SPIRV/instructions/ret-type.ll b/llvm/test/CodeGen/SPIRV/instructions/ret-type.ll new file mode 100644 index 0000000..bf71eb5 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/instructions/ret-type.ll @@ -0,0 +1,82 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown --translator-compatibility-mode %s -o - -filetype=obj | spirv-val %} +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpName %[[Test1:.*]] "test1" +; CHECK-DAG: OpName %[[Foo:.*]] "foo" +; CHECK-DAG: OpName %[[Bar:.*]] "bar" +; CHECK-DAG: OpName %[[Test2:.*]] "test2" + +; CHECK-DAG: %[[Long:.*]] = OpTypeInt 64 0 +; CHECK-DAG: %[[Array:.*]] = OpTypeArray %[[Long]] %[[#]] +; CHECK-DAG: %[[Struct1:.*]] = OpTypeStruct %[[Array]] +; CHECK-DAG: %[[Struct2:.*]] = OpTypeStruct %[[Struct1]] +; CHECK-DAG: %[[StructPtr:.*]] = OpTypePointer Function %[[Struct2]] +; CHECK-DAG: %[[Bool:.*]] = OpTypeBool +; CHECK-DAG: %[[FooType:.*]] = OpTypeFunction %[[StructPtr:.*]] %[[StructPtr]] %[[StructPtr]] %[[Bool]] +; CHECK-DAG: %[[Char:.*]] = OpTypeInt 8 0 +; CHECK-DAG: %[[CharPtr:.*]] = OpTypePointer Function %[[Char]] + +; CHECK: %[[Test1]] = OpFunction +; CHECK: OpFunctionCall %[[StructPtr:.*]] %[[Foo]] +; CHECK: OpFunctionCall %[[StructPtr:.*]] %[[Bar]] +; CHECK: OpFunctionEnd + +; CHECK: %[[Foo]] = OpFunction %[[StructPtr:.*]] None %[[FooType]] +; CHECK: %[[Arg1:.*]] = OpFunctionParameter %[[StructPtr]] +; CHECK: %[[Arg2:.*]] = OpFunctionParameter +; CHECK: %[[Sw:.*]] = OpFunctionParameter +; CHECK: %[[Res:.*]] = OpInBoundsPtrAccessChain %[[StructPtr]] %[[Arg1]] %[[#]] +; CHECK: OpReturnValue %[[Res]] +; CHECK: OpReturnValue %[[Arg2]] + +; CHECK: %[[Bar]] = OpFunction %[[StructPtr:.*]] None %[[#]] +; CHECK: %[[BarArg:.*]] = OpFunctionParameter +; CHECK: %[[BarRes:.*]] = OpInBoundsPtrAccessChain %[[CharPtr]] %[[BarArg]] %[[#]] +; CHECK: 
%[[BarResCasted:.*]] = OpBitcast %[[StructPtr]] %[[BarRes]] +; CHECK: %[[BarResStruct:.*]] = OpInBoundsPtrAccessChain %[[StructPtr]] %[[#]] %[[#]] +; CHECK: OpReturnValue %[[BarResStruct]] +; CHECK: OpReturnValue %[[BarResCasted]] + +; CHECK: %[[Test2]] = OpFunction +; CHECK: OpFunctionCall %[[StructPtr:.*]] %[[Foo]] +; CHECK: OpFunctionCall %[[StructPtr:.*]] %[[Bar]] +; CHECK: OpFunctionEnd + +%struct = type { %array } +%array = type { [1 x i64] } + +define spir_func void @test1(ptr %arg1, ptr %arg2, i1 %sw) { +entry: + %r1 = call ptr @foo(ptr %arg1, ptr %arg2, i1 %sw) + %r2 = call ptr @bar(ptr %arg1, i1 %sw) + ret void +} + +define spir_func ptr @foo(ptr %arg1, ptr %arg2, i1 %sw) { +entry: + br i1 %sw, label %exit, label %sw1 +sw1: + %result = getelementptr inbounds %struct, ptr %arg1, i64 100 + ret ptr %result +exit: + ret ptr %arg2 +} + +define spir_func ptr @bar(ptr %arg1, i1 %sw) { +entry: + %charptr = getelementptr inbounds i8, ptr %arg1, i64 0 + br i1 %sw, label %exit, label %sw1 +sw1: + %result = getelementptr inbounds %struct, ptr %arg1, i64 100 + ret ptr %result +exit: + ret ptr %charptr +} + +define spir_func void @test2(ptr %arg1, ptr %arg2, i1 %sw) { +entry: + %r1 = call ptr @foo(ptr %arg1, ptr %arg2, i1 %sw) + %r2 = call ptr @bar(ptr %arg1, i1 %sw) + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/instructions/select-phi.ll b/llvm/test/CodeGen/SPIRV/instructions/select-phi.ll new file mode 100644 index 0000000..3828fe8 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/instructions/select-phi.ll @@ -0,0 +1,62 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s + +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown --translator-compatibility-mode %s -o - -filetype=obj | spirv-val %} +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; RUN: %if spirv-tools %{ llc -O0 
-mtriple=spirv64-unknown-unknown --translator-compatibility-mode %s -o - -filetype=obj | spirv-val %} +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[Char:.*]] = OpTypeInt 8 0 +; CHECK-DAG: %[[Long:.*]] = OpTypeInt 32 0 +; CHECK-DAG: %[[Array:.*]] = OpTypeArray %[[Long]] %[[#]] +; CHECK-DAG: %[[Struct:.*]] = OpTypeStruct %[[Array]] +; CHECK-DAG: %[[StructPtr:.*]] = OpTypePointer Function %[[Struct]] +; CHECK-DAG: %[[CharPtr:.*]] = OpTypePointer Function %[[Char]] + +; CHECK: %[[Branch1:.*]] = OpLabel +; CHECK: %[[Res1:.*]] = OpVariable %[[StructPtr]] Function +; CHECK: OpBranchConditional %[[#]] %[[#]] %[[Branch2:.*]] +; CHECK: %[[Res2:.*]] = OpInBoundsPtrAccessChain %[[CharPtr]] %[[#]] %[[#]] +; CHECK: %[[Res2Casted:.*]] = OpBitcast %[[StructPtr]] %[[Res2]] +; CHECK: OpBranchConditional %[[#]] %[[#]] %[[BranchSelect:.*]] +; CHECK: %[[SelectRes:.*]] = OpSelect %[[CharPtr]] %[[#]] %[[#]] %[[#]] +; CHECK: %[[SelectResCasted:.*]] = OpBitcast %[[StructPtr]] %[[SelectRes]] +; CHECK: OpLabel +; CHECK: OpPhi %[[StructPtr]] %[[Res1]] %[[Branch1]] %[[Res2Casted]] %[[Branch2]] %[[SelectResCasted]] %[[BranchSelect]] + +%struct = type { %array } +%array = type { [1 x i64] } +%array3 = type { [3 x i32] } + +define spir_kernel void @foo(ptr addrspace(1) noundef align 1 %arg1, ptr noundef byval(%struct) align 8 %arg2, i1 noundef zeroext %expected) { +entry: + %agg = alloca %array3, align 8 + %r0 = load i64, ptr %arg2, align 8 + %add.ptr = getelementptr inbounds i8, ptr %agg, i64 12 + %r1 = load i32, ptr %agg, align 4 + %tobool0 = icmp slt i32 %r1, 0 + br i1 %tobool0, label %exit, label %sw1 + +sw1: ; preds = %entry + %incdec1 = getelementptr inbounds i8, ptr %agg, i64 4 + %r2 = load i32, ptr %incdec1, align 4 + %tobool1 = icmp slt i32 %r2, 0 + br i1 %tobool1, label %exit, label %sw2 + +sw2: ; preds = %sw1 + %incdec2 = getelementptr inbounds i8, ptr %agg, i64 8 + %r3 = load i32, ptr %incdec2, align 4 + 
%tobool2 = icmp slt i32 %r3, 0 + %spec.select = select i1 %tobool2, ptr %incdec2, ptr %add.ptr + br label %exit + +exit: ; preds = %sw2, %sw1, %entry + %retval.0 = phi ptr [ %agg, %entry ], [ %incdec1, %sw1 ], [ %spec.select, %sw2 ] + %add.ptr.i = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 %r0 + %r4 = icmp eq ptr %retval.0, %add.ptr + %cmp = xor i1 %r4, %expected + %frombool6.i = zext i1 %cmp to i8 + store i8 %frombool6.i, ptr addrspace(1) %add.ptr.i, align 1 + %r5 = icmp eq ptr %add.ptr, %retval.0 + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/instructions/select-ptr-load.ll b/llvm/test/CodeGen/SPIRV/instructions/select-ptr-load.ll new file mode 100644 index 0000000..0ff2895 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/instructions/select-ptr-load.ll @@ -0,0 +1,25 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-SPIRV-DAG: %[[Float:.*]] = OpTypeFloat 32 +; CHECK-SPIRV-DAG: %[[FloatPtr:.*]] = OpTypePointer Function %[[Float]] +; CHECK-SPIRV: OpInBoundsPtrAccessChain %[[FloatPtr]] +; CHECK-SPIRV: OpInBoundsPtrAccessChain %[[FloatPtr]] +; CHECK-SPIRV: OpSelect %[[FloatPtr]] +; CHECK-SPIRV: OpLoad %[[Float]] + +%struct = type { [3 x float] } + +define spir_kernel void @bar(i1 %sw) { +entry: + %var1 = alloca %struct + %var2 = alloca %struct + %elem1 = getelementptr inbounds [3 x float], ptr %var1, i64 0, i64 0 + %elem2 = getelementptr inbounds [3 x float], ptr %var2, i64 0, i64 1 + %elem = select i1 %sw, ptr %elem1, ptr %elem2 + %res = load float, ptr %elem + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/instructions/select.ll b/llvm/test/CodeGen/SPIRV/instructions/select.ll index 
f54ef21..9234b97 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/select.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/select.ll @@ -1,6 +1,11 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: OpName [[SCALARi32:%.+]] "select_i32" +; CHECK-DAG: OpName [[SCALARPTR:%.+]] "select_ptr" ; CHECK-DAG: OpName [[VEC2i32:%.+]] "select_i32v2" ; CHECK-DAG: OpName [[VEC2i32v2:%.+]] "select_v2i32v2" @@ -17,6 +22,19 @@ define i32 @select_i32(i1 %c, i32 %t, i32 %f) { ret i32 %r } +; CHECK: [[SCALARPTR]] = OpFunction +; CHECK-NEXT: [[C:%.+]] = OpFunctionParameter +; CHECK-NEXT: [[T:%.+]] = OpFunctionParameter +; CHECK-NEXT: [[F:%.+]] = OpFunctionParameter +; CHECK: OpLabel +; CHECK: [[R:%.+]] = OpSelect {{%.+}} [[C]] [[T]] [[F]] +; CHECK: OpReturnValue [[R]] +; CHECK-NEXT: OpFunctionEnd +define ptr @select_ptr(i1 %c, ptr %t, ptr %f) { + %r = select i1 %c, ptr %t, ptr %f + ret ptr %r +} + ; CHECK: [[VEC2i32]] = OpFunction ; CHECK-NEXT: [[C:%.+]] = OpFunctionParameter ; CHECK-NEXT: [[T:%.+]] = OpFunctionParameter diff --git a/llvm/test/CodeGen/SPIRV/select.ll b/llvm/test/CodeGen/SPIRV/select-builtin.ll index b34e91b..6717970 100644 --- a/llvm/test/CodeGen/SPIRV/select.ll +++ b/llvm/test/CodeGen/SPIRV/select-builtin.ll @@ -1,4 +1,6 @@ ; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-SPIRV: OpSelect diff --git a/llvm/test/CodeGen/SPIRV/transcoding/memcpy-zext.ll 
b/llvm/test/CodeGen/SPIRV/transcoding/memcpy-zext.ll new file mode 100644 index 0000000..ea01975 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/memcpy-zext.ll @@ -0,0 +1,43 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-32 +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-64 +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-64-DAG: %[[#i64:]] = OpTypeInt 64 0 + +; CHECK-DAG: %[[#i8:]] = OpTypeInt 8 0 +; CHECK-DAG: %[[#i32:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#one:]] = OpConstant %[[#i32]] 1 +; CHECK-DAG: %[[#two:]] = OpConstant %[[#i32]] 2 +; CHECK-DAG: %[[#three:]] = OpConstant %[[#i32]] 3 +; CHECK-DAG: %[[#i32x3:]] = OpTypeArray %[[#i32]] %[[#three]] +; CHECK-DAG: %[[#test_arr_init:]] = OpConstantComposite %[[#i32x3]] %[[#one]] %[[#two]] %[[#three]] +; CHECK-DAG: %[[#szconst1024:]] = OpConstant %[[#i32]] 1024 +; CHECK-DAG: %[[#szconst42:]] = OpConstant %[[#i8]] 42 +; CHECK-DAG: %[[#const_i32x3_ptr:]] = OpTypePointer UniformConstant %[[#i32x3]] +; CHECK-DAG: %[[#test_arr:]] = OpVariable %[[#const_i32x3_ptr]] UniformConstant %[[#test_arr_init]] +; CHECK-DAG: %[[#i32x3_ptr:]] = OpTypePointer Function %[[#i32x3]] +; CHECK: %[[#arr:]] = OpVariable %[[#i32x3_ptr]] Function + +; CHECK-32: OpCopyMemorySized %[[#arr]] %[[#test_arr]] %[[#szconst1024]] +; CHECK-64: %[[#szconstext1024:]] = OpUConvert %[[#i64:]] %[[#szconst1024:]] +; CHECK-64: OpCopyMemorySized %[[#arr]] %[[#test_arr]] %[[#szconstext1024]] + +; CHECK-32: %[[#szconstext42:]] = OpUConvert %[[#i32:]] %[[#szconst42:]] +; CHECK-32: OpCopyMemorySized %[[#arr]] %[[#test_arr]] %[[#szconstext42]] +; CHECK-64: %[[#szconstext42:]] = OpUConvert %[[#i64:]] %[[#szconst42:]] +; CHECK-64: OpCopyMemorySized %[[#arr]] %[[#test_arr]] 
%[[#szconstext42]] + +@__const.test.arr = private unnamed_addr addrspace(2) constant [3 x i32] [i32 1, i32 2, i32 3] + +define spir_func void @test() { +entry: + %arr = alloca [3 x i32], align 4 + %dest = bitcast ptr %arr to ptr + call void @llvm.memcpy.p0.p2.i32(ptr align 4 %dest, ptr addrspace(2) align 4 @__const.test.arr, i32 1024, i1 false) + call void @llvm.memcpy.p0.p2.i8(ptr align 4 %dest, ptr addrspace(2) align 4 @__const.test.arr, i8 42, i1 false) + ret void +} + +declare void @llvm.memcpy.p0.p2.i32(ptr nocapture writeonly, ptr addrspace(2) nocapture readonly, i32, i1) +declare void @llvm.memcpy.p0.p2.i8(ptr nocapture writeonly, ptr addrspace(2) nocapture readonly, i8, i1) diff --git a/llvm/test/CodeGen/SPIRV/transcoding/spirv-private-array-initialization.ll b/llvm/test/CodeGen/SPIRV/transcoding/spirv-private-array-initialization.ll index e0172ec..04fb391 100644 --- a/llvm/test/CodeGen/SPIRV/transcoding/spirv-private-array-initialization.ll +++ b/llvm/test/CodeGen/SPIRV/transcoding/spirv-private-array-initialization.ll @@ -1,23 +1,34 @@ -; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV -; -; CHECK-SPIRV-DAG: %[[#i32:]] = OpTypeInt 32 0 -; CHECK-SPIRV-DAG: %[[#one:]] = OpConstant %[[#i32]] 1 -; CHECK-SPIRV-DAG: %[[#two:]] = OpConstant %[[#i32]] 2 -; CHECK-SPIRV-DAG: %[[#three:]] = OpConstant %[[#i32]] 3 -; CHECK-SPIRV-DAG: %[[#i32x3:]] = OpTypeArray %[[#i32]] %[[#three]] -; CHECK-SPIRV-DAG: %[[#test_arr_init:]] = OpConstantComposite %[[#i32x3]] %[[#one]] %[[#two]] %[[#three]] -; CHECK-SPIRV-DAG: %[[#twelve:]] = OpConstant %[[#i32]] 12 -; CHECK-SPIRV-DAG: %[[#const_i32x3_ptr:]] = OpTypePointer UniformConstant %[[#i32x3]] - -; CHECK-SPIRV: %[[#test_arr2:]] = OpVariable %[[#const_i32x3_ptr]] UniformConstant %[[#test_arr_init]] -; CHECK-SPIRV: %[[#test_arr:]] = OpVariable %[[#const_i32x3_ptr]] UniformConstant %[[#test_arr_init]] - -; CHECK-SPIRV-DAG: %[[#i32x3_ptr:]] = OpTypePointer Function %[[#i32x3]] - -; 
CHECK-SPIRV: %[[#arr:]] = OpVariable %[[#i32x3_ptr]] Function -; CHECK-SPIRV: %[[#arr2:]] = OpVariable %[[#i32x3_ptr]] Function -; CHECK-SPIRV: OpCopyMemorySized %[[#arr]] %[[#test_arr]] %[[#twelve]] Aligned 4 -; CHECK-SPIRV: OpCopyMemorySized %[[#arr2]] %[[#test_arr2]] %[[#twelve]] Aligned 4 +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV,CHECK-SPIRV-32 +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefixes=CHECK-SPIRV,CHECK-SPIRV-64 +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-SPIRV-64-DAG: %[[#i64:]] = OpTypeInt 64 0 + +; CHECK-SPIRV-DAG: %[[#i32:]] = OpTypeInt 32 0 +; CHECK-SPIRV-DAG: %[[#one:]] = OpConstant %[[#i32]] 1 +; CHECK-SPIRV-DAG: %[[#two:]] = OpConstant %[[#i32]] 2 +; CHECK-SPIRV-DAG: %[[#three:]] = OpConstant %[[#i32]] 3 +; CHECK-SPIRV-DAG: %[[#i32x3:]] = OpTypeArray %[[#i32]] %[[#three]] +; CHECK-SPIRV-DAG: %[[#test_arr_init:]] = OpConstantComposite %[[#i32x3]] %[[#one]] %[[#two]] %[[#three]] +; CHECK-SPIRV-DAG: %[[#twelve:]] = OpConstant %[[#i32]] 12 +; CHECK-SPIRV-DAG: %[[#const_i32x3_ptr:]] = OpTypePointer UniformConstant %[[#i32x3]] + +; CHECK-SPIRV: %[[#test_arr2:]] = OpVariable %[[#const_i32x3_ptr]] UniformConstant %[[#test_arr_init]] +; CHECK-SPIRV: %[[#test_arr:]] = OpVariable %[[#const_i32x3_ptr]] UniformConstant %[[#test_arr_init]] + +; CHECK-SPIRV-DAG: %[[#i32x3_ptr:]] = OpTypePointer Function %[[#i32x3]] + +; CHECK-SPIRV: %[[#arr:]] = OpVariable %[[#i32x3_ptr]] Function +; CHECK-SPIRV: %[[#arr2:]] = OpVariable %[[#i32x3_ptr]] Function + +; CHECK-SPIRV-32: OpCopyMemorySized %[[#arr]] %[[#test_arr]] %[[#twelve]] Aligned 4 +; CHECK-SPIRV-32: OpCopyMemorySized %[[#arr2]] %[[#test_arr2]] %[[#twelve]] Aligned 4 + +; CHECK-SPIRV-64: %[[#twelvezext1:]] = OpUConvert %[[#i64:]] 
%[[#twelve:]] +; CHECK-SPIRV-64: OpCopyMemorySized %[[#arr]] %[[#test_arr]] %[[#twelvezext1]] Aligned 4 +; CHECK-SPIRV-64: %[[#twelvezext2:]] = OpUConvert %[[#i64:]] %[[#twelve:]] +; CHECK-SPIRV-64: OpCopyMemorySized %[[#arr2]] %[[#test_arr2]] %[[#twelvezext2]] Aligned 4 @__const.test.arr = private unnamed_addr addrspace(2) constant [3 x i32] [i32 1, i32 2, i32 3], align 4 diff --git a/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll b/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll index 3bcbbb4..b855d01 100644 --- a/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll +++ b/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll @@ -458,3 +458,22 @@ define <16 x i8> @f30() { i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> ret <16 x i8> %res } + +; Test VPERM with various constant operands. +define i32 @f31() { +; CHECK-LABEL: f31: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI31_0 +; CHECK-NEXT: vl %v0, 0(%r1), 3 +; CHECK-NEXT: larl %r1, .LCPI31_1 +; CHECK-NEXT: vl %v1, 0(%r1), 3 +; CHECK-NEXT: vperm %v0, %v1, %v1, %v0 +; CHECK-NEXT: vlgvb %r2, %v0, 0 +; CHECK-NEXT: nilf %r2, 7 +; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d +; CHECK-NEXT: br %r14 + %P = tail call <16 x i8> @llvm.s390.vperm(<16 x i8> <i8 0, i8 1, i8 1, i8 2, i8 1, i8 2, i8 2, i8 3, i8 1, i8 2, i8 2, i8 3, i8 2, i8 3, i8 3, i8 4>, <16 x i8> <i8 0, i8 1, i8 1, i8 2, i8 1, i8 2, i8 2, i8 3, i8 1, i8 2, i8 2, i8 3, i8 2, i8 3, i8 3, i8 4>, <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0>) + %E = extractelement <16 x i8> %P, i64 0 + %res = zext i8 %E to i32 + ret i32 %res +} diff --git a/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll b/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll index a9b2037..8d2ee3c 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/fconstant.ll @@ -10,27 +10,22 @@ define void @test_float(ptr %a , float %b) { ; CHECK64_SMALL: # %bb.0: # 
%entry ; CHECK64_SMALL-NEXT: movss {{.*#+}} xmm1 = [5.5E+0,0.0E+0,0.0E+0,0.0E+0] ; CHECK64_SMALL-NEXT: addss %xmm0, %xmm1 -; CHECK64_SMALL-NEXT: movd %xmm1, %eax -; CHECK64_SMALL-NEXT: movl %eax, (%rdi) +; CHECK64_SMALL-NEXT: movss %xmm1, (%rdi) ; CHECK64_SMALL-NEXT: retq ; ; CHECK64_LARGE-LABEL: test_float: ; CHECK64_LARGE: # %bb.0: # %entry ; CHECK64_LARGE-NEXT: movabsq ${{\.?LCPI[0-9]+_[0-9]+}}, %rax ; CHECK64_LARGE-NEXT: addss (%rax), %xmm0 -; CHECK64_LARGE-NEXT: movd %xmm0, %eax -; CHECK64_LARGE-NEXT: movl %eax, (%rdi) +; CHECK64_LARGE-NEXT: movss %xmm0, (%rdi) ; CHECK64_LARGE-NEXT: retq ; ; CHECK32-LABEL: test_float: ; CHECK32: # %bb.0: # %entry ; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK32-NEXT: movss {{.*#+}} xmm0 = [5.5E+0,0.0E+0,0.0E+0,0.0E+0] -; CHECK32-NEXT: movd %ecx, %xmm1 -; CHECK32-NEXT: addss %xmm0, %xmm1 -; CHECK32-NEXT: movd %xmm1, %ecx -; CHECK32-NEXT: movl %ecx, (%eax) +; CHECK32-NEXT: addss {{[0-9]+}}(%esp), %xmm0 +; CHECK32-NEXT: movss %xmm0, (%eax) ; CHECK32-NEXT: retl entry: %aa = fadd float 5.500000e+00, %b diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-sse-intrinsics.ll b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-sse-intrinsics.ll new file mode 100644 index 0000000..3388af6 --- /dev/null +++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-sse-intrinsics.ll @@ -0,0 +1,153 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=i686-- -mattr=+sse -global-isel -stop-after=regbankselect | FileCheck %s + +define void @test_x86_sse_max_ps(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_max_ps + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 + 
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.max.ps), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %a1 = load <4 x float>, ptr %p2, align 16 + %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + store <4 x float> %res, ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone + + +define void @test_x86_sse_max_ss(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_max_ss + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.max.ss), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %a1 = load <4 x float>, ptr %p2, align 16 + %res = call 
<4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + store <4 x float> %res, ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone + + +define void @test_x86_sse_min_ps(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_min_ps + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.min.ps), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %a1 = load <4 x float>, ptr %p2, align 16 + %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + store <4 x float> %res, ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone + + +define void @test_x86_sse_min_ss(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_min_ss + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = 
G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD1]](p0) :: (load (<4 x s32>) from %ir.p2) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.min.ss), [[LOAD2]](<4 x s32>), [[LOAD3]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %a1 = load <4 x float>, ptr %p2, align 16 + %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] + store <4 x float> %res, ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone + + +define void @test_x86_sse_rcp_ps(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_rcp_ps + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rcp.ps), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + store <4 x float> %res, ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone + + +define void @test_x86_sse_rcp_ss(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_rcp_ss + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; 
CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rcp.ss), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + store <4 x float> %res, ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone + + +define void @test_x86_sse_rsqrt_ps(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_rsqrt_ps + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rsqrt.ps), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] + store <4 x float> %res, ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone + + +define void @test_x86_sse_rsqrt_ss(ptr %p1, ptr %p2) { + ; CHECK-LABEL: name: test_x86_sse_rsqrt_ss + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.1 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (p0) from %fixed-stack.1) + ; CHECK-NEXT: 
[[LOAD1:%[0-9]+]]:vecr(<4 x s32>) = G_LOAD [[LOAD]](p0) :: (load (<4 x s32>) from %ir.p1) + ; CHECK-NEXT: [[INT:%[0-9]+]]:vecr(<4 x s32>) = G_INTRINSIC intrinsic(@llvm.x86.sse.rsqrt.ss), [[LOAD1]](<4 x s32>) + ; CHECK-NEXT: G_STORE [[INT]](<4 x s32>), [[LOAD]](p0) :: (store (<4 x s32>) into %ir.p1) + ; CHECK-NEXT: RET 0 + %a0 = load <4 x float>, ptr %p1, align 16 + %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] + store <4 x float> %res, ptr %p1 + ret void +} +declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone diff --git a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll index d09db0f..99d458a 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/regbankselect-x87.ll @@ -142,7 +142,7 @@ define float @f4(float %val) { ; X86-LABEL: name: f4 ; X86: bb.1 (%ir-block.0): ; X86-NEXT: [[FRAME_INDEX:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 - ; X86-NEXT: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0) + ; X86-NEXT: [[LOAD:%[0-9]+]]:psr(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load (s32) from %fixed-stack.0) ; X86-NEXT: $fp0 = COPY [[LOAD]](s32) ; X86-NEXT: RET 0, implicit $fp0 ; @@ -187,13 +187,10 @@ define void @f5(ptr %a, ptr %b) { ; X64-NEXT: {{ $}} ; X64-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi ; X64-NEXT: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rsi - ; X64-NEXT: [[LOAD:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.a) - ; X64-NEXT: [[LOAD1:%[0-9]+]]:gpr(s64) = G_LOAD [[COPY1]](p0) :: (load (s64) from %ir.b) - ; X64-NEXT: [[COPY2:%[0-9]+]]:psr(s64) = COPY [[LOAD]](s64) - ; X64-NEXT: [[COPY3:%[0-9]+]]:psr(s64) = COPY [[LOAD1]](s64) - ; X64-NEXT: [[FADD:%[0-9]+]]:psr(s64) = G_FADD [[COPY2]], [[COPY3]] - ; X64-NEXT: [[COPY4:%[0-9]+]]:gpr(s64) = COPY [[FADD]](s64) - ; X64-NEXT: G_STORE [[COPY4]](s64), [[COPY]](p0) :: 
(store (s64) into %ir.a) + ; X64-NEXT: [[LOAD:%[0-9]+]]:psr(s64) = G_LOAD [[COPY]](p0) :: (load (s64) from %ir.a) + ; X64-NEXT: [[LOAD1:%[0-9]+]]:psr(s64) = G_LOAD [[COPY1]](p0) :: (load (s64) from %ir.b) + ; X64-NEXT: [[FADD:%[0-9]+]]:psr(s64) = G_FADD [[LOAD]], [[LOAD1]] + ; X64-NEXT: G_STORE [[FADD]](s64), [[COPY]](p0) :: (store (s64) into %ir.a) ; X64-NEXT: RET 0 %load1 = load double, ptr %a, align 8 %load2 = load double, ptr %b, align 8 @@ -210,11 +207,9 @@ define void @f6(ptr %0, ptr %1) { ; X86-NEXT: [[FRAME_INDEX1:%[0-9]+]]:gpr(p0) = G_FRAME_INDEX %fixed-stack.0 ; X86-NEXT: [[LOAD1:%[0-9]+]]:gpr(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load (p0) from %fixed-stack.0) ; X86-NEXT: [[C:%[0-9]+]]:psr(s32) = G_FCONSTANT float 2.000000e+01 - ; X86-NEXT: [[LOAD2:%[0-9]+]]:gpr(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.0) - ; X86-NEXT: [[COPY:%[0-9]+]]:psr(s32) = COPY [[LOAD2]](s32) - ; X86-NEXT: [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[COPY]], [[C]] - ; X86-NEXT: [[COPY1:%[0-9]+]]:gpr(s32) = COPY [[FADD]](s32) - ; X86-NEXT: G_STORE [[COPY1]](s32), [[LOAD1]](p0) :: (store (s32) into %ir.1) + ; X86-NEXT: [[LOAD2:%[0-9]+]]:psr(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.0) + ; X86-NEXT: [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[LOAD2]], [[C]] + ; X86-NEXT: G_STORE [[FADD]](s32), [[LOAD1]](p0) :: (store (s32) into %ir.1) ; X86-NEXT: RET 0 ; ; X64-LABEL: name: f6 @@ -224,11 +219,9 @@ define void @f6(ptr %0, ptr %1) { ; X64-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY $rdi ; X64-NEXT: [[COPY1:%[0-9]+]]:gpr(p0) = COPY $rsi ; X64-NEXT: [[C:%[0-9]+]]:psr(s32) = G_FCONSTANT float 2.000000e+01 - ; X64-NEXT: [[LOAD:%[0-9]+]]:gpr(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.0) - ; X64-NEXT: [[COPY2:%[0-9]+]]:psr(s32) = COPY [[LOAD]](s32) - ; X64-NEXT: [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[COPY2]], [[C]] - ; X64-NEXT: [[COPY3:%[0-9]+]]:gpr(s32) = COPY [[FADD]](s32) - ; X64-NEXT: G_STORE [[COPY3]](s32), [[COPY1]](p0) :: (store (s32) into %ir.1) + ; X64-NEXT: 
[[LOAD:%[0-9]+]]:psr(s32) = G_LOAD [[COPY]](p0) :: (load (s32) from %ir.0) + ; X64-NEXT: [[FADD:%[0-9]+]]:psr(s32) = G_FADD [[LOAD]], [[C]] + ; X64-NEXT: G_STORE [[FADD]](s32), [[COPY1]](p0) :: (store (s32) into %ir.1) ; X64-NEXT: RET 0 %load1 = load float, ptr %0 %add = fadd float %load1, 20.0 diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll index 704563a..4f26548 100644 --- a/llvm/test/CodeGen/X86/bitreverse.ll +++ b/llvm/test/CodeGen/X86/bitreverse.ll @@ -374,24 +374,10 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind { ; ; GFNI-LABEL: test_bitreverse_i16: ; GFNI: # %bb.0: -; GFNI-NEXT: # kill: def $edi killed $edi def $rdi -; GFNI-NEXT: rolw $8, %di -; GFNI-NEXT: movl %edi, %eax -; GFNI-NEXT: andl $3855, %eax # imm = 0xF0F -; GFNI-NEXT: shll $4, %eax -; GFNI-NEXT: shrl $4, %edi -; GFNI-NEXT: andl $3855, %edi # imm = 0xF0F -; GFNI-NEXT: orl %eax, %edi -; GFNI-NEXT: movl %edi, %eax -; GFNI-NEXT: andl $13107, %eax # imm = 0x3333 -; GFNI-NEXT: shrl $2, %edi -; GFNI-NEXT: andl $13107, %edi # imm = 0x3333 -; GFNI-NEXT: leal (%rdi,%rax,4), %eax -; GFNI-NEXT: movl %eax, %ecx -; GFNI-NEXT: andl $21845, %ecx # imm = 0x5555 -; GFNI-NEXT: shrl %eax -; GFNI-NEXT: andl $21845, %eax # imm = 0x5555 -; GFNI-NEXT: leal (%rax,%rcx,2), %eax +; GFNI-NEXT: vmovd %edi, %xmm0 +; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; GFNI-NEXT: vmovd %xmm0, %eax +; GFNI-NEXT: rolw $8, %ax ; GFNI-NEXT: # kill: def $ax killed $ax killed $eax ; GFNI-NEXT: retq %b = call i16 @llvm.bitreverse.i16(i16 %a) @@ -446,19 +432,10 @@ define i8 @test_bitreverse_i8(i8 %a) { ; ; GFNI-LABEL: test_bitreverse_i8: ; GFNI: # %bb.0: -; GFNI-NEXT: rolb $4, %dil -; GFNI-NEXT: movl %edi, %eax -; GFNI-NEXT: andb $51, %al -; GFNI-NEXT: shlb $2, %al -; GFNI-NEXT: shrb $2, %dil -; GFNI-NEXT: andb $51, %dil -; GFNI-NEXT: orb %dil, %al -; GFNI-NEXT: movl %eax, %ecx -; GFNI-NEXT: andb $85, %cl -; GFNI-NEXT: addb %cl, %cl -; GFNI-NEXT: shrb %al -; 
GFNI-NEXT: andb $85, %al -; GFNI-NEXT: orb %cl, %al +; GFNI-NEXT: vmovd %edi, %xmm0 +; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; GFNI-NEXT: vmovd %xmm0, %eax +; GFNI-NEXT: # kill: def $al killed $al killed $eax ; GFNI-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %b @@ -514,19 +491,11 @@ define i4 @test_bitreverse_i4(i4 %a) { ; ; GFNI-LABEL: test_bitreverse_i4: ; GFNI: # %bb.0: -; GFNI-NEXT: # kill: def $edi killed $edi def $rdi -; GFNI-NEXT: movl %edi, %eax -; GFNI-NEXT: andb $8, %al -; GFNI-NEXT: leal (%rdi,%rdi), %ecx -; GFNI-NEXT: andb $4, %cl -; GFNI-NEXT: leal (,%rdi,8), %edx -; GFNI-NEXT: andb $8, %dl -; GFNI-NEXT: orb %cl, %dl -; GFNI-NEXT: shrb %dil -; GFNI-NEXT: andb $2, %dil -; GFNI-NEXT: orb %dil, %dl -; GFNI-NEXT: shrb $3, %al -; GFNI-NEXT: orb %dl, %al +; GFNI-NEXT: vmovd %edi, %xmm0 +; GFNI-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; GFNI-NEXT: vmovd %xmm0, %eax +; GFNI-NEXT: shrb $4, %al +; GFNI-NEXT: # kill: def $al killed $al killed $eax ; GFNI-NEXT: retq %b = call i4 @llvm.bitreverse.i4(i4 %a) ret i4 %b diff --git a/llvm/test/CodeGen/X86/code-model-elf-text-sections.ll b/llvm/test/CodeGen/X86/code-model-elf-text-sections.ll index 016c9a4d..66a6fd3 100644 --- a/llvm/test/CodeGen/X86/code-model-elf-text-sections.ll +++ b/llvm/test/CodeGen/X86/code-model-elf-text-sections.ll @@ -13,9 +13,20 @@ ; RUN: llvm-readelf -S %t | FileCheck %s --check-prefix=LARGE-DS ; SMALL: .text {{.*}} AX {{.*}} +; SMALL: .ltext {{.*}} AXl {{.*}} +; SMALL: .ltext.2 {{.*}} AXl {{.*}} +; SMALL: .foo {{.*}} AX {{.*}} ; SMALL-DS: .text.func {{.*}} AX {{.*}} +; SMALL-DS: .ltext {{.*}} AXl {{.*}} +; SMALL-DS: .ltext.2 {{.*}} AXl {{.*}} +; SMALL-DS: .foo {{.*}} AX {{.*}} ; LARGE: .ltext {{.*}} AXl {{.*}} +; LARGE: .ltext.2 {{.*}} AXl {{.*}} +; LARGE: .foo {{.*}} AX {{.*}} ; LARGE-DS: .ltext.func {{.*}} AXl {{.*}} +; LARGE-DS: .ltext {{.*}} AXl {{.*}} +; LARGE-DS: .ltext.2 {{.*}} AXl {{.*}} +; 
LARGE-DS: .foo {{.*}} AX {{.*}} target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64--linux" @@ -23,3 +34,15 @@ target triple = "x86_64--linux" define void @func() { ret void } + +define void @ltext() section ".ltext" { + ret void +} + +define void @ltext2() section ".ltext.2" { + ret void +} + +define void @foo() section ".foo" { + ret void +} diff --git a/llvm/test/CodeGen/X86/combine-ptest.ll b/llvm/test/CodeGen/X86/combine-ptest.ll index 337edef..3a695bf 100644 --- a/llvm/test/CodeGen/X86/combine-ptest.ll +++ b/llvm/test/CodeGen/X86/combine-ptest.ll @@ -397,6 +397,48 @@ define i1 @PR38788(<4 x i32> %0, <4 x i32> %1) { ret i1 %7 } +define i32 @PR88958_1(ptr %0, <2 x i64> %1) { +; SSE-LABEL: PR88958_1: +; SSE: # %bb.0: +; SSE-NEXT: movdqa (%rdi), %xmm1 +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: ptest %xmm0, %xmm1 +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX-LABEL: PR88958_1: +; AVX: # %bb.0: +; AVX-NEXT: vmovdqa (%rdi), %xmm1 +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: vptest %xmm0, %xmm1 +; AVX-NEXT: sete %al +; AVX-NEXT: retq + %3 = load <2 x i64>, ptr %0 + %4 = tail call i32 @llvm.x86.sse41.ptestz(<2 x i64> %3, <2 x i64> %1) + ret i32 %4 +} + +define i32 @PR88958_2(ptr %0, <2 x i64> %1) { +; SSE-LABEL: PR88958_2: +; SSE: # %bb.0: +; SSE-NEXT: movdqa (%rdi), %xmm1 +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: ptest %xmm0, %xmm1 +; SSE-NEXT: setb %al +; SSE-NEXT: retq +; +; AVX-LABEL: PR88958_2: +; AVX: # %bb.0: +; AVX-NEXT: vmovdqa (%rdi), %xmm1 +; AVX-NEXT: xorl %eax, %eax +; AVX-NEXT: vptest %xmm0, %xmm1 +; AVX-NEXT: setb %al +; AVX-NEXT: retq + %3 = load <2 x i64>, ptr %0 + %4 = tail call i32 @llvm.x86.sse41.ptestc(<2 x i64> %3, <2 x i64> %1) + ret i32 %4 +} + declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone diff --git 
a/llvm/test/CodeGen/X86/propagate-disjoint-in-shl-or.ll b/llvm/test/CodeGen/X86/propagate-disjoint-in-shl-or.ll new file mode 100644 index 0000000..e38840f --- /dev/null +++ b/llvm/test/CodeGen/X86/propagate-disjoint-in-shl-or.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64 %s -start-before=x86-isel -o - | FileCheck %s + +define i32 @add_shl_or_disjoint(i32 %x) { +; CHECK-LABEL: add_shl_or_disjoint: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal 165036822(,%rdi,4), %eax +; CHECK-NEXT: retq + %or = or disjoint i32 %x, 4027584529 + %shl = shl i32 %or, 2 + %add = add i32 %shl, 1234567890 + ret i32 %add +} + diff --git a/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir new file mode 100644 index 0000000..67f8cc7 --- /dev/null +++ b/llvm/test/CodeGen/X86/tail-dup-pred-succ-size.mir @@ -0,0 +1,260 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=3 -tail-dup-succ-size=3 %s -o - | FileCheck %s -check-prefix=LIMIT +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=4 -tail-dup-succ-size=4 %s -o - | FileCheck %s -check-prefix=NOLIMIT + +--- +name: foo +tracksRegLiveness: true +jumpTable: + kind: block-address + entries: + - id: 0 + blocks: [ '%bb.2', '%bb.3', '%bb.4', '%bb.5' ] + - id: 1 + blocks: [ '%bb.9', '%bb.10', '%bb.11', '%bb.12' ] +body: | + ; LIMIT-LABEL: name: foo + ; LIMIT: bb.0: + ; LIMIT-NEXT: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000) + ; LIMIT-NEXT: liveins: $rdi, $esi + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esi + ; LIMIT-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; LIMIT-NEXT: [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri 
[[COPY]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.2: + ; LIMIT-NEXT: successors: %bb.7(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: JMP_1 %bb.7 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.3: + ; LIMIT-NEXT: successors: %bb.7(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: JMP_1 %bb.7 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.4: + ; LIMIT-NEXT: successors: %bb.7(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: JMP_1 %bb.7 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.5: + ; LIMIT-NEXT: successors: %bb.7(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags + ; LIMIT-NEXT: JMP_1 %bb.7 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.6: + ; LIMIT-NEXT: successors: + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.7: + ; LIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[SHR32ri3]], %bb.5, [[SHR32ri2]], %bb.4, [[SHR32ri1]], %bb.3, [[MOV32rm]], %bb.2 + ; LIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: 
[[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri4]], 7, implicit-def dead $eflags + ; LIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri1]], %subreg.sub_32bit + ; LIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.9: + ; LIMIT-NEXT: successors: %bb.13(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: JMP_1 %bb.13 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.10: + ; LIMIT-NEXT: successors: %bb.13(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags + ; LIMIT-NEXT: JMP_1 %bb.13 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.11: + ; LIMIT-NEXT: successors: %bb.13(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags + ; LIMIT-NEXT: JMP_1 %bb.13 + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.12: + ; LIMIT-NEXT: successors: %bb.13(0x80000000) + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; LIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags + ; LIMIT-NEXT: {{ $}} + ; LIMIT-NEXT: bb.13: + ; LIMIT-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[SHR32ri7]], %bb.12, [[SHR32ri6]], %bb.11, [[SHR32ri5]], %bb.10, [[MOV32rm4]], %bb.9 + ; LIMIT-NEXT: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI1]], [[PHI]], implicit-def dead $eflags + ; LIMIT-NEXT: $eax = COPY [[OR32rr]] + ; LIMIT-NEXT: RET 0, $eax + ; + ; NOLIMIT-LABEL: name: foo + ; NOLIMIT: bb.0: + ; NOLIMIT-NEXT: successors: %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000), %bb.5(0x20000000) + ; NOLIMIT-NEXT: liveins: $rdi, $esi + ; NOLIMIT-NEXT: {{ $}} + ; 
NOLIMIT-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $esi + ; NOLIMIT-NEXT: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi + ; NOLIMIT-NEXT: [[SHR32ri:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 1, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri:%[0-9]+]]:gr32 = AND32ri [[SHR32ri]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, killed [[AND32ri]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG]], %jump-table.0, $noreg + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.2: + ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri1:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri1:%[0-9]+]]:gr32 = AND32ri [[SHR32ri1]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG1:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri1]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG1]], %jump-table.1, $noreg + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.3: + ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri2:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm1]], 1, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SHR32ri3:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri2:%[0-9]+]]:gr32 = AND32ri [[SHR32ri3]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG2:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri2]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG2]], %jump-table.1, $noreg + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.4: + ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), 
%bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri4:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm2]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SHR32ri5:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri3:%[0-9]+]]:gr32 = AND32ri [[SHR32ri5]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG3:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri3]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG3]], %jump-table.1, $noreg + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.5: + ; NOLIMIT-NEXT: successors: %bb.9(0x20000000), %bb.10(0x20000000), %bb.11(0x20000000), %bb.12(0x20000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[MOV32rm3:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri6:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm3]], 3, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SHR32ri7:%[0-9]+]]:gr32 = SHR32ri [[COPY]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[AND32ri4:%[0-9]+]]:gr32 = AND32ri [[SHR32ri7]], 7, implicit-def dead $eflags + ; NOLIMIT-NEXT: [[SUBREG_TO_REG4:%[0-9]+]]:gr64_nosp = SUBREG_TO_REG 0, [[AND32ri4]], %subreg.sub_32bit + ; NOLIMIT-NEXT: JMP64m $noreg, 8, [[SUBREG_TO_REG4]], %jump-table.1, $noreg + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.6: + ; NOLIMIT-NEXT: successors: + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.9: + ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[PHI:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 + ; NOLIMIT-NEXT: [[MOV32rm4:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: JMP_1 %bb.13 + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.10: + ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[PHI1:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, 
[[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 + ; NOLIMIT-NEXT: [[MOV32rm5:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri8:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm5]], 1, implicit-def dead $eflags + ; NOLIMIT-NEXT: JMP_1 %bb.13 + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.11: + ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[PHI2:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 + ; NOLIMIT-NEXT: [[MOV32rm6:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri9:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm6]], 2, implicit-def dead $eflags + ; NOLIMIT-NEXT: JMP_1 %bb.13 + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.12: + ; NOLIMIT-NEXT: successors: %bb.13(0x80000000) + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: [[PHI3:%[0-9]+]]:gr32 = PHI [[MOV32rm]], %bb.2, [[SHR32ri2]], %bb.3, [[SHR32ri4]], %bb.4, [[SHR32ri6]], %bb.5 + ; NOLIMIT-NEXT: [[MOV32rm7:%[0-9]+]]:gr32 = MOV32rm [[COPY1]], 1, $noreg, 0, $noreg + ; NOLIMIT-NEXT: [[SHR32ri10:%[0-9]+]]:gr32 = SHR32ri [[MOV32rm7]], 6, implicit-def dead $eflags + ; NOLIMIT-NEXT: {{ $}} + ; NOLIMIT-NEXT: bb.13: + ; NOLIMIT-NEXT: [[PHI4:%[0-9]+]]:gr32 = PHI [[PHI]], %bb.9, [[PHI1]], %bb.10, [[PHI2]], %bb.11, [[PHI3]], %bb.12 + ; NOLIMIT-NEXT: [[PHI5:%[0-9]+]]:gr32 = PHI [[SHR32ri10]], %bb.12, [[SHR32ri9]], %bb.11, [[SHR32ri8]], %bb.10, [[MOV32rm4]], %bb.9 + ; NOLIMIT-NEXT: [[OR32rr:%[0-9]+]]:gr32 = OR32rr [[PHI5]], [[PHI4]], implicit-def dead $eflags + ; NOLIMIT-NEXT: $eax = COPY [[OR32rr]] + ; NOLIMIT-NEXT: RET 0, $eax + bb.0: + liveins: $rdi, $esi + + %11:gr32 = COPY $esi + %10:gr64 = COPY $rdi + %13:gr32 = SHR32ri %11, 1, implicit-def dead $eflags + %14:gr32 = AND32ri %13, 7, implicit-def dead $eflags + %12:gr64_nosp = SUBREG_TO_REG 0, killed %14, %subreg.sub_32bit + + bb.1: + successors: %bb.2, %bb.3, %bb.4, %bb.5 + + JMP64m $noreg, 8, %12, %jump-table.0, $noreg + + bb.2: + %0:gr32 = 
MOV32rm %10, 1, $noreg, 0, $noreg + JMP_1 %bb.7 + + bb.3: + %17:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %1:gr32 = SHR32ri %17, 1, implicit-def dead $eflags + JMP_1 %bb.7 + + bb.4: + %16:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %2:gr32 = SHR32ri %16, 2, implicit-def dead $eflags + JMP_1 %bb.7 + + bb.5: + %15:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %3:gr32 = SHR32ri %15, 3, implicit-def dead $eflags + JMP_1 %bb.7 + + bb.6: + successors: + + bb.7: + %4:gr32 = PHI %3, %bb.5, %2, %bb.4, %1, %bb.3, %0, %bb.2 + %19:gr32 = SHR32ri %11, 2, implicit-def dead $eflags + %20:gr32 = AND32ri %19, 7, implicit-def dead $eflags + %18:gr64_nosp = SUBREG_TO_REG 0, killed %20, %subreg.sub_32bit + + bb.8: + successors: %bb.9, %bb.10, %bb.11, %bb.12 + + JMP64m $noreg, 8, %18, %jump-table.1, $noreg + + bb.9: + %5:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + JMP_1 %bb.13 + + bb.10: + %23:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %6:gr32 = SHR32ri %23, 1, implicit-def dead $eflags + JMP_1 %bb.13 + + bb.11: + %22:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %7:gr32 = SHR32ri %22, 2, implicit-def dead $eflags + JMP_1 %bb.13 + + bb.12: + %21:gr32 = MOV32rm %10, 1, $noreg, 0, $noreg + %8:gr32 = SHR32ri %21, 6, implicit-def dead $eflags + + bb.13: + %9:gr32 = PHI %8, %bb.12, %7, %bb.11, %6, %bb.10, %5, %bb.9 + %24:gr32 = OR32rr %9, %4, implicit-def dead $eflags + $eax = COPY %24 + RET 0, $eax + +... 
diff --git a/llvm/test/CodeGen/X86/vector-bitreverse.ll b/llvm/test/CodeGen/X86/vector-bitreverse.ll index b22b508..90cc3d5 100644 --- a/llvm/test/CodeGen/X86/vector-bitreverse.ll +++ b/llvm/test/CodeGen/X86/vector-bitreverse.ll @@ -61,36 +61,18 @@ define i8 @test_bitreverse_i8(i8 %a) nounwind { ; ; GFNISSE-LABEL: test_bitreverse_i8: ; GFNISSE: # %bb.0: -; GFNISSE-NEXT: rolb $4, %dil -; GFNISSE-NEXT: movl %edi, %eax -; GFNISSE-NEXT: andb $51, %al -; GFNISSE-NEXT: shlb $2, %al -; GFNISSE-NEXT: shrb $2, %dil -; GFNISSE-NEXT: andb $51, %dil -; GFNISSE-NEXT: orb %dil, %al -; GFNISSE-NEXT: movl %eax, %ecx -; GFNISSE-NEXT: andb $85, %cl -; GFNISSE-NEXT: addb %cl, %cl -; GFNISSE-NEXT: shrb %al -; GFNISSE-NEXT: andb $85, %al -; GFNISSE-NEXT: orb %cl, %al +; GFNISSE-NEXT: movd %edi, %xmm0 +; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; GFNISSE-NEXT: movd %xmm0, %eax +; GFNISSE-NEXT: # kill: def $al killed $al killed $eax ; GFNISSE-NEXT: retq ; ; GFNIAVX-LABEL: test_bitreverse_i8: ; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: rolb $4, %dil -; GFNIAVX-NEXT: movl %edi, %eax -; GFNIAVX-NEXT: andb $51, %al -; GFNIAVX-NEXT: shlb $2, %al -; GFNIAVX-NEXT: shrb $2, %dil -; GFNIAVX-NEXT: andb $51, %dil -; GFNIAVX-NEXT: orb %dil, %al -; GFNIAVX-NEXT: movl %eax, %ecx -; GFNIAVX-NEXT: andb $85, %cl -; GFNIAVX-NEXT: addb %cl, %cl -; GFNIAVX-NEXT: shrb %al -; GFNIAVX-NEXT: andb $85, %al -; GFNIAVX-NEXT: orb %cl, %al +; GFNIAVX-NEXT: vmovd %edi, %xmm0 +; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX-NEXT: vmovd %xmm0, %eax +; GFNIAVX-NEXT: # kill: def $al killed $al killed $eax ; GFNIAVX-NEXT: retq %b = call i8 @llvm.bitreverse.i8(i8 %a) ret i8 %b @@ -153,47 +135,19 @@ define i16 @test_bitreverse_i16(i16 %a) nounwind { ; ; GFNISSE-LABEL: test_bitreverse_i16: ; GFNISSE: # %bb.0: -; GFNISSE-NEXT: # kill: def $edi killed $edi def $rdi -; GFNISSE-NEXT: rolw $8, %di -; GFNISSE-NEXT: movl %edi, %eax -; GFNISSE-NEXT: andl $3855, %eax # 
imm = 0xF0F -; GFNISSE-NEXT: shll $4, %eax -; GFNISSE-NEXT: shrl $4, %edi -; GFNISSE-NEXT: andl $3855, %edi # imm = 0xF0F -; GFNISSE-NEXT: orl %eax, %edi -; GFNISSE-NEXT: movl %edi, %eax -; GFNISSE-NEXT: andl $13107, %eax # imm = 0x3333 -; GFNISSE-NEXT: shrl $2, %edi -; GFNISSE-NEXT: andl $13107, %edi # imm = 0x3333 -; GFNISSE-NEXT: leal (%rdi,%rax,4), %eax -; GFNISSE-NEXT: movl %eax, %ecx -; GFNISSE-NEXT: andl $21845, %ecx # imm = 0x5555 -; GFNISSE-NEXT: shrl %eax -; GFNISSE-NEXT: andl $21845, %eax # imm = 0x5555 -; GFNISSE-NEXT: leal (%rax,%rcx,2), %eax +; GFNISSE-NEXT: movd %edi, %xmm0 +; GFNISSE-NEXT: gf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; GFNISSE-NEXT: movd %xmm0, %eax +; GFNISSE-NEXT: rolw $8, %ax ; GFNISSE-NEXT: # kill: def $ax killed $ax killed $eax ; GFNISSE-NEXT: retq ; ; GFNIAVX-LABEL: test_bitreverse_i16: ; GFNIAVX: # %bb.0: -; GFNIAVX-NEXT: # kill: def $edi killed $edi def $rdi -; GFNIAVX-NEXT: rolw $8, %di -; GFNIAVX-NEXT: movl %edi, %eax -; GFNIAVX-NEXT: andl $3855, %eax # imm = 0xF0F -; GFNIAVX-NEXT: shll $4, %eax -; GFNIAVX-NEXT: shrl $4, %edi -; GFNIAVX-NEXT: andl $3855, %edi # imm = 0xF0F -; GFNIAVX-NEXT: orl %eax, %edi -; GFNIAVX-NEXT: movl %edi, %eax -; GFNIAVX-NEXT: andl $13107, %eax # imm = 0x3333 -; GFNIAVX-NEXT: shrl $2, %edi -; GFNIAVX-NEXT: andl $13107, %edi # imm = 0x3333 -; GFNIAVX-NEXT: leal (%rdi,%rax,4), %eax -; GFNIAVX-NEXT: movl %eax, %ecx -; GFNIAVX-NEXT: andl $21845, %ecx # imm = 0x5555 -; GFNIAVX-NEXT: shrl %eax -; GFNIAVX-NEXT: andl $21845, %eax # imm = 0x5555 -; GFNIAVX-NEXT: leal (%rax,%rcx,2), %eax +; GFNIAVX-NEXT: vmovd %edi, %xmm0 +; GFNIAVX-NEXT: vgf2p8affineqb $0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX-NEXT: vmovd %xmm0, %eax +; GFNIAVX-NEXT: rolw $8, %ax ; GFNIAVX-NEXT: # kill: def $ax killed $ax killed $eax ; GFNIAVX-NEXT: retq %b = call i16 @llvm.bitreverse.i16(i16 %a) diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll 
b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll index 0c76c14..4859a8e 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll @@ -305,6 +305,37 @@ define <4 x float> @combine_vpermilvar_4f32_as_insertps(<4 x float> %a0) { ret <4 x float> %2 } +define <8 x i32> @combine_blend_of_permutes_v8i32(<4 x i64> %a0, <4 x i64> %a1) { +; AVX1-LABEL: combine_blend_of_permutes_v8i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1] +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1] +; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6],ymm1[7] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX2-LABEL: combine_blend_of_permutes_v8i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1] +; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,3,0,1] +; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3,4],ymm0[5,6],ymm1[7] +; AVX2-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: combine_blend_of_permutes_v8i32: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512-NEXT: vpmovsxbd {{.*#+}} ymm2 = [4,21,6,23,16,1,2,19] +; AVX512-NEXT: vpermt2d %zmm1, %zmm2, %zmm0 +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512-NEXT: ret{{[l|q]}} + %s0 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1> + %s1 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1> + %x0 = bitcast <4 x i64> %s0 to <8 x i32> + %x1 = bitcast <4 x i64> %s1 to <8 x i32> + %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 12, i32 5, i32 6, i32 15> + ret <8 x i32> %r +} + define <2 x double> @constant_fold_vpermilvar_pd() { ; CHECK-LABEL: constant_fold_vpermilvar_pd: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll 
b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll index f53b1ee..e87e810 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll @@ -973,3 +973,47 @@ define <8 x i64> @combine_vpermvar_insertion_as_broadcast_v8i64(i64 %a0) { %2 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %1, <8 x i64> zeroinitializer) ret <8 x i64> %2 } + +define <16 x i32> @blend_of_permutes_v16i32(<8 x i64> %a0, <8x i64> %a1) { +; X86-AVX512F-LABEL: blend_of_permutes_v16i32: +; X86-AVX512F: # %bb.0: +; X86-AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5] +; X86-AVX512F-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5] +; X86-AVX512F-NEXT: movw $-25958, %ax # imm = 0x9A9A +; X86-AVX512F-NEXT: kmovw %eax, %k1 +; X86-AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; X86-AVX512F-NEXT: retl +; +; X86-AVX512BW-LABEL: blend_of_permutes_v16i32: +; X86-AVX512BW: # %bb.0: +; X86-AVX512BW-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5] +; X86-AVX512BW-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5] +; X86-AVX512BW-NEXT: movw $-25958, %ax # imm = 0x9A9A +; X86-AVX512BW-NEXT: kmovd %eax, %k1 +; X86-AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; X86-AVX512BW-NEXT: retl +; +; X64-AVX512F-LABEL: blend_of_permutes_v16i32: +; X64-AVX512F: # %bb.0: +; X64-AVX512F-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5] +; X64-AVX512F-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5] +; X64-AVX512F-NEXT: movw $-25958, %ax # imm = 0x9A9A +; X64-AVX512F-NEXT: kmovw %eax, %k1 +; X64-AVX512F-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; X64-AVX512F-NEXT: retq +; +; X64-AVX512BW-LABEL: blend_of_permutes_v16i32: +; X64-AVX512BW: # %bb.0: +; X64-AVX512BW-NEXT: vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5] +; X64-AVX512BW-NEXT: vpermq {{.*#+}} zmm1 = zmm1[2,3,0,1,6,7,4,5] +; X64-AVX512BW-NEXT: movw $-25958, %ax # imm = 0x9A9A +; X64-AVX512BW-NEXT: kmovd %eax, %k1 +; X64-AVX512BW-NEXT: vmovdqa32 %zmm1, %zmm0 {%k1} +; 
X64-AVX512BW-NEXT: retq + %s0 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5> + %s1 = shufflevector <8 x i64> %a1, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5> + %x0 = bitcast <8 x i64> %s0 to <16 x i32> + %x1 = bitcast <8 x i64> %s1 to <16 x i32> + %r = shufflevector <16 x i32> %x0, <16 x i32> %x1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 20, i32 5, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 28, i32 13, i32 14, i32 31> + ret <16 x i32> %r +} diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll index 5eb017b..8d213d2 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-sse41.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512 ; Combine tests involving SSE41 target shuffles (BLEND,INSERTPS,MOVZX) @@ -22,6 +22,45 @@ define <16 x i8> @combine_vpshufb_as_movzx(<16 x i8> %a0) { ret <16 x i8> %res0 } +define <4 x i32> @combine_blend_of_permutes_v4i32(<2 x i64> %a0, <2 x i64> %a1) { +; SSE-LABEL: combine_blend_of_permutes_v4i32: +; SSE: # %bb.0: +; 
SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] +; SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3],xmm2[4,5],xmm0[6,7] +; SSE-NEXT: retq +; +; AVX1-LABEL: combine_blend_of_permutes_v4i32: +; AVX1: # %bb.0: +; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,0,1] +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; AVX1-NEXT: retq +; +; AVX2-LABEL: combine_blend_of_permutes_v4i32: +; AVX2: # %bb.0: +; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,0,1] +; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; AVX2-NEXT: retq +; +; AVX512-LABEL: combine_blend_of_permutes_v4i32: +; AVX512: # %bb.0: +; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm2 = [2,19,0,17] +; AVX512-NEXT: vpermt2d %zmm1, %zmm2, %zmm0 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %s0 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 0> + %s1 = shufflevector <2 x i64> %a1, <2 x i64> undef, <2 x i32> <i32 1, i32 0> + %x0 = bitcast <2 x i64> %s0 to <4 x i32> + %x1 = bitcast <2 x i64> %s1 to <4 x i32> + %r = shufflevector <4 x i32> %x0, <4 x i32> %x1, <4 x i32> <i32 0, i32 5, i32 2, i32 7> + ret <4 x i32> %r +} + define <16 x i8> @PR50049(ptr %p1, ptr %p2) { ; SSE-LABEL: PR50049: ; SSE: # %bb.0: @@ -56,6 +95,107 @@ define <16 x i8> @PR50049(ptr %p1, ptr %p2) { ; SSE-NEXT: pand %xmm5, %xmm1 ; SSE-NEXT: packuswb %xmm1, %xmm0 ; SSE-NEXT: retq +; +; AVX1-LABEL: PR50049: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vmovdqa 32(%rdi), %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [1,4,7,10,13,128,128,128,128,128,128,u,u,u,u,u] +; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: 
vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,0,3,6,9,12,15,u,u,u,u,u] +; AVX1-NEXT: vpshufb %xmm4, %xmm0, %xmm0 +; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqa (%rsi), %xmm2 +; AVX1-NEXT: vmovdqa 16(%rsi), %xmm5 +; AVX1-NEXT: vmovdqa 32(%rsi), %xmm6 +; AVX1-NEXT: vpshufb %xmm3, %xmm6, %xmm3 +; AVX1-NEXT: vpshufb %xmm4, %xmm2, %xmm2 +; AVX1-NEXT: vpor %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [5,6,7,8,9,10,128,128,128,128,128,0,1,2,3,4] +; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,2,5,8,11,14,128,128,128,128,128] +; AVX1-NEXT: vpshufb %xmm4, %xmm5, %xmm5 +; AVX1-NEXT: vpor %xmm5, %xmm2, %xmm2 +; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] +; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpshufb %xmm4, %xmm1, %xmm1 +; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] +; AVX1-NEXT: vpmullw %xmm5, %xmm1, %xmm1 +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] +; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero +; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX1-NEXT: vpmullw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: PR50049: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovdqa (%rdi), %xmm0 +; AVX2-NEXT: vmovdqa 16(%rdi), %xmm1 +; AVX2-NEXT: vmovdqa 32(%rdi), %xmm2 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [1,4,7,10,13,128,128,128,128,128,128,u,u,u,u,u] +; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,0,3,6,9,12,15,u,u,u,u,u] +; AVX2-NEXT: vpshufb %xmm4, %xmm0, %xmm0 +; 
AVX2-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vmovdqa (%rsi), %xmm2 +; AVX2-NEXT: vmovdqa 16(%rsi), %xmm5 +; AVX2-NEXT: vmovdqa 32(%rsi), %xmm6 +; AVX2-NEXT: vpshufb %xmm3, %xmm6, %xmm3 +; AVX2-NEXT: vpshufb %xmm4, %xmm2, %xmm2 +; AVX2-NEXT: vpor %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm3 = [5,6,7,8,9,10,128,128,128,128,128,0,1,2,3,4] +; AVX2-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,2,5,8,11,14,128,128,128,128,128] +; AVX2-NEXT: vpshufb %xmm4, %xmm5, %xmm5 +; AVX2-NEXT: vpor %xmm5, %xmm2, %xmm2 +; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero +; AVX2-NEXT: vpshufb %xmm3, %xmm0, %xmm0 +; AVX2-NEXT: vpshufb %xmm4, %xmm1, %xmm1 +; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX2-NEXT: vpmullw %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: PR50049: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqa (%rdi), %xmm0 +; AVX512-NEXT: vmovdqa 16(%rdi), %xmm1 +; AVX512-NEXT: vmovdqa 32(%rdi), %xmm2 +; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [1,4,7,10,13,128,128,128,128,128,128,u,u,u,u,u] +; AVX512-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,0,3,6,9,12,15,u,u,u,u,u] +; AVX512-NEXT: vpshufb %xmm4, %xmm0, %xmm0 +; AVX512-NEXT: vpor %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vmovdqa (%rsi), %xmm2 +; AVX512-NEXT: vmovdqa 16(%rsi), %xmm5 +; 
AVX512-NEXT: vmovdqa 32(%rsi), %xmm6 +; AVX512-NEXT: vpshufb %xmm3, %xmm6, %xmm3 +; AVX512-NEXT: vpshufb %xmm4, %xmm2, %xmm2 +; AVX512-NEXT: vpor %xmm3, %xmm2, %xmm2 +; AVX512-NEXT: vmovdqa {{.*#+}} xmm3 = [5,6,7,8,9,10,128,128,128,128,128,0,1,2,3,4] +; AVX512-NEXT: vpshufb %xmm3, %xmm2, %xmm2 +; AVX512-NEXT: vmovdqa {{.*#+}} xmm4 = [128,128,128,128,128,128,2,5,8,11,14,128,128,128,128,128] +; AVX512-NEXT: vpshufb %xmm4, %xmm5, %xmm5 +; AVX512-NEXT: vpor %xmm5, %xmm2, %xmm2 +; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero +; AVX512-NEXT: vpshufb %xmm3, %xmm0, %xmm0 +; AVX512-NEXT: vpshufb %xmm4, %xmm1, %xmm1 +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX512-NEXT: vpmullw %ymm2, %ymm0, %ymm0 +; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %x1 = load <48 x i8>, ptr %p1, align 16 %x2 = load <48 x i8>, ptr %p2, align 16 %s1 = shufflevector <48 x i8> %x1, <48 x i8> poison, <16 x i32> <i32 0, i32 3, i32 6, i32 9, i32 12, i32 15, i32 18, i32 21, i32 24, i32 27, i32 30, i32 33, i32 36, i32 39, i32 42, i32 45> diff --git a/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll b/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll index 4990979..55e436b 100644 --- a/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll +++ 
b/llvm/test/DebugInfo/debugify-bogus-dbg-value.ll @@ -1,4 +1,5 @@ ; RUN: opt -passes=check-debugify < %s 2>&1 | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false -passes=check-debugify < %s 2>&1 | FileCheck %s define <2 x i64> @test-fun(<2 x i64> %A) !dbg !6 { %and = and <2 x i64> %A, <i64 23, i64 42>, !dbg !14 diff --git a/llvm/test/DebugInfo/debugify-each.ll b/llvm/test/DebugInfo/debugify-each.ll index e9241de..7685b57 100644 --- a/llvm/test/DebugInfo/debugify-each.ll +++ b/llvm/test/DebugInfo/debugify-each.ll @@ -40,6 +40,40 @@ ; RUN: opt -debugify-each -passes=globalopt -S -o /dev/null < %s 2> %t ; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS-ONE +; Repeat the same checks with debug intrinsics enabled. +; RUN: opt --experimental-debuginfo-iterators=false -debugify-each -O3 -S -o /dev/null < %s 2> %t +; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS +; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS +; RUN: opt --experimental-debuginfo-iterators=false -disable-output -debugify-each -passes='default<O3>' %s 2> %t +; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS +; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS + +; RUN: opt --experimental-debuginfo-iterators=false -enable-debugify -debugify-each -O3 -S -o /dev/null < %s 2> %t +; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS +; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS + +; RUN: opt --experimental-debuginfo-iterators=false -debugify-each -passes='instrprof,instrprof,sroa,sccp' -S -o /dev/null < %s 2> %t +; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS +; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS + +; RUN: opt --experimental-debuginfo-iterators=false -debugify-each -O1 < %s | opt -O2 -o /dev/null + +; RUN: opt --experimental-debuginfo-iterators=false -disable-output -debugify-quiet -debugify-each -O1 < %s 2>&1 | count 0 + +; RUN: opt 
--experimental-debuginfo-iterators=false -O1 < %s -S -o %t.before +; RUN: opt --experimental-debuginfo-iterators=false -O1 -debugify-each < %s -S -o %t.after +; RUN: diff %t.before %t.after + +; RUN: opt --experimental-debuginfo-iterators=false -O1 < %s | llvm-dis -o %t.before +; RUN: opt --experimental-debuginfo-iterators=false -O1 -debugify-each < %s | llvm-dis -o %t.after +; RUN: diff %t.before %t.after + +; RUN: opt --experimental-debuginfo-iterators=false -debugify-each -passes=instsimplify -S -o /dev/null < %s 2> %t +; RUN: FileCheck %s -input-file=%t -check-prefix=FUNCTION-PASS-ONE + +; RUN: opt --experimental-debuginfo-iterators=false -debugify-each -passes=globalopt -S -o /dev/null < %s 2> %t +; RUN: FileCheck %s -input-file=%t -check-prefix=MODULE-PASS-ONE + define void @foo(i32 %arg) { call i32 asm "bswap $0", "=r,r"(i32 %arg) ret void diff --git a/llvm/test/DebugInfo/debugify-export.ll b/llvm/test/DebugInfo/debugify-export.ll index 6e5952d..30333ca9 100644 --- a/llvm/test/DebugInfo/debugify-export.ll +++ b/llvm/test/DebugInfo/debugify-export.ll @@ -1,6 +1,9 @@ ; RUN: opt %s -disable-output -debugify-each -debugify-quiet -debugify-export - -passes=globalopt | FileCheck %s ; RUN: opt %s -disable-output -debugify-each -debugify-quiet -debugify-export - -passes=globalopt | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false %s -disable-output -debugify-each -debugify-quiet -debugify-export - -passes=globalopt | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false %s -disable-output -debugify-each -debugify-quiet -debugify-export - -passes=globalopt | FileCheck %s + ; CHECK: Pass Name ; CHECK-SAME: # of missing debug values ; CHECK-SAME: # of missing locations diff --git a/llvm/test/DebugInfo/debugify-ignore-phi.ll b/llvm/test/DebugInfo/debugify-ignore-phi.ll index 322ccaf..643df1d 100644 --- a/llvm/test/DebugInfo/debugify-ignore-phi.ll +++ b/llvm/test/DebugInfo/debugify-ignore-phi.ll @@ -1,4 +1,5 @@ ; RUN: opt 
-passes=check-debugify < %s -S 2>&1 | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false -passes=check-debugify < %s -S 2>&1 | FileCheck %s define void @test_phi(i1 %cond) !dbg !6 { br i1 %cond, label %1, label %2, !dbg !11 diff --git a/llvm/test/DebugInfo/debugify-original-no-dbg-info.ll b/llvm/test/DebugInfo/debugify-original-no-dbg-info.ll index 941b294..4cbbfc5 100644 --- a/llvm/test/DebugInfo/debugify-original-no-dbg-info.ll +++ b/llvm/test/DebugInfo/debugify-original-no-dbg-info.ll @@ -1,4 +1,5 @@ ; RUN: opt -verify-debuginfo-preserve -passes=instcombine -S -o - < %s 2>&1 | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false -verify-debuginfo-preserve -passes=instcombine -S -o - < %s 2>&1 | FileCheck %s ; CHECK: ModuleDebugify (original debuginfo): Skipping module without debug info ; CHECK-NEXT: CheckModuleDebugify (original debuginfo): Skipping module without debug info diff --git a/llvm/test/DebugInfo/debugify-report-missing-locs-only.ll b/llvm/test/DebugInfo/debugify-report-missing-locs-only.ll index 1c5daa1..04b7636 100644 --- a/llvm/test/DebugInfo/debugify-report-missing-locs-only.ll +++ b/llvm/test/DebugInfo/debugify-report-missing-locs-only.ll @@ -1,4 +1,5 @@ ; RUN: opt -passes=check-debugify < %s -S -o - 2>&1 | FileCheck %s -implicit-check-not "WARNING: Instruction with empty DebugLoc in function bar" +; RUN: opt --experimental-debuginfo-iterators=false -passes=check-debugify < %s -S -o - 2>&1 | FileCheck %s -implicit-check-not "WARNING: Instruction with empty DebugLoc in function bar" ; CHECK: WARNING: Instruction with empty DebugLoc in function foo -- ret void define void @foo() !dbg !6 { diff --git a/llvm/test/DebugInfo/debugify.ll b/llvm/test/DebugInfo/debugify.ll index 5ce6795..191015f 100644 --- a/llvm/test/DebugInfo/debugify.ll +++ b/llvm/test/DebugInfo/debugify.ll @@ -25,6 +25,33 @@ ; RUN: opt -enable-debugify -O1 < %s | opt -O2 -o /dev/null ; RUN: opt -passes=debugify,mem2reg,check-debugify < %s | opt -O2 -o 
/dev/null +;; Perform the same checks again for intrinsic debug info +; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify -S -o - < %s | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify -S -o - < %s | FileCheck %s + +; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,debugify -S -o - < %s 2>&1 | \ +; RUN: FileCheck %s -check-prefix=CHECK-REPEAT +; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,debugify -S -o - < %s 2>&1 | \ +; RUN: FileCheck %s -check-prefix=CHECK-REPEAT + +; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,check-debugify -S -o - < %s | \ +; RUN: FileCheck %s -implicit-check-not="CheckModuleDebugify: FAIL" +; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,check-debugify -S -o - < %s | \ +; RUN: FileCheck %s -implicit-check-not="CheckModuleDebugify: FAIL" +; RUN: opt --experimental-debuginfo-iterators=false -enable-debugify -passes=verify -S -o - < %s | \ +; RUN: FileCheck %s -implicit-check-not="CheckModuleDebugify: FAIL" + +; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,strip,check-debugify -S -o - < %s 2>&1 | \ +; RUN: FileCheck %s -check-prefix=CHECK-WARN + +; RUN: opt --experimental-debuginfo-iterators=false -enable-debugify -passes=strip -S -o - < %s 2>&1 | \ +; RUN: FileCheck %s -check-prefix=CHECK-WARN + +; RUN: opt --experimental-debuginfo-iterators=false -enable-debugify -S -o - < %s 2>&1 | FileCheck %s -check-prefix=PASS + +; RUN: opt --experimental-debuginfo-iterators=false -enable-debugify -O1 < %s | opt -O2 -o /dev/null +; RUN: opt --experimental-debuginfo-iterators=false -passes=debugify,mem2reg,check-debugify < %s | opt -O2 -o /dev/null + ; CHECK-LABEL: define void @foo define void @foo() { ; CHECK: ret void, !dbg ![[RET1:.*]] diff --git a/llvm/test/DebugInfo/pr37964.ll b/llvm/test/DebugInfo/pr37964.ll index 9581f1a..63db67d 100644 --- a/llvm/test/DebugInfo/pr37964.ll +++ 
b/llvm/test/DebugInfo/pr37964.ll @@ -1,4 +1,5 @@ ; RUN: opt -disable-output -debugify-each -passes=gvn < %s 2>&1 | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false -disable-output -debugify-each -passes=gvn < %s 2>&1 | FileCheck %s ; CHECK-NOT: ERROR: Instruction with empty DebugLoc in function _Z3bazv -- {{%.*}} = phi ; CHECK: CheckFunctionDebugify [GVNPass]: PASS diff --git a/llvm/test/DebugInfo/salvage-cast-debug-info.ll b/llvm/test/DebugInfo/salvage-cast-debug-info.ll index 4676aee..b72f717a 100644 --- a/llvm/test/DebugInfo/salvage-cast-debug-info.ll +++ b/llvm/test/DebugInfo/salvage-cast-debug-info.ll @@ -1,5 +1,5 @@ ; RUN: opt %s -passes=debugify,early-cse -earlycse-debug-hash -S | FileCheck %s -; RUN: opt %s -passes=debugify,early-cse -earlycse-debug-hash -S --try-experimental-debuginfo-iterators | FileCheck %s +; RUN: opt --experimental-debuginfo-iterators=false %s -passes=debugify,early-cse -earlycse-debug-hash -S | FileCheck %s define i32 @foo(i64 %nose, i32 %more) { ; CHECK-LABEL: @foo( ; CHECK: call void @llvm.dbg.value(metadata i64 %nose, metadata [[V1:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned diff --git a/llvm/test/DebugInfo/verify-di-preserve.ll b/llvm/test/DebugInfo/verify-di-preserve.ll index a2f1b1d..92fc62a 100644 --- a/llvm/test/DebugInfo/verify-di-preserve.ll +++ b/llvm/test/DebugInfo/verify-di-preserve.ll @@ -1,10 +1,10 @@ ; RUN: opt %s -verify-debuginfo-preserve -passes=instcombine -disable-output 2>&1 | FileCheck --check-prefix=VERIFY %s -; RUN: opt --try-experimental-debuginfo-iterators %s -verify-debuginfo-preserve -passes=instcombine -disable-output 2>&1 | FileCheck --check-prefix=VERIFY %s +; RUN: opt --experimental-debuginfo-iterators=false %s -verify-debuginfo-preserve -passes=instcombine -disable-output 2>&1 | FileCheck --check-prefix=VERIFY %s ; VERIFY: CheckModuleDebugify (original debuginfo): ; RUN: opt %s -verify-each-debuginfo-preserve -O2 
-disable-output 2>&1 | FileCheck --check-prefix=VERIFY-EACH %s -; RUN: opt %s --try-experimental-debuginfo-iterators -verify-each-debuginfo-preserve -O2 -disable-output 2>&1 | FileCheck --check-prefix=VERIFY-EACH %s +; RUN: opt %s --experimental-debuginfo-iterators=false -verify-each-debuginfo-preserve -O2 -disable-output 2>&1 | FileCheck --check-prefix=VERIFY-EACH %s ; VERIFY-EACH: DeadArgumentEliminationPass ; VERIFY-EACH: GlobalDCEPass diff --git a/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_vtune.s b/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_vtune.s index 1c95bde..936486b 100644 --- a/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_vtune.s +++ b/llvm/test/ExecutionEngine/JITLink/x86-64/ELF_vtune.s @@ -20,12 +20,14 @@ main: .cfi_def_cfa_offset 16 .cfi_offset 6, -16 movq %rsp, %rbp + pushq %rbx .cfi_def_cfa_register 6 - movl %edi, -4(%rbp) - movq %rsi, -16(%rbp) - movl -4(%rbp), %ebx + movl %edi, -16(%rbp) + movq %rsi, -24(%rbp) + movl -16(%rbp), %ebx addl $1, %ebx - movl $0, %eax + movl $0, %eax + popq %rbx popq %rbp .cfi_def_cfa 7, 8 ret diff --git a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s index 18e0ede..0e27d63 100644 --- a/llvm/test/MC/LoongArch/Relocations/relax-addsub.s +++ b/llvm/test/MC/LoongArch/Relocations/relax-addsub.s @@ -28,7 +28,7 @@ # RELAX: Relocations [ # RELAX-NEXT: Section ({{.*}}) .rela.text { -# RELAX-NEXT: 0x4 R_LARCH_ALIGN {{.*}} 0x4 +# RELAX-NEXT: 0x4 R_LARCH_ALIGN .text 0x4 # RELAX-NEXT: 0x10 R_LARCH_PCALA_HI20 .L1 0x0 # RELAX-NEXT: 0x10 R_LARCH_RELAX - 0x0 # RELAX-NEXT: 0x14 R_LARCH_PCALA_LO12 .L1 0x0 diff --git a/llvm/test/MC/LoongArch/Relocations/relax-align.s b/llvm/test/MC/LoongArch/Relocations/relax-align.s index 294fd9f..0246d5b 100644 --- a/llvm/test/MC/LoongArch/Relocations/relax-align.s +++ b/llvm/test/MC/LoongArch/Relocations/relax-align.s @@ -63,17 +63,19 @@ ret ## Test the symbol index is different from .text. 
.section .text2, "ax" .p2align 4 +.p2align 4, , 4 break 7 # RELOC: Relocations [ # RELAX-RELOC-NEXT: Section ({{.*}}) .rela.text { -# RELAX-RELOC-NEXT: 0x24 R_LARCH_ALIGN .Lla-relax-align0 0x4 -# RELAX-RELOC-NEXT: 0x34 R_LARCH_ALIGN .Lla-relax-align0 0x5 -# RELAX-RELOC-NEXT: 0x50 R_LARCH_ALIGN .Lla-relax-align0 0x4 -# RELAX-RELOC-NEXT: 0x60 R_LARCH_ALIGN .Lla-relax-align0 0xB04 -# RELAX-RELOC-NEXT: 0x70 R_LARCH_ALIGN .Lla-relax-align0 0x4 +# RELAX-RELOC-NEXT: 0x24 R_LARCH_ALIGN .text 0x4 +# RELAX-RELOC-NEXT: 0x34 R_LARCH_ALIGN .text 0x5 +# RELAX-RELOC-NEXT: 0x50 R_LARCH_ALIGN .text 0x4 +# RELAX-RELOC-NEXT: 0x60 R_LARCH_ALIGN .text 0xB04 +# RELAX-RELOC-NEXT: 0x70 R_LARCH_ALIGN .text 0x4 # RELAX-RELOC-NEXT: } # RELAX-RELOC-NEXT: Section ({{.*}}) .rela.text2 { -# RELAX-RELOC-NEXT: 0x0 R_LARCH_ALIGN .Lla-relax-align1 0x4 +# RELAX-RELOC-NEXT: 0x0 R_LARCH_ALIGN .text2 0x4 +# RELAX-RELOC-NEXT: 0xC R_LARCH_ALIGN .text2 0x404 # RELAX-RELOC-NEXT: } # RELOC-NEXT: ] diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index a8f493f..8835ff2 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -270,6 +270,9 @@ .attribute arch, "rv32iza64rs1p0" # CHECK: attribute 5, "rv32i2p1_za64rs1p0" +.attribute arch, "rv32izama16b" +# CHECK: attribute 5, "rv32i2p1_zama16b1p0" + .attribute arch, "rv32izawrs1p0" # CHECK: attribute 5, "rv32i2p1_zawrs1p0" diff --git a/llvm/test/MC/RISCV/rv32zcmop-invalid.s b/llvm/test/MC/RISCV/rv32zcmop-invalid.s index 71d72d5..fb6252f 100644 --- a/llvm/test/MC/RISCV/rv32zcmop-invalid.s +++ b/llvm/test/MC/RISCV/rv32zcmop-invalid.s @@ -1,7 +1,7 @@ # RUN: not llvm-mc -triple riscv32 -mattr=+zcmop < %s 2>&1 | FileCheck %s -cmop.0 # CHECK: :[[@LINE]]:1: error: unrecognized instruction mnemonic +c.mop.0 # CHECK: :[[@LINE]]:1: error: unrecognized instruction mnemonic -cmop.1 t0 # CHECK: :[[@LINE]]:8: error: invalid operand for instruction +c.mop.1 t0 # CHECK: :[[@LINE]]:9: error: invalid 
operand for instruction -cmop.1 0x0 # CHECK: :[[@LINE]]:8: error: invalid operand for instruction +c.mop.1 0x0 # CHECK: :[[@LINE]]:9: error: invalid operand for instruction diff --git a/llvm/test/MC/RISCV/rvzcmop-valid.s b/llvm/test/MC/RISCV/rvzcmop-valid.s index c6bb4a1..dd5d26a 100644 --- a/llvm/test/MC/RISCV/rvzcmop-valid.s +++ b/llvm/test/MC/RISCV/rvzcmop-valid.s @@ -9,34 +9,34 @@ # RUN: | llvm-objdump --mattr=+zcmop -d -r - \ # RUN: | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s -# CHECK-ASM-AND-OBJ: cmop.1 +# CHECK-ASM-AND-OBJ: c.mop.1 # CHECK-ASM: encoding: [0x81,0x60] -cmop.1 +c.mop.1 -# CHECK-ASM-AND-OBJ: cmop.3 +# CHECK-ASM-AND-OBJ: c.mop.3 # CHECK-ASM: encoding: [0x81,0x61] -cmop.3 +c.mop.3 -# CHECK-ASM-AND-OBJ: cmop.5 +# CHECK-ASM-AND-OBJ: c.mop.5 # CHECK-ASM: encoding: [0x81,0x62] -cmop.5 +c.mop.5 -# CHECK-ASM-AND-OBJ: cmop.7 +# CHECK-ASM-AND-OBJ: c.mop.7 # CHECK-ASM: encoding: [0x81,0x63] -cmop.7 +c.mop.7 -# CHECK-ASM-AND-OBJ: cmop.9 +# CHECK-ASM-AND-OBJ: c.mop.9 # CHECK-ASM: encoding: [0x81,0x64] -cmop.9 +c.mop.9 -# CHECK-ASM-AND-OBJ: cmop.11 +# CHECK-ASM-AND-OBJ: c.mop.11 # CHECK-ASM: encoding: [0x81,0x65] -cmop.11 +c.mop.11 -# CHECK-ASM-AND-OBJ: cmop.13 +# CHECK-ASM-AND-OBJ: c.mop.13 # CHECK-ASM: encoding: [0x81,0x66] -cmop.13 +c.mop.13 -# CHECK-ASM-AND-OBJ: cmop.15 +# CHECK-ASM-AND-OBJ: c.mop.15 # CHECK-ASM: encoding: [0x81,0x67] -cmop.15 +c.mop.15 diff --git a/llvm/test/Other/lint.ll b/llvm/test/Other/lint.ll index 6b31b31..6fd2d40c 100644 --- a/llvm/test/Other/lint.ll +++ b/llvm/test/Other/lint.ll @@ -124,13 +124,6 @@ define void @0() nounwind { ret void } -; CHECK: va_start called in a non-varargs function -declare void @llvm.va_start(ptr) -define void @not_vararg(ptr %p) nounwind { - call void @llvm.va_start(ptr %p) - ret void -} - ; CHECK: Undefined behavior: Branch to non-blockaddress define void @use_indbr() { indirectbr ptr @foo, [label %block] diff --git a/llvm/test/TableGen/def-multiple-operands.td 
b/llvm/test/TableGen/def-multiple-operands.td new file mode 100644 index 0000000..b747c58 --- /dev/null +++ b/llvm/test/TableGen/def-multiple-operands.td @@ -0,0 +1,37 @@ +// RUN: llvm-tblgen -gen-instr-info -I %p/../../include %s | FileCheck %s + +include "llvm/Target/Target.td" + +def archInstrInfo : InstrInfo {} + +def arch : Target { + let InstructionSet = archInstrInfo; +} + +def R0 : Register<"r0">; +def P0 : Register<"p0">; +def R32 : RegisterClass<"MyNS", [i32], 0, (add R0)>; +def P1 : RegisterClass<"MyNS", [i1], 0, (add P0)>; + +def Reg3Opnd : Operand<OtherVT> { + let MIOperandInfo = (ops R32, R32, P1); +} + +// The following checks verify that 'MCInstrDesc' entry for 'InstA' has the +// expected 'NumOperands' and 'NumDefs', i.e. 'InstA' should have 3 defs out of +// 4 operands. + +// CHECK: archInstrTable {{.* = \{}} +// CHECK: {{\{}} +// CHECK: {{\{}} [[ID:[0-9]+]], 4, 3, 13, {{.+\}, \/\/}} +// CHECK-SAME: Inst #[[ID]] = InstA +def InstA : Instruction { + let Namespace = "MyNS"; + let Size = 13; + // InstA should have 3 defs out of 4 operands. 
+ let OutOperandList = (outs Reg3Opnd:$dst); + let InOperandList = (ins i32imm:$c); + field bits<8> Inst; + field bits<8> SoftFail = 0; + let hasSideEffects = false; +} diff --git a/llvm/test/ThinLTO/X86/devirt.ll b/llvm/test/ThinLTO/X86/devirt.ll index 472e43d..c4c2f86 100644 --- a/llvm/test/ThinLTO/X86/devirt.ll +++ b/llvm/test/ThinLTO/X86/devirt.ll @@ -27,24 +27,36 @@ ; NOENABLESPLITFLAG-DAG: [[B:\^[0-9]+]] = gv: (name: "_ZTV1B", {{.*}} vTableFuncs: ((virtFunc: [[Bf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[Bf]], [[An]]) ; NOENABLESPLITFLAG-DAG: [[C:\^[0-9]+]] = gv: (name: "_ZTV1C", {{.*}} vTableFuncs: ((virtFunc: [[Cf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[An]], [[Cf]]) ; NOENABLESPLITFLAG-DAG: [[D:\^[0-9]+]] = gv: (name: "_ZTV1D", {{.*}} vTableFuncs: ((virtFunc: [[Dm]], offset: 16)), refs: ([[Dm]]) +; NOENABLESPLITFLAG-DAG: [[B_RV:\^[0-9]+]] = gv: (name: "_ZTV1B_RV", {{.*}} vTableFuncs: ((virtFunc: [[Bf]], offset: 8), (virtFunc: [[An]], offset: 12)), refs: ([[B_RV]], [[Bf]], [[An]]) +; NOENABLESPLITFLAG-DAG: [[C_RV:\^[0-9]+]] = gv: (name: "_ZTV1C_RV", {{.*}} vTableFuncs: ((virtFunc: [[Cf]], offset: 8), (virtFunc: [[An]], offset: 12)), refs: ([[C_RV]], [[An]], [[Cf]]) +; NOENABLESPLITFLAG-DAG: [[D_RV:\^[0-9]+]] = gv: (name: "_ZTV1D_RV", {{.*}} vTableFuncs: ((virtFunc: [[Dm]], offset: 8)), refs: ([[D_RV]], [[Dm]]) ; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1A", summary: ((offset: 16, [[B]]), (offset: 16, [[C]]))) ; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1B", summary: ((offset: 16, [[B]]))) ; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1C", summary: ((offset: 16, [[C]]))) +; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1A_RV", summary: ((offset: 8, [[B_RV]]), (offset: 8, [[C_RV]]))) +; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1B_RV", summary: ((offset: 8, [[B_RV]]))) +; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1C_RV", summary: 
((offset: 8, [[C_RV]]))) ; Type Id on _ZTV1D should have been promoted ; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "1.{{.*}}", summary: ((offset: 16, [[D]]))) +; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "2.{{.*}}", summary: ((offset: 8, [[D_RV]]))) ; Index based WPD ; RUN: llvm-lto2 run %t2.o -save-temps -pass-remarks=. \ ; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,test_rv,px \ ; RUN: -r=%t2.o,_ZN1A1nEi,p \ ; RUN: -r=%t2.o,_ZN1B1fEi,p \ ; RUN: -r=%t2.o,_ZN1C1fEi,p \ ; RUN: -r=%t2.o,_ZN1D1mEi,p \ ; RUN: -r=%t2.o,_ZTV1B,px \ ; RUN: -r=%t2.o,_ZTV1C,px \ -; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: -r=%t2.o,_ZTV1D,px \ +; RUN: -r=%t2.o,_ZTV1B_RV,px \ +; RUN: -r=%t2.o,_ZTV1C_RV,px \ +; RUN: -r=%t2.o,_ZTV1D_RV,px \ +; RUN: 2>&1 | FileCheck %s --check-prefix=REMARK ; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR ; Check that we're able to prevent specific function from being @@ -54,18 +66,24 @@ ; RUN: -wholeprogramdevirt-skip=_ZN1A1nEi \ ; RUN: -o %t3 \ ; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,test_rv,px \ ; RUN: -r=%t2.o,_ZN1A1nEi,p \ ; RUN: -r=%t2.o,_ZN1B1fEi,p \ ; RUN: -r=%t2.o,_ZN1C1fEi,p \ ; RUN: -r=%t2.o,_ZN1D1mEi,p \ ; RUN: -r=%t2.o,_ZTV1B,px \ ; RUN: -r=%t2.o,_ZTV1C,px \ -; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=SKIP +; RUN: -r=%t2.o,_ZTV1D,px \ +; RUN: -r=%t2.o,_ZTV1B_RV,px \ +; RUN: -r=%t2.o,_ZTV1C_RV,px \ +; RUN: -r=%t2.o,_ZTV1D_RV,px \ +; RUN: 2>&1 | FileCheck %s --check-prefix=SKIP ; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. 
\ ; RUN: -whole-program-visibility \ ; RUN: -o %t3 \ ; RUN: -r=%t.o,test,px \ +; RUN: -r=%t.o,test_rv,px \ ; RUN: -r=%t.o,_ZN1A1nEi,p \ ; RUN: -r=%t.o,_ZN1B1fEi,p \ ; RUN: -r=%t.o,_ZN1C1fEi,p \ @@ -73,17 +91,26 @@ ; RUN: -r=%t.o,_ZTV1B, \ ; RUN: -r=%t.o,_ZTV1C, \ ; RUN: -r=%t.o,_ZTV1D, \ +; RUN: -r=%t.o,_ZTV1B_RV, \ +; RUN: -r=%t.o,_ZTV1C_RV, \ +; RUN: -r=%t.o,_ZTV1D_RV, \ ; RUN: -r=%t.o,_ZN1A1nEi, \ ; RUN: -r=%t.o,_ZN1B1fEi, \ ; RUN: -r=%t.o,_ZN1C1fEi, \ ; RUN: -r=%t.o,_ZN1D1mEi, \ ; RUN: -r=%t.o,_ZTV1B,px \ ; RUN: -r=%t.o,_ZTV1C,px \ -; RUN: -r=%t.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK --dump-input=fail +; RUN: -r=%t.o,_ZTV1D,px \ +; RUN: -r=%t.o,_ZTV1B_RV,px \ +; RUN: -r=%t.o,_ZTV1C_RV,px \ +; RUN: -r=%t.o,_ZTV1D_RV,px \ +; RUN: 2>&1 | FileCheck %s --check-prefix=REMARK --dump-input=fail ; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR ; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi ; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi +; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi +; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi ; SKIP-NOT: devirtualized a call to _ZN1A1nEi @@ -99,6 +126,25 @@ target triple = "x86_64-grtev4-linux-gnu" @_ZTV1C = constant { [4 x ptr] } { [4 x ptr] [ptr null, ptr undef, ptr @_ZN1C1fEi, ptr @_ZN1A1nEi] }, !type !0, !type !2 @_ZTV1D = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr undef, ptr @_ZN1D1mEi] }, !type !3 +@_ZTV1B_RV = constant { [4 x i32] } { [4 x i32] [ + i32 0, + i32 undef, + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @_ZN1B1fEi to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [4 x i32] }, ptr @_ZTV1B_RV, i32 0, i32 0, i32 2) to i64)) to i32), + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @_ZN1A1nEi to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [4 x i32] }, ptr @_ZTV1B_RV, i32 0, i32 0, i32 3) to i64)) to i32) +] }, !type !7, !type !8 + +@_ZTV1C_RV = constant { [4 x i32] } { [4 x i32] [ + 
i32 0, + i32 undef, + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @_ZN1C1fEi to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [4 x i32] }, ptr @_ZTV1C_RV, i32 0, i32 0, i32 2) to i64)) to i32), + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @_ZN1A1nEi to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [4 x i32] }, ptr @_ZTV1C_RV, i32 0, i32 0, i32 3) to i64)) to i32) +] }, !type !7, !type !9 + +@_ZTV1D_RV = constant { [3 x i32] } { [3 x i32] [ + i32 0, + i32 undef, + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @_ZN1D1mEi to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1D_RV, i32 0, i32 0, i32 2) to i64)) to i32) +] }, !type !10 ; CHECK-IR-LABEL: define {{(noundef )?}}i32 @test define i32 @test(ptr %obj, ptr %obj2, i32 %a) { @@ -136,6 +182,43 @@ entry: ; CHECK-IR-LABEL: ret i32 ; CHECK-IR-LABEL: } +declare ptr @llvm.load.relative.i32(ptr, i32) + +; CHECK-IR-LABEL: define {{.*}}i32 @test_rv +define i32 @test_rv(ptr %obj, ptr %obj2, i32 %a) { +entry: + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"_ZTS1A_RV") + call void @llvm.assume(i1 %p) + %fptr1_rv = call ptr @llvm.load.relative.i32(ptr %vtable, i32 4) + + ; Check that the call was devirtualized. + ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi + ; Ensure !prof and !callees metadata for indirect call promotion removed. + ; CHECK-IR-NOT: prof + ; CHECK-IR-NOT: callees + %call = tail call i32 %fptr1_rv(ptr nonnull %obj, i32 %a), !prof !5, !callees !6 + + %fptr22_rv = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + + ; We still have to call it as virtual. 
+ ; CHECK-IR: %call3 = tail call i32 %fptr22 + %call3 = tail call i32 %fptr22_rv(ptr nonnull %obj, i32 %call) + + %vtable2 = load ptr, ptr %obj2 + %p2 = call i1 @llvm.type.test(ptr %vtable2, metadata !11) + call void @llvm.assume(i1 %p2) + + %fptr33_rv = call ptr @llvm.load.relative.i32(ptr %vtable2, i32 0) + + ; Check that the call was devirtualized. + ; CHECK-IR: %call4 = tail call i32 @_ZN1D1mEi + %call4 = tail call i32 %fptr33_rv(ptr nonnull %obj2, i32 %call3) + ret i32 %call4 +} +; CHECK-IR-LABEL: ret i32 +; CHECK-IR-LABEL: } + declare i1 @llvm.type.test(ptr, metadata) declare void @llvm.assume(i1) @@ -165,3 +248,9 @@ attributes #0 = { noinline optnone } !4 = distinct !{} !5 = !{!"VP", i32 0, i64 1, i64 1621563287929432257, i64 1} !6 = !{ptr @_ZN1A1nEi} + +!7 = !{i64 8, !"_ZTS1A_RV"} +!8 = !{i64 8, !"_ZTS1B_RV"} +!9 = !{i64 8, !"_ZTS1C_RV"} +!10 = !{i64 8, !11} +!11 = distinct !{} diff --git a/llvm/test/Transforms/Attributor/nofpclass.ll b/llvm/test/Transforms/Attributor/nofpclass.ll index 7828629..d2d11e0 100644 --- a/llvm/test/Transforms/Attributor/nofpclass.ll +++ b/llvm/test/Transforms/Attributor/nofpclass.ll @@ -54,6 +54,18 @@ define float @returned_poison() { ret float poison } +; Know nothing +define float @returned_freeze_poison() { +; CHECK-LABEL: define noundef float @returned_freeze_poison() { +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: [[FREEZE_POISON:%.*]] = freeze float poison +; CHECK-NEXT: ret float [[FREEZE_POISON]] +; + call void @unknown() + %freeze.poison = freeze float poison + ret float %freeze.poison +} + define double @returned_snan() { ; CHECK-LABEL: define noundef nofpclass(qnan inf zero sub norm) double @returned_snan() { ; CHECK-NEXT: call void @unknown() diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions-relative-pointers-bad.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-relative-pointers-bad.ll index 9d87fdb..9a4bff8 100644 --- a/llvm/test/Transforms/GlobalDCE/virtual-functions-relative-pointers-bad.ll 
+++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-relative-pointers-bad.ll @@ -16,14 +16,32 @@ declare { ptr, i1 } @llvm.type.checked.load(ptr, i32, metadata) ; CHECK: @vtable = internal unnamed_addr constant { [3 x i32] } zeroinitializer, align 8, !type !0, !type !1, !vcall_visibility !2 +@vtable2 = internal unnamed_addr constant { [3 x i32] } { [3 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vfunc3 to i64), i64 ptrtoint (ptr @vtable2 to i64)) to i32), + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vfunc4 to i64), i64 ptrtoint (ptr @vtable2 to i64)) to i32), + + ; a "bad" relative pointer because it's base is not the @vtable symbol + i32 trunc (i64 sub (i64 ptrtoint (ptr @weird_ref_3 to i64), i64 ptrtoint (ptr @weird_ref_4 to i64)) to i32) +]}, align 4, !type !3, !type !4, !vcall_visibility !{i64 2} +!3 = !{i64 0, !"vfunc3.type"} +!4 = !{i64 4, !"vfunc4.type"} + +; CHECK: @vtable2 = internal unnamed_addr constant { [3 x i32] } zeroinitializer, align 4, !type !3, !type !4, !vcall_visibility !2 + define internal void @vfunc1() { ret void } define internal void @vfunc2() { ret void } define internal void @weird_ref_1() { ret void } define internal void @weird_ref_2() { ret void } +declare void @vfunc3() +declare void @vfunc4() +declare void @weird_ref_3() +declare void @weird_ref_4() define void @main() { %1 = ptrtoint ptr @vtable to i64 ; to keep @vtable alive call void @weird_ref_2() + %2 = ptrtoint ptr @vtable2 to i64 ; to keep @vtable2 alive + call void @weird_ref_4() ret void } diff --git a/llvm/test/Transforms/GlobalDCE/virtual-functions-relative-pointers-gep.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-relative-pointers-gep.ll index fb45d37..d89b024 100644 --- a/llvm/test/Transforms/GlobalDCE/virtual-functions-relative-pointers-gep.ll +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-relative-pointers-gep.ll @@ -19,6 +19,20 @@ declare { ptr, i1 } @llvm.type.checked.load(ptr, i32, metadata) ; 
CHECK-SAME: i32 0 ; CHECK-SAME: ] }, align 8, !type !0, !type !1, !vcall_visibility !2 +@vtable2 = internal unnamed_addr constant { [4 x i32] } { [4 x i32] [ + i32 42, + i32 1337, + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vfunc3_live_extern to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [4 x i32] }, ptr @vtable2, i32 0, i32 0, i32 2) to i64)) to i32), + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vfunc4_dead_extern to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [4 x i32] }, ptr @vtable2, i32 0, i32 0, i32 2) to i64)) to i32) +]}, align 4, !type !3, !type !4, !vcall_visibility !{i64 2} +!3 = !{i64 8, !"vfunc3.type"} +!4 = !{i64 12, !"vfunc4.type"} + +; CHECK: @vtable2 = internal unnamed_addr constant { [4 x i32] } { [4 x i32] [ +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vfunc3_live_extern to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [4 x i32] }, ptr @vtable2, i32 0, i32 0, i32 2) to i64)) to i32), +; CHECK-SAME: i32 0 +; CHECK-SAME: ] }, align 4, !type !3, !type !4, !vcall_visibility !2 + ; (1) vfunc1_live is referenced from @main, stays alive define internal void @vfunc1_live() { ; CHECK: define internal void @vfunc1_live( @@ -31,9 +45,19 @@ define internal void @vfunc2_dead() { ret void } +; (3) vfunc3_live_extern is referenced from @main, stays alive +; CHECK: declare void @vfunc3_live_extern +declare void @vfunc3_live_extern() + +; (4) vfunc4_dead_extern is never referenced, gets removed and vtable slot is null'd +; CHECK-NOT: declare void @vfunc4_dead_extern +declare void @vfunc4_dead_extern() + define void @main() { %1 = ptrtoint ptr @vtable to i64 ; to keep @vtable alive %2 = tail call { ptr, i1 } @llvm.type.checked.load(ptr null, i32 0, metadata !"vfunc1.type") + %3 = ptrtoint ptr @vtable2 to i64 ; to keep @vtable2 alive + %4 = tail call { ptr, i1 } @llvm.type.checked.load(ptr null, i32 0, metadata !"vfunc3.type") ret void } diff --git 
a/llvm/test/Transforms/GlobalDCE/virtual-functions-relative-pointers.ll b/llvm/test/Transforms/GlobalDCE/virtual-functions-relative-pointers.ll index 76a617e..0b70148 100644 --- a/llvm/test/Transforms/GlobalDCE/virtual-functions-relative-pointers.ll +++ b/llvm/test/Transforms/GlobalDCE/virtual-functions-relative-pointers.ll @@ -6,17 +6,31 @@ declare { ptr, i1 } @llvm.type.checked.load(ptr, i32, metadata) ; A vtable with "relative pointers", slots don't contain pointers to implementations, but instead have an i32 offset from the vtable itself to the implementation. @vtable = internal unnamed_addr constant { [2 x i32] } { [2 x i32] [ - i32 trunc (i64 sub (i64 ptrtoint (ptr @vfunc1_live to i64), i64 ptrtoint (ptr @vtable to i64)) to i32), - i32 trunc (i64 sub (i64 ptrtoint (ptr @vfunc2_dead to i64), i64 ptrtoint (ptr @vtable to i64)) to i32) + i32 trunc (i64 sub (i64 ptrtoint (ptr @vfunc1_live to i64), i64 ptrtoint (ptr @vtable to i64)) to i32), + i32 trunc (i64 sub (i64 ptrtoint (ptr @vfunc2_dead to i64), i64 ptrtoint (ptr @vtable to i64)) to i32) ]}, align 8, !type !0, !type !1, !vcall_visibility !{i64 2} !0 = !{i64 0, !"vfunc1.type"} !1 = !{i64 4, !"vfunc2.type"} ; CHECK: @vtable = internal unnamed_addr constant { [2 x i32] } { [2 x i32] [ -; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @vfunc1_live to i64), i64 ptrtoint (ptr @vtable to i64)) to i32), +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr @vfunc1_live to i64), i64 ptrtoint (ptr @vtable to i64)) to i32), ; CHECK-SAME: i32 0 ; CHECK-SAME: ] }, align 8, !type !0, !type !1, !vcall_visibility !2 +; Similar to above, but the vtable is more aligned to how C++ relative vtables look. +; That is, the functions may not be dso-local. 
+@vtable2 = internal unnamed_addr constant { [2 x i32] } { [2 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vfunc3_live_extern to i64), i64 ptrtoint (ptr @vtable2 to i64)) to i32), + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vfunc4_dead_extern to i64), i64 ptrtoint (ptr @vtable2 to i64)) to i32) +]}, align 4, !type !3, !type !4, !vcall_visibility !{i64 2} +!3 = !{i64 0, !"vfunc3.type"} +!4 = !{i64 4, !"vfunc4.type"} + +; CHECK: @vtable2 = internal unnamed_addr constant { [2 x i32] } { [2 x i32] [ +; CHECK-SAME: i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vfunc3_live_extern to i64), i64 ptrtoint (ptr @vtable2 to i64)) to i32), +; CHECK-SAME: i32 0 +; CHECK-SAME: ] }, align 4, !type !3, !type !4, !vcall_visibility !2 + ; (1) vfunc1_live is referenced from @main, stays alive define internal void @vfunc1_live() { ; CHECK: define internal void @vfunc1_live( @@ -29,9 +43,19 @@ define internal void @vfunc2_dead() { ret void } +; (3) vfunc3_live_extern is referenced from @main, stays alive +; CHECK: declare void @vfunc3_live_extern +declare void @vfunc3_live_extern() + +; (4) vfunc4_dead_extern is never referenced, gets removed and vtable slot is null'd +; CHECK-NOT: declare void @vfunc4_dead_extern +declare void @vfunc4_dead_extern() + define void @main() { %1 = ptrtoint ptr @vtable to i64 ; to keep @vtable alive %2 = tail call { ptr, i1 } @llvm.type.checked.load(ptr null, i32 0, metadata !"vfunc1.type") + %3 = ptrtoint ptr @vtable2 to i64 ; to keep @vtable2 alive + %4 = tail call { ptr, i1 } @llvm.type.checked.load(ptr null, i32 0, metadata !"vfunc3.type") ret void } diff --git a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll index 6f659a8..c5f656c 100644 --- a/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll +++ b/llvm/test/Transforms/IndVarSimplify/AArch64/widen-loop-comp.ll @@ -41,7 +41,7 @@ define i32 @test1() { ; 
CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_COND]] ; CHECK: if.then: ; CHECK-NEXT: [[I_05_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[I_05_LCSSA_WIDE]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = trunc nuw nsw i64 [[I_05_LCSSA_WIDE]] to i32 ; CHECK-NEXT: store i32 [[TMP5]], ptr @idx, align 4 ; CHECK-NEXT: br label [[FOR_END:%.*]] ; CHECK: for.cond.for.end.loopexit_crit_edge: @@ -237,7 +237,7 @@ define i32 @test4(i32 %a) { ; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[OR]] to i8 ; CHECK-NEXT: [[CALL:%.*]] = call i32 @fn1(i8 signext [[CONV3]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i32 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP0:%.*]] = trunc i32 [[INDVARS_IV_NEXT]] to i8 +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i32 [[INDVARS_IV_NEXT]] to i8 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[TMP0]], -14 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: @@ -466,7 +466,7 @@ define i32 @test9(ptr %a, i32 %b, i32 %init) { ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ADD]] = add nsw i32 [[SUM_0]], [[TMP1]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc nuw i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 0, [[TMP2]] ; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_COND]], label [[FOR_END]] ; CHECK: for.end: @@ -997,7 +997,7 @@ define i32 @test16_unsigned_neg(i32 %start, ptr %p, ptr %q, i32 %x) { ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[FOO:%.*]] = add i32 [[TMP1]], -1 ; CHECK-NEXT: br i1 [[COND]], label 
[[EXIT:%.*]], label [[GUARDED:%.*]] ; CHECK: guarded: diff --git a/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll b/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll index d05755b..4e0c503 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/iv-widen.ll @@ -23,7 +23,7 @@ define void @loop_0(ptr %a) { ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[B18_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[B24:%.*]] ] ; CHECK-NEXT: call void @use(i64 [[INDVARS_IV]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[O:%.*]] = getelementptr i32, ptr [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[O]], align 4 ; CHECK-NEXT: [[T:%.*]] = icmp eq i32 [[V]], 0 @@ -37,7 +37,7 @@ define void @loop_0(ptr %a) { ; CHECK-NEXT: ret void ; CHECK: exit24: ; CHECK-NEXT: [[DOT02_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV]], [[B18]] ] -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[DOT02_LCSSA_WIDE]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw nsw i64 [[DOT02_LCSSA_WIDE]] to i32 ; CHECK-NEXT: call void @dummy(i32 [[TMP1]]) ; CHECK-NEXT: unreachable ; @@ -159,7 +159,7 @@ declare void @dummy(i32) declare void @dummy.i64(i64) -define void @loop_2(i32 %size, i32 %nsteps, i32 %hsize, ptr %lined, i8 %tmp1) { +define void @loop_2(i32 %size, i32 %nsteps, i32 %hsize, ptr %lined, i8 %arg) { ; CHECK-LABEL: @loop_2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP215:%.*]] = icmp sgt i32 [[SIZE:%.*]], 1 @@ -180,12 +180,12 @@ define void @loop_2(i32 %size, i32 %nsteps, i32 %hsize, ptr %lined, i8 %tmp1) { ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 1, [[FOR_BODY2_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY2]] ] ; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP3]], [[INDVARS_IV]] ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[LINED:%.*]], i64 
[[TMP4]] -; CHECK-NEXT: store i8 [[TMP1:%.*]], ptr [[ADD_PTR]], align 1 +; CHECK-NEXT: store i8 [[ARG:%.*]], ptr [[ADD_PTR]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY2]], label [[FOR_BODY3_PREHEADER:%.*]] ; CHECK: for.body3.preheader: -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = trunc nsw i64 [[TMP3]] to i32 ; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP5]] to i64 ; CHECK-NEXT: [[WIDE_TRIP_COUNT7:%.*]] = zext i32 [[SIZE]] to i64 ; CHECK-NEXT: br label [[FOR_BODY3:%.*]] @@ -193,7 +193,7 @@ define void @loop_2(i32 %size, i32 %nsteps, i32 %hsize, ptr %lined, i8 %tmp1) { ; CHECK-NEXT: [[INDVARS_IV3:%.*]] = phi i64 [ 1, [[FOR_BODY3_PREHEADER]] ], [ [[INDVARS_IV_NEXT4:%.*]], [[FOR_BODY3]] ] ; CHECK-NEXT: [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], [[INDVARS_IV3]] ; CHECK-NEXT: [[ADD_PTR2:%.*]] = getelementptr inbounds i8, ptr [[LINED]], i64 [[TMP7]] -; CHECK-NEXT: store i8 [[TMP1]], ptr [[ADD_PTR2]], align 1 +; CHECK-NEXT: store i8 [[ARG]], ptr [[ADD_PTR2]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT4]] = add nuw nsw i64 [[INDVARS_IV3]], 1 ; CHECK-NEXT: [[EXITCOND8:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT4]], [[WIDE_TRIP_COUNT7]] ; CHECK-NEXT: br i1 [[EXITCOND8]], label [[FOR_BODY3]], label [[FOR_INC_LOOPEXIT:%.*]] @@ -222,7 +222,7 @@ for.body2: %add4 = add nsw i32 %add, %k %idx.ext = sext i32 %add4 to i64 %add.ptr = getelementptr inbounds i8, ptr %lined, i64 %idx.ext - store i8 %tmp1, ptr %add.ptr, align 1 + store i8 %arg, ptr %add.ptr, align 1 %inc = add nsw i32 %k, 1 %cmp2 = icmp slt i32 %inc, %size br i1 %cmp2, label %for.body2, label %for.body3 @@ -233,7 +233,7 @@ for.body3: %add5 = add nuw i32 %add, %l %idx.ext2 = zext i32 %add5 to i64 %add.ptr2 = getelementptr inbounds i8, ptr %lined, i64 %idx.ext2 - store i8 %tmp1, ptr %add.ptr2, align 1 + store i8 %arg, ptr 
%add.ptr2, align 1 %inc2 = add nsw i32 %l, 1 %cmp3 = icmp slt i32 %inc2, %size br i1 %cmp3, label %for.body3, label %for.inc diff --git a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll index 54bb995..01c95da 100644 --- a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll +++ b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll @@ -142,7 +142,7 @@ define void @nestedIV(ptr %address, i32 %limit) nounwind { ; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNERLOOP]], label [[INNEREXIT:%.*]] ; CHECK: innerexit: ; CHECK-NEXT: [[INNERCOUNT_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV_NEXT]], [[INNERLOOP]] ] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INNERCOUNT_LCSSA_WIDE]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = trunc nsw i64 [[INNERCOUNT_LCSSA_WIDE]] to i32 ; CHECK-NEXT: br label [[OUTERMERGE]] ; CHECK: outermerge: ; CHECK-NEXT: [[INNERCOUNT_MERGE]] = phi i32 [ [[TMP3]], [[INNEREXIT]] ], [ [[INNERCOUNT]], [[INNERPREHEADER]] ] diff --git a/llvm/test/Transforms/IndVarSimplify/hoist-wide-inc-for-narrow-use-recompute-flags.ll b/llvm/test/Transforms/IndVarSimplify/hoist-wide-inc-for-narrow-use-recompute-flags.ll index cc99ee3..1135ca9 100644 --- a/llvm/test/Transforms/IndVarSimplify/hoist-wide-inc-for-narrow-use-recompute-flags.ll +++ b/llvm/test/Transforms/IndVarSimplify/hoist-wide-inc-for-narrow-use-recompute-flags.ll @@ -15,7 +15,7 @@ define void @test_pr82243(ptr %f) { ; CHECK-NEXT: [[GEP_IV_EXT:%.*]] = getelementptr i32, ptr [[F]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i32 1, ptr [[GEP_IV_EXT]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[SHL:%.*]] = shl i32 123, [[TMP0]] ; CHECK-NEXT: [[GEP_SHL:%.*]] = getelementptr i32, ptr [[F]], i32 [[SHL]] ; CHECK-NEXT: br label [[INNER_HEADER:%.*]] diff --git 
a/llvm/test/Transforms/IndVarSimplify/iv-sext.ll b/llvm/test/Transforms/IndVarSimplify/iv-sext.ll index 450913f..95a036f 100644 --- a/llvm/test/Transforms/IndVarSimplify/iv-sext.ll +++ b/llvm/test/Transforms/IndVarSimplify/iv-sext.ll @@ -99,7 +99,7 @@ define void @t(ptr %pval1, ptr %peakWeight, ptr %nrgReducePeakrate, i32 %bandEdg ; CHECK-NEXT: [[VAL35_LCSSA:%.*]] = phi float [ [[VAL35]], [[BB5]] ] ; CHECK-NEXT: [[VAL31_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV_NEXT]], [[BB5]] ] ; CHECK-NEXT: [[VAL30_LCSSA:%.*]] = phi float [ [[VAL30]], [[BB5]] ] -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[VAL31_LCSSA_WIDE]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = trunc nsw i64 [[VAL31_LCSSA_WIDE]] to i32 ; CHECK-NEXT: br label [[BB7]] ; CHECK: bb7: ; CHECK-NEXT: [[DISTERBHI_2_LCSSA]] = phi float [ [[VAL30_LCSSA]], [[BB5_BB7_CRIT_EDGE]] ], [ [[DISTERBHI_0_PH]], [[BB5_PREHEADER]] ] diff --git a/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll b/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll index 59a0241..a83e9ce 100644 --- a/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll +++ b/llvm/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll @@ -22,7 +22,7 @@ define void @foo(ptr %A, ptr %B, ptr %C, i32 %N) { ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP0]], [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw nsw i64 [[TMP1]] to i32 ; CHECK-NEXT: [[DIV0:%.*]] = udiv i32 5, [[TMP3]] ; CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[ADD3]], [[DIV0]] ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]] @@ -224,7 +224,7 @@ define i32 @foo3(i32 %M) { ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[INDVARS_IV]], 
[[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = trunc nsw i64 [[TMP3]] to i32 ; CHECK-NEXT: [[IDXPROM4:%.*]] = zext i32 [[TMP4]] to i64 ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x i32], ptr @a, i64 0, i64 [[IDXPROM4]] ; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX5]], align 4 @@ -365,7 +365,7 @@ define i32 @foo5(ptr %input, i32 %length, ptr %in) { ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 1, [[FOR_BODY_LR_PH]] ] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[INPUT]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[TMP5:%.*]] = trunc nuw nsw i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], [[TMP5]] ; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[MUL]] to i64 ; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[IN:%.*]], i64 [[IDX_EXT]] @@ -514,7 +514,7 @@ define void @foo7(i32 %n, ptr %a, i32 %x) { ; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP2]], 1 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], [[TMP0]] ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]] diff --git a/llvm/test/Transforms/IndVarSimplify/lftr.ll b/llvm/test/Transforms/IndVarSimplify/lftr.ll index 41db925..7f4820f 100644 --- a/llvm/test/Transforms/IndVarSimplify/lftr.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr.ll @@ -525,7 +525,7 @@ define float @wide_trip_count_test3(ptr %b, ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[INDVARS_IV]], 20 ; CHECK-NEXT: [[ARRAYIDX:%.*]] 
= getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TEMP:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[CONV]], [[TEMP]] ; CHECK-NEXT: [[ADD1]] = fadd float [[SUM_07]], [[MUL]] @@ -584,7 +584,7 @@ define float @wide_trip_count_test4(ptr %b, ; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 20 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TEMP:%.*]] = load float, ptr [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to float ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[CONV]], [[TEMP]] ; CHECK-NEXT: [[ADD1]] = fadd float [[SUM_07]], [[MUL]] diff --git a/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll index c35c5ba..579b853 100644 --- a/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll +++ b/llvm/test/Transforms/IndVarSimplify/no-iv-rewrite.ll @@ -213,7 +213,7 @@ define void @maxvisitor(i32 %limit, ptr %base) nounwind { ; CHECK-NEXT: [[CMP19:%.*]] = icmp sgt i32 [[VAL]], [[MAX]] ; CHECK-NEXT: br i1 [[CMP19]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: br label [[LOOP_INC]] ; CHECK: if.else: ; CHECK-NEXT: br label [[LOOP_INC]] diff --git a/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll b/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll index 5c22ba1..bbdee02 100644 --- a/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll +++ 
b/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll @@ -180,7 +180,7 @@ define void @test_neg(ptr %array_length_ptr, ptr %base, ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc nuw i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], [[LIMIT:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] ; CHECK: for.end: diff --git a/llvm/test/Transforms/IndVarSimplify/pr25578.ll b/llvm/test/Transforms/IndVarSimplify/pr25578.ll index d8adc17..380e817 100644 --- a/llvm/test/Transforms/IndVarSimplify/pr25578.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr25578.ll @@ -13,7 +13,7 @@ L1_header: ; CHECK: L2_header: ; CHECK: %[[INDVAR:.*]] = phi i64 -; CHECK: %[[TRUNC:.*]] = trunc i64 %[[INDVAR]] to i32 +; CHECK: %[[TRUNC:.*]] = trunc nuw nsw i64 %[[INDVAR]] to i32 L2_header: %i = phi i32 [ 0, %L1_header ], [ %i_next, %L2_latch ] %i_prom = sext i32 %i to i64 diff --git a/llvm/test/Transforms/IndVarSimplify/pr55925.ll b/llvm/test/Transforms/IndVarSimplify/pr55925.ll index 312a829..2ad187a 100644 --- a/llvm/test/Transforms/IndVarSimplify/pr55925.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr55925.ll @@ -14,11 +14,11 @@ define void @test(ptr %p) personality ptr undef { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foo(i32 returned [[TMP0]]) ; CHECK-NEXT: to label [[LOOP_LATCH]] unwind label [[EXIT:%.*]] ; CHECK: loop.latch: -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = 
call i32 @foo(i32 [[TMP1]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: br label [[LOOP]] @@ -56,8 +56,8 @@ define void @test_critedge(i1 %c, ptr %p) personality ptr undef { ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br i1 [[C:%.*]], label [[LOOP_INVOKE:%.*]], label [[LOOP_OTHER:%.*]] ; CHECK: loop.invoke: -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[RES:%.*]] = invoke i32 @foo(i32 returned [[TMP0]]) ; CHECK-NEXT: to label [[LOOP_LATCH]] unwind label [[EXIT:%.*]] ; CHECK: loop.other: diff --git a/llvm/test/Transforms/IndVarSimplify/widen-nonnegative-countdown.ll b/llvm/test/Transforms/IndVarSimplify/widen-nonnegative-countdown.ll index d473103..9c89834 100644 --- a/llvm/test/Transforms/IndVarSimplify/widen-nonnegative-countdown.ll +++ b/llvm/test/Transforms/IndVarSimplify/widen-nonnegative-countdown.ll @@ -223,7 +223,7 @@ define void @sext_postinc(ptr %A, i32 %start) { ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP1]], 6 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: @@ -262,7 +262,7 @@ define void @sext_preinc(ptr %A, i32 %start) { ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) ; CHECK-NEXT: 
[[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP1]], 6 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: @@ -366,7 +366,7 @@ define void @zext_postinc_offset_constant_one(ptr %A, i32 %start) { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[J_016_US:%.*]] = phi i32 [ [[INC_US:%.*]], [[FOR_BODY]] ], [ [[START]], [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nuw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[ADD_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] @@ -513,13 +513,13 @@ define void @sext_postinc_offset_constant_one(ptr %A, i32 %start) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP1]], 1 ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP2]], 6 ; CHECK-NEXT: br i1 [[CMP2_US]], label 
[[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: @@ -556,13 +556,13 @@ define void @sext_preinc_offset_constant_one(ptr %A, i32 %start) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[ADD_US:%.*]] = add nuw i32 [[TMP1]], 1 ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP2]], 6 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: @@ -808,13 +808,13 @@ define void @sext_postinc_offset_constant_minus_one(ptr %A, i32 %start) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP1]], -1 ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc nsw i64 [[INDVARS_IV_NEXT]] to i32 ; CHECK-NEXT: 
[[CMP2_US:%.*]] = icmp ugt i32 [[TMP2]], 6 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: @@ -851,13 +851,13 @@ define void @sext_preinc_offset_constant_minus_one(ptr %A, i32 %start) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP1]], -1 ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[CMP2_US:%.*]] = icmp ugt i32 [[TMP2]], 6 ; CHECK-NEXT: br i1 [[CMP2_US]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: diff --git a/llvm/test/Transforms/IndVarSimplify/widen-nonnegative.ll b/llvm/test/Transforms/IndVarSimplify/widen-nonnegative.ll index 739db26..e00eaaf 100644 --- a/llvm/test/Transforms/IndVarSimplify/widen-nonnegative.ll +++ b/llvm/test/Transforms/IndVarSimplify/widen-nonnegative.ll @@ -150,7 +150,7 @@ define void @sext_add_nuw(ptr %A, i32 %offset, i32 %M) { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[INDVARS_IV]], [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc nuw i64 [[TMP1]] to i32 ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[TMP2]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 
[[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) @@ -185,7 +185,7 @@ define void @sext_add_noflags(ptr %A, i32 %offset, i32 %M) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP0]], [[OFFSET:%.*]] ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[ADD_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] @@ -223,7 +223,7 @@ define void @zext_add_nsw(ptr %A, i32 %offset, i32 %M) { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc nsw i64 [[TMP1]] to i32 ; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[TMP2]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) @@ -293,7 +293,7 @@ define void @zext_add_noflags(ptr %A, i32 %offset, i32 %M) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP0]], [[OFFSET:%.*]] ; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[ADD_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] @@ -399,7 +399,7 @@ define void @zext_nneg_add_noflags(ptr %A, i32 %offset, i32 %M) { ; CHECK-NEXT: br label 
[[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[TMP0]], [[OFFSET:%.*]] ; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext nneg i32 [[ADD_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] @@ -475,7 +475,7 @@ define void @sext_mul_nuw(ptr %A, i32 %multiple, i32 %M) { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc nuw i64 [[TMP1]] to i32 ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[TMP2]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) @@ -510,7 +510,7 @@ define void @sext_mul_noflags(ptr %A, i32 %multiple, i32 %M) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[MUL_US:%.*]] = mul i32 [[TMP0]], [[MULTIPLE:%.*]] ; CHECK-NEXT: [[IDXPROM_US:%.*]] = sext i32 [[MUL_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] @@ -548,7 +548,7 @@ define void @zext_mul_nsw(ptr %A, i32 %multiple, i32 %M) { ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i64 [[INDVARS_IV]], [[TMP0]] -; 
CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = trunc nsw i64 [[TMP1]] to i32 ; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[TMP2]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] ; CHECK-NEXT: tail call void @use_ptr(ptr [[ARRAYIDX_US]]) @@ -618,7 +618,7 @@ define void @zext_mul_noflags(ptr %A, i32 %multiple, i32 %M) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[MUL_US:%.*]] = mul i32 [[TMP0]], [[MULTIPLE:%.*]] ; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext i32 [[MUL_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] @@ -724,7 +724,7 @@ define void @zext_nneg_mul_noflags(ptr %A, i32 %multiple, i32 %M) { ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32 ; CHECK-NEXT: [[MUL_US:%.*]] = mul i32 [[TMP0]], [[MULTIPLE:%.*]] ; CHECK-NEXT: [[IDXPROM_US:%.*]] = zext nneg i32 [[MUL_US]] to i64 ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IDXPROM_US]] diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll index 50b0e7a..2f264a2 100644 --- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll +++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/mem-intrinsics.ll @@ -141,4 +141,4 @@ attributes #1 = { argmemonly nounwind } !5 = distinct !{!5, !"some domain"} !6 = !{!7} !7 = distinct !{!7, 
!5, !"some scope 2"} -!8 = !{i64 0, i64 8, null} +!8 = !{i64 0, i64 8, !0} diff --git a/llvm/test/Transforms/Inline/inline-switch-default-2.ll b/llvm/test/Transforms/Inline/inline-switch-default-2.ll index 8d3e24c..82dae1c 100644 --- a/llvm/test/Transforms/Inline/inline-switch-default-2.ll +++ b/llvm/test/Transforms/Inline/inline-switch-default-2.ll @@ -4,50 +4,6 @@ ; Check for scenarios without TTI. define i64 @foo1(i64 %a) { -; LOOKUPTABLE-LABEL: define i64 @foo1( -; LOOKUPTABLE-SAME: i64 [[TMP0:%.*]]) { -; LOOKUPTABLE-NEXT: switch i64 [[TMP0]], label [[DEFAULT_BRANCH_I:%.*]] [ -; LOOKUPTABLE-NEXT: i64 0, label [[BRANCH_0_I:%.*]] -; LOOKUPTABLE-NEXT: i64 2, label [[BRANCH_2_I:%.*]] -; LOOKUPTABLE-NEXT: i64 4, label [[BRANCH_4_I:%.*]] -; LOOKUPTABLE-NEXT: i64 6, label [[BRANCH_6_I:%.*]] -; LOOKUPTABLE-NEXT: ] -; LOOKUPTABLE: branch_0.i: -; LOOKUPTABLE-NEXT: br label [[BAR1_EXIT:%.*]] -; LOOKUPTABLE: branch_2.i: -; LOOKUPTABLE-NEXT: br label [[BAR1_EXIT]] -; LOOKUPTABLE: branch_4.i: -; LOOKUPTABLE-NEXT: br label [[BAR1_EXIT]] -; LOOKUPTABLE: branch_6.i: -; LOOKUPTABLE-NEXT: br label [[BAR1_EXIT]] -; LOOKUPTABLE: default_branch.i: -; LOOKUPTABLE-NEXT: br label [[BAR1_EXIT]] -; LOOKUPTABLE: bar1.exit: -; LOOKUPTABLE-NEXT: [[TMP2:%.*]] = phi i64 [ 5, [[BRANCH_0_I]] ], [ 9, [[BRANCH_2_I]] ], [ 2, [[BRANCH_4_I]] ], [ 7, [[BRANCH_6_I]] ], [ 3, [[DEFAULT_BRANCH_I]] ] -; LOOKUPTABLE-NEXT: ret i64 [[TMP2]] -; -; SWITCH-LABEL: define i64 @foo1( -; SWITCH-SAME: i64 [[TMP0:%.*]]) { -; SWITCH-NEXT: switch i64 [[TMP0]], label [[DEFAULT_BRANCH_I:%.*]] [ -; SWITCH-NEXT: i64 0, label [[BRANCH_0_I:%.*]] -; SWITCH-NEXT: i64 2, label [[BRANCH_2_I:%.*]] -; SWITCH-NEXT: i64 4, label [[BRANCH_4_I:%.*]] -; SWITCH-NEXT: i64 6, label [[BRANCH_6_I:%.*]] -; SWITCH-NEXT: ] -; SWITCH: branch_0.i: -; SWITCH-NEXT: br label [[BAR1_EXIT:%.*]] -; SWITCH: branch_2.i: -; SWITCH-NEXT: br label [[BAR1_EXIT]] -; SWITCH: branch_4.i: -; SWITCH-NEXT: br label [[BAR1_EXIT]] -; SWITCH: branch_6.i: -; 
SWITCH-NEXT: br label [[BAR1_EXIT]] -; SWITCH: default_branch.i: -; SWITCH-NEXT: br label [[BAR1_EXIT]] -; SWITCH: bar1.exit: -; SWITCH-NEXT: [[TMP2:%.*]] = phi i64 [ 5, [[BRANCH_0_I]] ], [ 9, [[BRANCH_2_I]] ], [ 2, [[BRANCH_4_I]] ], [ 7, [[BRANCH_6_I]] ], [ 3, [[DEFAULT_BRANCH_I]] ] -; SWITCH-NEXT: ret i64 [[TMP2]] -; ; CHECK-LABEL: define i64 @foo1( ; CHECK-SAME: i64 [[A:%.*]]) { ; CHECK-NEXT: [[B:%.*]] = call i64 @bar1(i64 [[A]]) @@ -58,50 +14,6 @@ define i64 @foo1(i64 %a) { } define i64 @foo2(i64 %a) { -; LOOKUPTABLE-LABEL: define i64 @foo2( -; LOOKUPTABLE-SAME: i64 [[TMP0:%.*]]) { -; LOOKUPTABLE-NEXT: switch i64 [[TMP0]], label [[UNREACHABLEDEFAULT_I:%.*]] [ -; LOOKUPTABLE-NEXT: i64 0, label [[BRANCH_0_I:%.*]] -; LOOKUPTABLE-NEXT: i64 2, label [[BRANCH_2_I:%.*]] -; LOOKUPTABLE-NEXT: i64 4, label [[BRANCH_4_I:%.*]] -; LOOKUPTABLE-NEXT: i64 6, label [[BRANCH_6_I:%.*]] -; LOOKUPTABLE-NEXT: ] -; LOOKUPTABLE: branch_0.i: -; LOOKUPTABLE-NEXT: br label [[BAR2_EXIT:%.*]] -; LOOKUPTABLE: branch_2.i: -; LOOKUPTABLE-NEXT: br label [[BAR2_EXIT]] -; LOOKUPTABLE: branch_4.i: -; LOOKUPTABLE-NEXT: br label [[BAR2_EXIT]] -; LOOKUPTABLE: branch_6.i: -; LOOKUPTABLE-NEXT: br label [[BAR2_EXIT]] -; LOOKUPTABLE: unreachabledefault.i: -; LOOKUPTABLE-NEXT: unreachable -; LOOKUPTABLE: bar2.exit: -; LOOKUPTABLE-NEXT: [[TMP2:%.*]] = phi i64 [ 5, [[BRANCH_0_I]] ], [ 9, [[BRANCH_2_I]] ], [ 2, [[BRANCH_4_I]] ], [ 7, [[BRANCH_6_I]] ] -; LOOKUPTABLE-NEXT: ret i64 [[TMP2]] -; -; SWITCH-LABEL: define i64 @foo2( -; SWITCH-SAME: i64 [[TMP0:%.*]]) { -; SWITCH-NEXT: switch i64 [[TMP0]], label [[UNREACHABLEDEFAULT_I:%.*]] [ -; SWITCH-NEXT: i64 0, label [[BRANCH_0_I:%.*]] -; SWITCH-NEXT: i64 2, label [[BRANCH_2_I:%.*]] -; SWITCH-NEXT: i64 4, label [[BRANCH_4_I:%.*]] -; SWITCH-NEXT: i64 6, label [[BRANCH_6_I:%.*]] -; SWITCH-NEXT: ] -; SWITCH: branch_0.i: -; SWITCH-NEXT: br label [[BAR2_EXIT:%.*]] -; SWITCH: branch_2.i: -; SWITCH-NEXT: br label [[BAR2_EXIT]] -; SWITCH: branch_4.i: -; SWITCH-NEXT: br 
label [[BAR2_EXIT]] -; SWITCH: branch_6.i: -; SWITCH-NEXT: br label [[BAR2_EXIT]] -; SWITCH: unreachabledefault.i: -; SWITCH-NEXT: unreachable -; SWITCH: bar2.exit: -; SWITCH-NEXT: [[TMP2:%.*]] = phi i64 [ 5, [[BRANCH_0_I]] ], [ 9, [[BRANCH_2_I]] ], [ 2, [[BRANCH_4_I]] ], [ 7, [[BRANCH_6_I]] ] -; SWITCH-NEXT: ret i64 [[TMP2]] -; ; CHECK-LABEL: define i64 @foo2( ; CHECK-SAME: i64 [[A:%.*]]) { ; CHECK-NEXT: switch i64 [[A]], label [[UNREACHABLEDEFAULT_I:%.*]] [ @@ -129,50 +41,6 @@ define i64 @foo2(i64 %a) { } define i64 @bar1(i64 %a) { -; LOOKUPTABLE-LABEL: define i64 @bar1( -; LOOKUPTABLE-SAME: i64 [[TMP0:%.*]]) { -; LOOKUPTABLE-NEXT: switch i64 [[TMP0]], label [[DEFAULT_BRANCH:%.*]] [ -; LOOKUPTABLE-NEXT: i64 0, label [[BRANCH_0:%.*]] -; LOOKUPTABLE-NEXT: i64 2, label [[BRANCH_2:%.*]] -; LOOKUPTABLE-NEXT: i64 4, label [[BRANCH_4:%.*]] -; LOOKUPTABLE-NEXT: i64 6, label [[BRANCH_6:%.*]] -; LOOKUPTABLE-NEXT: ] -; LOOKUPTABLE: branch_0: -; LOOKUPTABLE-NEXT: br label [[EXIT:%.*]] -; LOOKUPTABLE: branch_2: -; LOOKUPTABLE-NEXT: br label [[EXIT]] -; LOOKUPTABLE: branch_4: -; LOOKUPTABLE-NEXT: br label [[EXIT]] -; LOOKUPTABLE: branch_6: -; LOOKUPTABLE-NEXT: br label [[EXIT]] -; LOOKUPTABLE: default_branch: -; LOOKUPTABLE-NEXT: br label [[EXIT]] -; LOOKUPTABLE: exit: -; LOOKUPTABLE-NEXT: [[TMP2:%.*]] = phi i64 [ 5, [[BRANCH_0]] ], [ 9, [[BRANCH_2]] ], [ 2, [[BRANCH_4]] ], [ 7, [[BRANCH_6]] ], [ 3, [[DEFAULT_BRANCH]] ] -; LOOKUPTABLE-NEXT: ret i64 [[TMP2]] -; -; SWITCH-LABEL: define i64 @bar1( -; SWITCH-SAME: i64 [[TMP0:%.*]]) { -; SWITCH-NEXT: switch i64 [[TMP0]], label [[DEFAULT_BRANCH:%.*]] [ -; SWITCH-NEXT: i64 0, label [[BRANCH_0:%.*]] -; SWITCH-NEXT: i64 2, label [[BRANCH_2:%.*]] -; SWITCH-NEXT: i64 4, label [[BRANCH_4:%.*]] -; SWITCH-NEXT: i64 6, label [[BRANCH_6:%.*]] -; SWITCH-NEXT: ] -; SWITCH: branch_0: -; SWITCH-NEXT: br label [[EXIT:%.*]] -; SWITCH: branch_2: -; SWITCH-NEXT: br label [[EXIT]] -; SWITCH: branch_4: -; SWITCH-NEXT: br label [[EXIT]] -; SWITCH: 
branch_6: -; SWITCH-NEXT: br label [[EXIT]] -; SWITCH: default_branch: -; SWITCH-NEXT: br label [[EXIT]] -; SWITCH: exit: -; SWITCH-NEXT: [[TMP2:%.*]] = phi i64 [ 5, [[BRANCH_0]] ], [ 9, [[BRANCH_2]] ], [ 2, [[BRANCH_4]] ], [ 7, [[BRANCH_6]] ], [ 3, [[DEFAULT_BRANCH]] ] -; SWITCH-NEXT: ret i64 [[TMP2]] -; ; CHECK-LABEL: define i64 @bar1( ; CHECK-SAME: i64 [[A:%.*]]) { ; CHECK-NEXT: switch i64 [[A]], label [[DEFAULT_BRANCH:%.*]] [ @@ -223,50 +91,6 @@ exit: } define i64 @bar2(i64 %a) { -; LOOKUPTABLE-LABEL: define i64 @bar2( -; LOOKUPTABLE-SAME: i64 [[TMP0:%.*]]) { -; LOOKUPTABLE-NEXT: switch i64 [[TMP0]], label [[UNREACHABLEDEFAULT:%.*]] [ -; LOOKUPTABLE-NEXT: i64 0, label [[BRANCH_0:%.*]] -; LOOKUPTABLE-NEXT: i64 2, label [[BRANCH_2:%.*]] -; LOOKUPTABLE-NEXT: i64 4, label [[BRANCH_4:%.*]] -; LOOKUPTABLE-NEXT: i64 6, label [[BRANCH_6:%.*]] -; LOOKUPTABLE-NEXT: ] -; LOOKUPTABLE: branch_0: -; LOOKUPTABLE-NEXT: br label [[EXIT:%.*]] -; LOOKUPTABLE: branch_2: -; LOOKUPTABLE-NEXT: br label [[EXIT]] -; LOOKUPTABLE: branch_4: -; LOOKUPTABLE-NEXT: br label [[EXIT]] -; LOOKUPTABLE: branch_6: -; LOOKUPTABLE-NEXT: br label [[EXIT]] -; LOOKUPTABLE: unreachabledefault: -; LOOKUPTABLE-NEXT: unreachable -; LOOKUPTABLE: exit: -; LOOKUPTABLE-NEXT: [[TMP2:%.*]] = phi i64 [ 5, [[BRANCH_0]] ], [ 9, [[BRANCH_2]] ], [ 2, [[BRANCH_4]] ], [ 7, [[BRANCH_6]] ] -; LOOKUPTABLE-NEXT: ret i64 [[TMP2]] -; -; SWITCH-LABEL: define i64 @bar2( -; SWITCH-SAME: i64 [[TMP0:%.*]]) { -; SWITCH-NEXT: switch i64 [[TMP0]], label [[UNREACHABLEDEFAULT:%.*]] [ -; SWITCH-NEXT: i64 0, label [[BRANCH_0:%.*]] -; SWITCH-NEXT: i64 2, label [[BRANCH_2:%.*]] -; SWITCH-NEXT: i64 4, label [[BRANCH_4:%.*]] -; SWITCH-NEXT: i64 6, label [[BRANCH_6:%.*]] -; SWITCH-NEXT: ] -; SWITCH: branch_0: -; SWITCH-NEXT: br label [[EXIT:%.*]] -; SWITCH: branch_2: -; SWITCH-NEXT: br label [[EXIT]] -; SWITCH: branch_4: -; SWITCH-NEXT: br label [[EXIT]] -; SWITCH: branch_6: -; SWITCH-NEXT: br label [[EXIT]] -; SWITCH: unreachabledefault: -; 
SWITCH-NEXT: unreachable -; SWITCH: exit: -; SWITCH-NEXT: [[TMP2:%.*]] = phi i64 [ 5, [[BRANCH_0]] ], [ 9, [[BRANCH_2]] ], [ 2, [[BRANCH_4]] ], [ 7, [[BRANCH_6]] ] -; SWITCH-NEXT: ret i64 [[TMP2]] -; ; CHECK-LABEL: define i64 @bar2( ; CHECK-SAME: i64 [[A:%.*]]) { ; CHECK-NEXT: switch i64 [[A]], label [[UNREACHABLEDEFAULT:%.*]] [ diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll index 4600a66..b1e5fa4 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-vector-shifts.ll @@ -2032,23 +2032,23 @@ define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) { ret <4 x i64> %1 } -; The shift amount is 0 (the undef lane could be 0), so we return the unshifted input. +; The shift amount is 0 (the poison lane could be 0), so we return the unshifted input. -define <2 x i64> @avx2_psrlv_q_128_undef(<2 x i64> %v) { -; CHECK-LABEL: @avx2_psrlv_q_128_undef( +define <2 x i64> @avx2_psrlv_q_128_poison(<2 x i64> %v) { +; CHECK-LABEL: @avx2_psrlv_q_128_poison( ; CHECK-NEXT: ret <2 x i64> [[V:%.*]] ; - %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1 + %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 poison, i64 1 %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1) ret <2 x i64> %2 } -define <4 x i64> @avx2_psrlv_q_256_undef(<4 x i64> %v) { -; CHECK-LABEL: @avx2_psrlv_q_256_undef( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31> +define <4 x i64> @avx2_psrlv_q_256_poison(<4 x i64> %v) { +; CHECK-LABEL: @avx2_psrlv_q_256_poison( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> [[V:%.*]], <i64 poison, i64 8, i64 16, i64 31> ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; - %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0 + %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 poison, i64 0 %2 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 
x i64> %v, <4 x i64> %1) ret <4 x i64> %2 } @@ -2435,21 +2435,21 @@ define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) { ; The shift amount is 0 (the undef lane could be 0), so we return the unshifted input. -define <2 x i64> @avx2_psllv_q_128_undef(<2 x i64> %v) { -; CHECK-LABEL: @avx2_psllv_q_128_undef( +define <2 x i64> @avx2_psllv_q_128_poison(<2 x i64> %v) { +; CHECK-LABEL: @avx2_psllv_q_128_poison( ; CHECK-NEXT: ret <2 x i64> [[V:%.*]] ; - %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1 + %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 poison, i64 1 %2 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> %1) ret <2 x i64> %2 } -define <4 x i64> @avx2_psllv_q_256_undef(<4 x i64> %v) { -; CHECK-LABEL: @avx2_psllv_q_256_undef( -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 undef, i64 8, i64 16, i64 31> +define <4 x i64> @avx2_psllv_q_256_poison(<4 x i64> %v) { +; CHECK-LABEL: @avx2_psllv_q_256_poison( +; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[V:%.*]], <i64 poison, i64 8, i64 16, i64 31> ; CHECK-NEXT: ret <4 x i64> [[TMP1]] ; - %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0 + %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 poison, i64 0 %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1) ret <4 x i64> %2 } diff --git a/llvm/test/Transforms/InstCombine/abs-1.ll b/llvm/test/Transforms/InstCombine/abs-1.ll index 7355c56..32bd7a3 100644 --- a/llvm/test/Transforms/InstCombine/abs-1.ll +++ b/llvm/test/Transforms/InstCombine/abs-1.ll @@ -63,14 +63,14 @@ define <2 x i8> @abs_canonical_2(<2 x i8> %x) { ret <2 x i8> %abs } -; Even if a constant has undef elements. +; Even if a constant has poison elements. 
-define <2 x i8> @abs_canonical_2_vec_undef_elts(<2 x i8> %x) { -; CHECK-LABEL: @abs_canonical_2_vec_undef_elts( +define <2 x i8> @abs_canonical_2_vec_poison_elts(<2 x i8> %x) { +; CHECK-LABEL: @abs_canonical_2_vec_poison_elts( ; CHECK-NEXT: [[ABS:%.*]] = call <2 x i8> @llvm.abs.v2i8(<2 x i8> [[X:%.*]], i1 false) ; CHECK-NEXT: ret <2 x i8> [[ABS]] ; - %cmp = icmp sgt <2 x i8> %x, <i8 undef, i8 -1> + %cmp = icmp sgt <2 x i8> %x, <i8 poison, i8 -1> %neg = sub <2 x i8> zeroinitializer, %x %abs = select <2 x i1> %cmp, <2 x i8> %x, <2 x i8> %neg ret <2 x i8> %abs @@ -208,15 +208,15 @@ define <2 x i8> @nabs_canonical_2(<2 x i8> %x) { ret <2 x i8> %abs } -; Even if a constant has undef elements. +; Even if a constant has poison elements. -define <2 x i8> @nabs_canonical_2_vec_undef_elts(<2 x i8> %x) { -; CHECK-LABEL: @nabs_canonical_2_vec_undef_elts( +define <2 x i8> @nabs_canonical_2_vec_poison_elts(<2 x i8> %x) { +; CHECK-LABEL: @nabs_canonical_2_vec_poison_elts( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.abs.v2i8(<2 x i8> [[X:%.*]], i1 false) ; CHECK-NEXT: [[ABS:%.*]] = sub <2 x i8> zeroinitializer, [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[ABS]] ; - %cmp = icmp sgt <2 x i8> %x, <i8 -1, i8 undef> + %cmp = icmp sgt <2 x i8> %x, <i8 -1, i8 poison> %neg = sub <2 x i8> zeroinitializer, %x %abs = select <2 x i1> %cmp, <2 x i8> %neg, <2 x i8> %x ret <2 x i8> %abs diff --git a/llvm/test/Transforms/InstCombine/add-mask-neg.ll b/llvm/test/Transforms/InstCombine/add-mask-neg.ll index 5fad615..0e579f3 100644 --- a/llvm/test/Transforms/InstCombine/add-mask-neg.ll +++ b/llvm/test/Transforms/InstCombine/add-mask-neg.ll @@ -89,8 +89,8 @@ define <2 x i32> @dec_mask_neg_v2i32(<2 x i32> %X) { ret <2 x i32> %dec } -define <2 x i32> @dec_mask_neg_v2i32_undef(<2 x i32> %X) { -; CHECK-LABEL: @dec_mask_neg_v2i32_undef( +define <2 x i32> @dec_mask_neg_v2i32_poison(<2 x i32> %X) { +; CHECK-LABEL: @dec_mask_neg_v2i32_poison( ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -1, i32 
-1> ; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[X]], <i32 -1, i32 -1> ; CHECK-NEXT: [[DEC:%.*]] = and <2 x i32> [[TMP1]], [[TMP2]] @@ -98,7 +98,7 @@ define <2 x i32> @dec_mask_neg_v2i32_undef(<2 x i32> %X) { ; %neg = sub <2 x i32> zeroinitializer, %X %mask = and <2 x i32> %neg, %X - %dec = add <2 x i32> %mask, <i32 -1, i32 undef> + %dec = add <2 x i32> %mask, <i32 -1, i32 poison> ret <2 x i32> %dec } diff --git a/llvm/test/Transforms/InstCombine/add-sitofp.ll b/llvm/test/Transforms/InstCombine/add-sitofp.ll index 2bdc808..f1afcaf 100644 --- a/llvm/test/Transforms/InstCombine/add-sitofp.ll +++ b/llvm/test/Transforms/InstCombine/add-sitofp.ll @@ -6,7 +6,7 @@ define double @x(i32 %a, i32 %b) { ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[A:%.*]], 24 ; CHECK-NEXT: [[N:%.*]] = and i32 [[M]], [[B:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[N]], 1 -; CHECK-NEXT: [[P:%.*]] = uitofp i32 [[TMP1]] to double +; CHECK-NEXT: [[P:%.*]] = uitofp nneg i32 [[TMP1]] to double ; CHECK-NEXT: ret double [[P]] ; %m = lshr i32 %a, 24 @@ -20,7 +20,7 @@ define double @test(i32 %a) { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[A_AND]], 1 -; CHECK-NEXT: [[RES:%.*]] = uitofp i32 [[TMP1]] to double +; CHECK-NEXT: [[RES:%.*]] = uitofp nneg i32 [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; ; Drop two highest bits to guarantee that %a + 1 doesn't overflow @@ -33,7 +33,7 @@ define double @test(i32 %a) { define float @test_neg(i32 %a) { ; CHECK-LABEL: @test_neg( ; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 -; CHECK-NEXT: [[A_AND_FP:%.*]] = sitofp i32 [[A_AND]] to float +; CHECK-NEXT: [[A_AND_FP:%.*]] = uitofp nneg i32 [[A_AND]] to float ; CHECK-NEXT: [[RES:%.*]] = fadd float [[A_AND_FP]], 1.000000e+00 ; CHECK-NEXT: ret float [[RES]] ; @@ -49,7 +49,7 @@ define double @test_2(i32 %a, i32 %b) { ; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 ; CHECK-NEXT: [[B_AND:%.*]] = and i32 
[[B:%.*]], 1073741823 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[A_AND]], [[B_AND]] -; CHECK-NEXT: [[RES:%.*]] = uitofp i32 [[TMP1]] to double +; CHECK-NEXT: [[RES:%.*]] = uitofp nneg i32 [[TMP1]] to double ; CHECK-NEXT: ret double [[RES]] ; ; Drop two highest bits to guarantee that %a + %b doesn't overflow @@ -67,8 +67,8 @@ define float @test_2_neg(i32 %a, i32 %b) { ; CHECK-LABEL: @test_2_neg( ; CHECK-NEXT: [[A_AND:%.*]] = and i32 [[A:%.*]], 1073741823 ; CHECK-NEXT: [[B_AND:%.*]] = and i32 [[B:%.*]], 1073741823 -; CHECK-NEXT: [[A_AND_FP:%.*]] = sitofp i32 [[A_AND]] to float -; CHECK-NEXT: [[B_AND_FP:%.*]] = sitofp i32 [[B_AND]] to float +; CHECK-NEXT: [[A_AND_FP:%.*]] = uitofp nneg i32 [[A_AND]] to float +; CHECK-NEXT: [[B_AND_FP:%.*]] = uitofp nneg i32 [[B_AND]] to float ; CHECK-NEXT: [[RES:%.*]] = fadd float [[A_AND_FP]], [[B_AND_FP]] ; CHECK-NEXT: ret float [[RES]] ; @@ -89,7 +89,7 @@ define float @test_3(i32 %a, i32 %b) { ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[A:%.*]], 24 ; CHECK-NEXT: [[N:%.*]] = and i32 [[M]], [[B:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[N]], 1 -; CHECK-NEXT: [[P:%.*]] = uitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[P:%.*]] = uitofp nneg i32 [[TMP1]] to float ; CHECK-NEXT: ret float [[P]] ; %m = lshr i32 %a, 24 @@ -104,7 +104,7 @@ define <4 x double> @test_4(<4 x i32> %a, <4 x i32> %b) { ; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823> ; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823> ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw <4 x i32> [[A_AND]], [[B_AND]] -; CHECK-NEXT: [[RES:%.*]] = uitofp <4 x i32> [[TMP1]] to <4 x double> +; CHECK-NEXT: [[RES:%.*]] = uitofp nneg <4 x i32> [[TMP1]] to <4 x double> ; CHECK-NEXT: ret <4 x double> [[RES]] ; ; Drop two highest bits to guarantee that %a + %b doesn't overflow @@ -122,8 +122,8 @@ define <4 x float> @test_4_neg(<4 x i32> %a, <4 x i32> %b) { ; 
CHECK-LABEL: @test_4_neg( ; CHECK-NEXT: [[A_AND:%.*]] = and <4 x i32> [[A:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823> ; CHECK-NEXT: [[B_AND:%.*]] = and <4 x i32> [[B:%.*]], <i32 1073741823, i32 1073741823, i32 1073741823, i32 1073741823> -; CHECK-NEXT: [[A_AND_FP:%.*]] = sitofp <4 x i32> [[A_AND]] to <4 x float> -; CHECK-NEXT: [[B_AND_FP:%.*]] = sitofp <4 x i32> [[B_AND]] to <4 x float> +; CHECK-NEXT: [[A_AND_FP:%.*]] = uitofp nneg <4 x i32> [[A_AND]] to <4 x float> +; CHECK-NEXT: [[B_AND_FP:%.*]] = uitofp nneg <4 x i32> [[B_AND]] to <4 x float> ; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[A_AND_FP]], [[B_AND_FP]] ; CHECK-NEXT: ret <4 x float> [[RES]] ; diff --git a/llvm/test/Transforms/InstCombine/add.ll b/llvm/test/Transforms/InstCombine/add.ll index 408b0c6..39b4ad8 100644 --- a/llvm/test/Transforms/InstCombine/add.ll +++ b/llvm/test/Transforms/InstCombine/add.ll @@ -150,24 +150,24 @@ define i32 @test5_add_nsw(i32 %A, i32 %B) { ret i32 %D } -define <2 x i8> @neg_op0_vec_undef_elt(<2 x i8> %a, <2 x i8> %b) { -; CHECK-LABEL: @neg_op0_vec_undef_elt( +define <2 x i8> @neg_op0_vec_poison_elt(<2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: @neg_op0_vec_poison_elt( ; CHECK-NEXT: [[R:%.*]] = sub <2 x i8> [[B:%.*]], [[A:%.*]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; - %nega = sub <2 x i8> <i8 0, i8 undef>, %a + %nega = sub <2 x i8> <i8 0, i8 poison>, %a %r = add <2 x i8> %nega, %b ret <2 x i8> %r } -define <2 x i8> @neg_neg_vec_undef_elt(<2 x i8> %a, <2 x i8> %b) { -; CHECK-LABEL: @neg_neg_vec_undef_elt( +define <2 x i8> @neg_neg_vec_poison_elt(<2 x i8> %a, <2 x i8> %b) { +; CHECK-LABEL: @neg_neg_vec_poison_elt( ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[R:%.*]] = sub <2 x i8> zeroinitializer, [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; - %nega = sub <2 x i8> <i8 undef, i8 0>, %a - %negb = sub <2 x i8> <i8 undef, i8 0>, %b + %nega = sub <2 x i8> <i8 poison, i8 0>, %a + %negb = sub <2 x i8> <i8 poison, i8 0>, %b %r = 
add <2 x i8> %nega, %negb ret <2 x i8> %r } @@ -1196,14 +1196,14 @@ define <2 x i32> @test44_vec_non_matching(<2 x i32> %A) { ret <2 x i32> %C } -define <2 x i32> @test44_vec_undef(<2 x i32> %A) { -; CHECK-LABEL: @test44_vec_undef( -; CHECK-NEXT: [[B:%.*]] = or <2 x i32> [[A:%.*]], <i32 123, i32 undef> -; CHECK-NEXT: [[C:%.*]] = add <2 x i32> [[B]], <i32 -123, i32 undef> +define <2 x i32> @test44_vec_poison(<2 x i32> %A) { +; CHECK-LABEL: @test44_vec_poison( +; CHECK-NEXT: [[B:%.*]] = or <2 x i32> [[A:%.*]], <i32 123, i32 poison> +; CHECK-NEXT: [[C:%.*]] = add nsw <2 x i32> [[B]], <i32 -123, i32 poison> ; CHECK-NEXT: ret <2 x i32> [[C]] ; - %B = or <2 x i32> %A, <i32 123, i32 undef> - %C = add <2 x i32> %B, <i32 -123, i32 undef> + %B = or <2 x i32> %A, <i32 123, i32 poison> + %C = add <2 x i32> %B, <i32 -123, i32 poison> ret <2 x i32> %C } @@ -2983,7 +2983,7 @@ define i8 @signum_i8_i8_use3(i8 %x) { ret i8 %r } -; poison/undef is ok to propagate in shift amount +; poison is ok to propagate in shift amount ; complexity canonicalization guarantees that shift is op0 of add define <2 x i5> @signum_v2i5_v2i5(<2 x i5> %x) { diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index 63b11d0c..c20f48a 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -952,8 +952,8 @@ define i1 @substitute_constant_or_ne_uge_commute_logical(i8 %x, i8 %y) { ; Negative test - not safe to substitute vector constant with undef element -define <2 x i1> @substitute_constant_or_ne_slt_swap_vec(<2 x i8> %x, <2 x i8> %y) { -; CHECK-LABEL: @substitute_constant_or_ne_slt_swap_vec( +define <2 x i1> @substitute_constant_or_ne_slt_swap_vec_undef(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @substitute_constant_or_ne_slt_swap_vec_undef( ; CHECK-NEXT: [[C1:%.*]] = icmp ne <2 x i8> [[X:%.*]], <i8 42, i8 undef> ; CHECK-NEXT: [[C2:%.*]] = icmp slt <2 x i8> [[Y:%.*]], [[X]] ; 
CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[C1]], [[C2]] @@ -965,14 +965,29 @@ define <2 x i1> @substitute_constant_or_ne_slt_swap_vec(<2 x i8> %x, <2 x i8> %y ret <2 x i1> %r } +; TODO: The poison case would be valid to fold. + +define <2 x i1> @substitute_constant_or_ne_slt_swap_vec_poison(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @substitute_constant_or_ne_slt_swap_vec_poison( +; CHECK-NEXT: [[C1:%.*]] = icmp ne <2 x i8> [[X:%.*]], <i8 42, i8 poison> +; CHECK-NEXT: [[C2:%.*]] = icmp slt <2 x i8> [[Y:%.*]], [[X]] +; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[C1]], [[C2]] +; CHECK-NEXT: ret <2 x i1> [[R]] +; + %c1 = icmp ne <2 x i8> %x, <i8 42, i8 poison> + %c2 = icmp slt <2 x i8> %y, %x + %r = or <2 x i1> %c1, %c2 + ret <2 x i1> %r +} + define <2 x i1> @substitute_constant_or_ne_slt_swap_vec_logical(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @substitute_constant_or_ne_slt_swap_vec_logical( -; CHECK-NEXT: [[C1:%.*]] = icmp ne <2 x i8> [[X:%.*]], <i8 42, i8 undef> +; CHECK-NEXT: [[C1:%.*]] = icmp ne <2 x i8> [[X:%.*]], <i8 42, i8 poison> ; CHECK-NEXT: [[C2:%.*]] = icmp slt <2 x i8> [[Y:%.*]], [[X]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C1]], <2 x i1> <i1 true, i1 true>, <2 x i1> [[C2]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; - %c1 = icmp ne <2 x i8> %x, <i8 42, i8 undef> + %c1 = icmp ne <2 x i8> %x, <i8 42, i8 poison> %c2 = icmp slt <2 x i8> %y, %x %r = select <2 x i1> %c1, <2 x i1> <i1 true, i1 true>, <2 x i1> %c2 ret <2 x i1> %r @@ -2497,29 +2512,29 @@ define <2 x i1> @icmp_eq_m1_and_eq_m1(<2 x i8> %x, <2 x i8> %y) { ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], <i8 -1, i8 -1> ; CHECK-NEXT: ret <2 x i1> [[R]] ; - %rx = icmp eq <2 x i8> %x, <i8 -1, i8 undef> - %ry = icmp eq <2 x i8> %y, <i8 -1, i8 undef> + %rx = icmp eq <2 x i8> %x, <i8 -1, i8 poison> + %ry = icmp eq <2 x i8> %y, <i8 -1, i8 poison> %r = and <2 x i1> %rx, %ry ret <2 x i1> %r } -define <2 x i1> @icmp_eq_m1_and_eq_undef_m1(<2 x i8> %x, <2 x i8> %y) { -; CHECK-LABEL: @icmp_eq_m1_and_eq_undef_m1( 
+define <2 x i1> @icmp_eq_m1_and_eq_poison_m1(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @icmp_eq_m1_and_eq_poison_m1( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], <i8 -1, i8 -1> ; CHECK-NEXT: ret <2 x i1> [[R]] ; - %rx = icmp eq <2 x i8> %x, <i8 -1, i8 undef> - %ry = icmp eq <2 x i8> %y, <i8 undef, i8 -1> + %rx = icmp eq <2 x i8> %x, <i8 -1, i8 poison> + %ry = icmp eq <2 x i8> %y, <i8 poison, i8 -1> %r = and <2 x i1> %rx, %ry ret <2 x i1> %r } -define <2 x i1> @icmp_eq_undef_and_eq_m1_m2(<2 x i8> %x, <2 x i8> %y) { -; CHECK-LABEL: @icmp_eq_undef_and_eq_m1_m2( -; CHECK-NEXT: ret <2 x i1> zeroinitializer +define <2 x i1> @icmp_eq_poison_and_eq_m1_m2(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @icmp_eq_poison_and_eq_m1_m2( +; CHECK-NEXT: ret <2 x i1> poison ; - %rx = icmp eq <2 x i8> %x, <i8 undef, i8 undef> + %rx = icmp eq <2 x i8> %x, <i8 poison, i8 poison> %ry = icmp eq <2 x i8> %y, <i8 -1, i8 -2> %r = and <2 x i1> %rx, %ry ret <2 x i1> %r @@ -2527,13 +2542,13 @@ define <2 x i1> @icmp_eq_undef_and_eq_m1_m2(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @icmp_ne_m1_and_ne_m1_fail(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @icmp_ne_m1_and_ne_m1_fail( -; CHECK-NEXT: [[RX:%.*]] = icmp ne <2 x i8> [[X:%.*]], <i8 -1, i8 undef> -; CHECK-NEXT: [[RY:%.*]] = icmp ne <2 x i8> [[Y:%.*]], <i8 -1, i8 undef> +; CHECK-NEXT: [[RX:%.*]] = icmp ne <2 x i8> [[X:%.*]], <i8 -1, i8 poison> +; CHECK-NEXT: [[RY:%.*]] = icmp ne <2 x i8> [[Y:%.*]], <i8 -1, i8 poison> ; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[RX]], [[RY]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; - %rx = icmp ne <2 x i8> %x, <i8 -1, i8 undef> - %ry = icmp ne <2 x i8> %y, <i8 -1, i8 undef> + %rx = icmp ne <2 x i8> %x, <i8 -1, i8 poison> + %ry = icmp ne <2 x i8> %y, <i8 -1, i8 poison> %r = and <2 x i1> %rx, %ry ret <2 x i1> %r } @@ -2541,13 +2556,13 @@ define <2 x i1> @icmp_ne_m1_and_ne_m1_fail(<2 x i8> %x, <2 x i8> %y) { define <2 x i1> @icmp_eq_m1_or_eq_m1_fail(<2 
x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @icmp_eq_m1_or_eq_m1_fail( -; CHECK-NEXT: [[RX:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 -1, i8 undef> -; CHECK-NEXT: [[RY:%.*]] = icmp eq <2 x i8> [[Y:%.*]], <i8 -1, i8 undef> +; CHECK-NEXT: [[RX:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 -1, i8 poison> +; CHECK-NEXT: [[RY:%.*]] = icmp eq <2 x i8> [[Y:%.*]], <i8 -1, i8 poison> ; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[RX]], [[RY]] ; CHECK-NEXT: ret <2 x i1> [[R]] ; - %rx = icmp eq <2 x i8> %x, <i8 -1, i8 undef> - %ry = icmp eq <2 x i8> %y, <i8 -1, i8 undef> + %rx = icmp eq <2 x i8> %x, <i8 -1, i8 poison> + %ry = icmp eq <2 x i8> %y, <i8 -1, i8 poison> %r = or <2 x i1> %rx, %ry ret <2 x i1> %r } @@ -2560,7 +2575,7 @@ define <2 x i1> @icmp_ne_m1_or_ne_m1(<2 x i8> %x, <2 x i8> %y) { ; CHECK-NEXT: ret <2 x i1> [[R]] ; %rx = icmp ne <2 x i8> %x, <i8 -1, i8 -1> - %ry = icmp ne <2 x i8> %y, <i8 -1, i8 undef> + %ry = icmp ne <2 x i8> %y, <i8 -1, i8 poison> %r = or <2 x i1> %rx, %ry ret <2 x i1> %r } diff --git a/llvm/test/Transforms/InstCombine/and-xor-or.ll b/llvm/test/Transforms/InstCombine/and-xor-or.ll index d072dc1..b26d6e1 100644 --- a/llvm/test/Transforms/InstCombine/and-xor-or.ll +++ b/llvm/test/Transforms/InstCombine/and-xor-or.ll @@ -843,7 +843,7 @@ define <2 x i6> @not_or_or_not_2i6(<2 x i6> %a0, <2 x i6> %b, <2 x i6> %c) { ; %a = sdiv <2 x i6> <i6 3, i6 3>, %a0 ; thwart complexity-based canonicalization %not1 = xor <2 x i6> %b, <i6 -1, i6 -1> - %not2 = xor <2 x i6> %c, <i6 -1, i6 undef> + %not2 = xor <2 x i6> %c, <i6 -1, i6 poison> %or1 = or <2 x i6> %a, %not1 %or2 = or <2 x i6> %or1, %not2 ret <2 x i6> %or2 @@ -4018,7 +4018,7 @@ define <2 x i4> @and_orn_xor_commute1(<2 x i4> %a, <2 x i4> %b) { ; CHECK-NEXT: ret <2 x i4> [[R]] ; %xor = xor <2 x i4> %a, %b - %nota = xor <2 x i4> %a, <i4 -1, i4 undef> + %nota = xor <2 x i4> %a, <i4 -1, i4 poison> %or = or <2 x i4> %nota, %b %r = and <2 x i4> %xor, %or ret <2 x i4> %r diff --git a/llvm/test/Transforms/InstCombine/and.ll 
b/llvm/test/Transforms/InstCombine/and.ll index ffd8c2a..b5250fc 100644 --- a/llvm/test/Transforms/InstCombine/and.ll +++ b/llvm/test/Transforms/InstCombine/and.ll @@ -752,16 +752,16 @@ define <2 x i64> @test36_uniform(<2 x i32> %X) { ret <2 x i64> %res } -define <2 x i64> @test36_undef(<2 x i32> %X) { -; CHECK-LABEL: @test36_undef( +define <2 x i64> @test36_poison(<2 x i32> %X) { +; CHECK-LABEL: @test36_poison( ; CHECK-NEXT: [[ZEXT:%.*]] = zext <2 x i32> [[X:%.*]] to <2 x i64> -; CHECK-NEXT: [[ZSUB:%.*]] = add <2 x i64> [[ZEXT]], <i64 7, i64 undef> -; CHECK-NEXT: [[RES:%.*]] = and <2 x i64> [[ZSUB]], <i64 240, i64 undef> +; CHECK-NEXT: [[ZSUB:%.*]] = add nuw nsw <2 x i64> [[ZEXT]], <i64 7, i64 poison> +; CHECK-NEXT: [[RES:%.*]] = and <2 x i64> [[ZSUB]], <i64 240, i64 poison> ; CHECK-NEXT: ret <2 x i64> [[RES]] ; %zext = zext <2 x i32> %X to <2 x i64> - %zsub = add <2 x i64> %zext, <i64 7, i64 undef> - %res = and <2 x i64> %zsub, <i64 240, i64 undef> + %zsub = add <2 x i64> %zext, <i64 7, i64 poison> + %res = and <2 x i64> %zsub, <i64 240, i64 poison> ret <2 x i64> %res } @@ -1630,16 +1630,16 @@ define <2 x i8> @lowmask_add_splat(<2 x i8> %x, ptr %p) { ret <2 x i8> %r } -define <2 x i8> @lowmask_add_splat_undef(<2 x i8> %x, ptr %p) { -; CHECK-LABEL: @lowmask_add_splat_undef( -; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], <i8 -64, i8 undef> +define <2 x i8> @lowmask_add_splat_poison(<2 x i8> %x, ptr %p) { +; CHECK-LABEL: @lowmask_add_splat_poison( +; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], <i8 -64, i8 poison> ; CHECK-NEXT: store <2 x i8> [[A]], ptr [[P:%.*]], align 2 -; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[A]], <i8 undef, i8 32> +; CHECK-NEXT: [[R:%.*]] = and <2 x i8> [[X]], <i8 poison, i8 32> ; CHECK-NEXT: ret <2 x i8> [[R]] ; - %a = add <2 x i8> %x, <i8 -64, i8 undef> ; 0xc0 + %a = add <2 x i8> %x, <i8 -64, i8 poison> ; 0xc0 store <2 x i8> %a, ptr %p - %r = and <2 x i8> %a, <i8 undef, i8 32> ; 0x20 + %r = and <2 x i8> %a, <i8 poison, i8 32> ; 0x20 
ret <2 x i8> %r } @@ -1679,14 +1679,14 @@ define <2 x i8> @flip_masked_bit_uniform(<2 x i8> %A) { ret <2 x i8> %C } -define <2 x i8> @flip_masked_bit_undef(<2 x i8> %A) { -; CHECK-LABEL: @flip_masked_bit_undef( +define <2 x i8> @flip_masked_bit_poison(<2 x i8> %A) { +; CHECK-LABEL: @flip_masked_bit_poison( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], <i8 -1, i8 -1> -; CHECK-NEXT: [[C:%.*]] = and <2 x i8> [[TMP1]], <i8 16, i8 undef> +; CHECK-NEXT: [[C:%.*]] = and <2 x i8> [[TMP1]], <i8 16, i8 poison> ; CHECK-NEXT: ret <2 x i8> [[C]] ; - %B = add <2 x i8> %A, <i8 16, i8 undef> - %C = and <2 x i8> %B, <i8 16, i8 undef> + %B = add <2 x i8> %A, <i8 16, i8 poison> + %C = and <2 x i8> %B, <i8 16, i8 poison> ret <2 x i8> %C } @@ -2004,7 +2004,7 @@ define i16 @invert_signbit_splat_mask_use2(i8 %x, i16 %y) { ret i16 %r } -; extra use of sext is ok +; extra use of sext is ok define i16 @invert_signbit_splat_mask_use3(i8 %x, i16 %y) { ; CHECK-LABEL: @invert_signbit_splat_mask_use3( @@ -2120,41 +2120,40 @@ define <3 x i16> @shl_lshr_pow2_const_case1_non_uniform_vec_negative(<3 x i16> % ret <3 x i16> %r } -define <3 x i16> @shl_lshr_pow2_const_case1_undef1_vec(<3 x i16> %x) { -; CHECK-LABEL: @shl_lshr_pow2_const_case1_undef1_vec( +define <3 x i16> @shl_lshr_pow2_const_case1_poison1_vec(<3 x i16> %x) { +; CHECK-LABEL: @shl_lshr_pow2_const_case1_poison1_vec( ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 8, i16 4, i16 4> ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 8, i16 8, i16 8>, <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; - %shl = shl <3 x i16> <i16 undef, i16 16, i16 16>, %x + %shl = shl <3 x i16> <i16 poison, i16 16, i16 16>, %x %lshr = lshr <3 x i16> %shl, <i16 5, i16 5, i16 5> %r = and <3 x i16> %lshr, <i16 8, i16 8, i16 8> ret <3 x i16> %r } -define <3 x i16> @shl_lshr_pow2_const_case1_undef2_vec(<3 x i16> %x) { -; CHECK-LABEL: @shl_lshr_pow2_const_case1_undef2_vec( -; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> 
<i16 16, i16 16, i16 16>, [[X:%.*]] -; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], <i16 undef, i16 5, i16 5> -; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[LSHR]], <i16 8, i16 8, i16 8> +define <3 x i16> @shl_lshr_pow2_const_case1_poison2_vec(<3 x i16> %x) { +; CHECK-LABEL: @shl_lshr_pow2_const_case1_poison2_vec( +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 poison, i16 4, i16 4> +; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 8, i16 8, i16 8>, <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; %shl = shl <3 x i16> <i16 16, i16 16, i16 16>, %x - %lshr = lshr <3 x i16> %shl, <i16 undef, i16 5, i16 5> + %lshr = lshr <3 x i16> %shl, <i16 poison, i16 5, i16 5> %r = and <3 x i16> %lshr, <i16 8, i16 8, i16 8> ret <3 x i16> %r } -define <3 x i16> @shl_lshr_pow2_const_case1_undef3_vec(<3 x i16> %x) { -; CHECK-LABEL: @shl_lshr_pow2_const_case1_undef3_vec( +define <3 x i16> @shl_lshr_pow2_const_case1_poison3_vec(<3 x i16> %x) { +; CHECK-LABEL: @shl_lshr_pow2_const_case1_poison3_vec( ; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> <i16 16, i16 16, i16 16>, [[X:%.*]] ; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> [[SHL]], <i16 5, i16 5, i16 5> -; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[LSHR]], <i16 undef, i16 8, i16 8> +; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[LSHR]], <i16 poison, i16 8, i16 8> ; CHECK-NEXT: ret <3 x i16> [[R]] ; %shl = shl <3 x i16> <i16 16, i16 16, i16 16>, %x %lshr = lshr <3 x i16> %shl, <i16 5, i16 5, i16 5> - %r = and <3 x i16> %lshr, <i16 undef, i16 8, i16 8> + %r = and <3 x i16> %lshr, <i16 poison, i16 8, i16 8> ret <3 x i16> %r } @@ -2417,40 +2416,41 @@ define <3 x i16> @lshr_shl_pow2_const_case1_non_uniform_vec_negative(<3 x i16> % ret <3 x i16> %r } -define <3 x i16> @lshr_shl_pow2_const_case1_undef1_vec(<3 x i16> %x) { -; CHECK-LABEL: @lshr_shl_pow2_const_case1_undef1_vec( +define <3 x i16> @lshr_shl_pow2_const_case1_poison1_vec(<3 x i16> %x) { +; CHECK-LABEL: @lshr_shl_pow2_const_case1_poison1_vec( ; 
CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 -1, i16 12, i16 12> ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 128, i16 128, i16 128>, <3 x i16> zeroinitializer ; CHECK-NEXT: ret <3 x i16> [[R]] ; - %lshr = lshr <3 x i16> <i16 undef, i16 8192, i16 8192>, %x + %lshr = lshr <3 x i16> <i16 poison, i16 8192, i16 8192>, %x %shl = shl <3 x i16> %lshr, <i16 6, i16 6, i16 6> %r = and <3 x i16> %shl, <i16 128, i16 128, i16 128> ret <3 x i16> %r } -define <3 x i16> @lshr_shl_pow2_const_case1_undef2_vec(<3 x i16> %x) { -; CHECK-LABEL: @lshr_shl_pow2_const_case1_undef2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <3 x i16> [[X:%.*]], <i16 undef, i16 12, i16 12> -; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP1]], <3 x i16> <i16 128, i16 128, i16 128>, <3 x i16> zeroinitializer +define <3 x i16> @lshr_shl_pow2_const_case1_poison2_vec(<3 x i16> %x) { +; CHECK-LABEL: @lshr_shl_pow2_const_case1_poison2_vec( +; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> <i16 8192, i16 8192, i16 8192>, [[X:%.*]] +; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> [[LSHR]], <i16 poison, i16 6, i16 6> +; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[SHL]], <i16 128, i16 128, i16 128> ; CHECK-NEXT: ret <3 x i16> [[R]] ; %lshr = lshr <3 x i16> <i16 8192, i16 8192, i16 8192>, %x - %shl = shl <3 x i16> %lshr, <i16 undef, i16 6, i16 6> + %shl = shl <3 x i16> %lshr, <i16 poison, i16 6, i16 6> %r = and <3 x i16> %shl, <i16 128, i16 128, i16 128> ret <3 x i16> %r } -define <3 x i16> @lshr_shl_pow2_const_case1_undef3_vec(<3 x i16> %x) { -; CHECK-LABEL: @lshr_shl_pow2_const_case1_undef3_vec( +define <3 x i16> @lshr_shl_pow2_const_case1_poison3_vec(<3 x i16> %x) { +; CHECK-LABEL: @lshr_shl_pow2_const_case1_poison3_vec( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <3 x i16> <i16 8192, i16 8192, i16 8192>, [[X:%.*]] ; CHECK-NEXT: [[SHL:%.*]] = shl <3 x i16> [[LSHR]], <i16 6, i16 6, i16 6> -; CHECK-NEXT: [[R:%.*]] = and <3 x i16> [[SHL]], <i16 undef, i16 128, i16 128> +; CHECK-NEXT: [[R:%.*]] = and <3 x 
i16> [[SHL]], <i16 poison, i16 128, i16 128> ; CHECK-NEXT: ret <3 x i16> [[R]] ; %lshr = lshr <3 x i16> <i16 8192, i16 8192, i16 8192>, %x %shl = shl <3 x i16> %lshr, <i16 6, i16 6, i16 6> - %r = and <3 x i16> %shl, <i16 undef, i16 128, i16 128> + %r = and <3 x i16> %shl, <i16 poison, i16 128, i16 128> ret <3 x i16> %r } diff --git a/llvm/test/Transforms/InstCombine/and2.ll b/llvm/test/Transforms/InstCombine/and2.ll index 73bdadc..104486e 100644 --- a/llvm/test/Transforms/InstCombine/and2.ll +++ b/llvm/test/Transforms/InstCombine/and2.ll @@ -168,14 +168,14 @@ define <2 x i8> @and1_shl1_is_cmp_eq_0_vec(<2 x i8> %x) { ret <2 x i8> %and } -define <2 x i8> @and1_shl1_is_cmp_eq_0_vec_undef(<2 x i8> %x) { -; CHECK-LABEL: @and1_shl1_is_cmp_eq_0_vec_undef( +define <2 x i8> @and1_shl1_is_cmp_eq_0_vec_poison(<2 x i8> %x) { +; CHECK-LABEL: @and1_shl1_is_cmp_eq_0_vec_poison( ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer ; CHECK-NEXT: [[AND:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[AND]] ; - %sh = shl <2 x i8> <i8 1, i8 undef>, %x - %and = and <2 x i8> %sh, <i8 1, i8 undef> + %sh = shl <2 x i8> <i8 1, i8 poison>, %x + %and = and <2 x i8> %sh, <i8 1, i8 poison> ret <2 x i8> %and } @@ -215,14 +215,13 @@ define <2 x i8> @and1_lshr1_is_cmp_eq_0_vec(<2 x i8> %x) { ret <2 x i8> %and } -define <2 x i8> @and1_lshr1_is_cmp_eq_0_vec_undef(<2 x i8> %x) { -; CHECK-LABEL: @and1_lshr1_is_cmp_eq_0_vec_undef( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer -; CHECK-NEXT: [[AND:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8> +define <2 x i8> @and1_lshr1_is_cmp_eq_0_vec_poison(<2 x i8> %x) { +; CHECK-LABEL: @and1_lshr1_is_cmp_eq_0_vec_poison( +; CHECK-NEXT: [[AND:%.*]] = lshr <2 x i8> <i8 1, i8 poison>, [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[AND]] ; - %sh = lshr <2 x i8> <i8 1, i8 undef>, %x - %and = and <2 x i8> %sh, <i8 1, i8 undef> + %sh = lshr <2 x i8> <i8 1, i8 poison>, %x + %and = and <2 x i8> %sh, <i8 1, i8 
poison> ret <2 x i8> %and } diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll index 60fa5b25..ac206dc 100644 --- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll +++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll @@ -229,24 +229,24 @@ define <2 x i32> @ashr_lshr_inv_nonsplat_vec(<2 x i32> %x, <2 x i32> %y) { ret <2 x i32> %ret } -define <2 x i32> @ashr_lshr_vec_undef(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @ashr_lshr_vec_undef( +define <2 x i32> @ashr_lshr_vec_poison(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @ashr_lshr_vec_poison( ; CHECK-NEXT: [[CMP12:%.*]] = ashr <2 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[CMP12]] ; - %cmp = icmp sgt <2 x i32> %x, <i32 undef, i32 -1> + %cmp = icmp sgt <2 x i32> %x, <i32 poison, i32 -1> %l = lshr <2 x i32> %x, %y %r = ashr exact <2 x i32> %x, %y %ret = select <2 x i1> %cmp, <2 x i32> %l, <2 x i32> %r ret <2 x i32> %ret } -define <2 x i32> @ashr_lshr_vec_undef2(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @ashr_lshr_vec_undef2( +define <2 x i32> @ashr_lshr_vec_poison2(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @ashr_lshr_vec_poison2( ; CHECK-NEXT: [[CMP1:%.*]] = ashr exact <2 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[CMP1]] ; - %cmp = icmp slt <2 x i32> %x, <i32 1, i32 undef> + %cmp = icmp slt <2 x i32> %x, <i32 1, i32 poison> %l = lshr exact <2 x i32> %x, %y %r = ashr exact <2 x i32> %x, %y %ret = select <2 x i1> %cmp, <2 x i32> %r, <2 x i32> %l @@ -498,14 +498,14 @@ define <3 x i42> @lshr_sub_nsw_splat(<3 x i42> %x, <3 x i42> %y) { ret <3 x i42> %shr } -define <3 x i42> @lshr_sub_nsw_splat_undef(<3 x i42> %x, <3 x i42> %y) { -; CHECK-LABEL: @lshr_sub_nsw_splat_undef( +define <3 x i42> @lshr_sub_nsw_splat_poison(<3 x i42> %x, <3 x i42> %y) { +; CHECK-LABEL: @lshr_sub_nsw_splat_poison( ; CHECK-NEXT: [[SUB:%.*]] = sub nsw <3 x i42> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[SHR:%.*]] = lshr <3 x i42> [[SUB]], <i42 41, i42 undef, i42 
41> +; CHECK-NEXT: [[SHR:%.*]] = lshr <3 x i42> [[SUB]], <i42 41, i42 poison, i42 41> ; CHECK-NEXT: ret <3 x i42> [[SHR]] ; %sub = sub nsw <3 x i42> %x, %y - %shr = lshr <3 x i42> %sub, <i42 41, i42 undef, i42 41> + %shr = lshr <3 x i42> %sub, <i42 41, i42 poison, i42 41> ret <3 x i42> %shr } @@ -572,14 +572,14 @@ define <3 x i43> @ashr_sub_nsw_splat(<3 x i43> %x, <3 x i43> %y) { ret <3 x i43> %shr } -define <3 x i43> @ashr_sub_nsw_splat_undef(<3 x i43> %x, <3 x i43> %y) { -; CHECK-LABEL: @ashr_sub_nsw_splat_undef( +define <3 x i43> @ashr_sub_nsw_splat_poison(<3 x i43> %x, <3 x i43> %y) { +; CHECK-LABEL: @ashr_sub_nsw_splat_poison( ; CHECK-NEXT: [[SUB:%.*]] = sub nsw <3 x i43> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[SHR:%.*]] = ashr <3 x i43> [[SUB]], <i43 42, i43 undef, i43 42> +; CHECK-NEXT: [[SHR:%.*]] = ashr <3 x i43> [[SUB]], <i43 42, i43 poison, i43 42> ; CHECK-NEXT: ret <3 x i43> [[SHR]] ; %sub = sub nsw <3 x i43> %x, %y - %shr = ashr <3 x i43> %sub, <i43 42, i43 undef, i43 42> + %shr = ashr <3 x i43> %sub, <i43 42, i43 poison, i43 42> ret <3 x i43> %shr } diff --git a/llvm/test/Transforms/InstCombine/ashr-or-mul-abs.ll b/llvm/test/Transforms/InstCombine/ashr-or-mul-abs.ll index 3cf312e..46a7f2f 100644 --- a/llvm/test/Transforms/InstCombine/ashr-or-mul-abs.ll +++ b/llvm/test/Transforms/InstCombine/ashr-or-mul-abs.ll @@ -62,13 +62,13 @@ define <4 x i32> @ashr_or_mul_to_abs_vec2(<4 x i32> %X) { ret <4 x i32> %i2 } -define <4 x i32> @ashr_or_mul_to_abs_vec3_undef(<4 x i32> %X) { -; CHECK-LABEL: @ashr_or_mul_to_abs_vec3_undef( +define <4 x i32> @ashr_or_mul_to_abs_vec3_poison(<4 x i32> %X) { +; CHECK-LABEL: @ashr_or_mul_to_abs_vec3_poison( ; CHECK-NEXT: [[I2:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[X:%.*]], i1 false) ; CHECK-NEXT: ret <4 x i32> [[I2]] ; - %i = ashr <4 x i32> %X, <i32 31, i32 undef, i32 31, i32 31> - %i1 = or <4 x i32> %i, <i32 1, i32 1, i32 1, i32 undef> + %i = ashr <4 x i32> %X, <i32 31, i32 poison, i32 31, i32 31> + %i1 = or <4 x i32> 
%i, <i32 1, i32 1, i32 1, i32 poison> %i2 = mul <4 x i32> %i1, %X ret <4 x i32> %i2 } diff --git a/llvm/test/Transforms/InstCombine/binop-and-shifts.ll b/llvm/test/Transforms/InstCombine/binop-and-shifts.ll index 1489638..f776dc1 100644 --- a/llvm/test/Transforms/InstCombine/binop-and-shifts.ll +++ b/llvm/test/Transforms/InstCombine/binop-and-shifts.ll @@ -178,27 +178,27 @@ define <2 x i8> @shl_xor_and(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @shl_xor_and( ; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i8> [[Y:%.*]], <i8 11, i8 poison> ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X:%.*]] -; CHECK-NEXT: [[BW1:%.*]] = shl <2 x i8> [[TMP2]], <i8 2, i8 undef> +; CHECK-NEXT: [[BW1:%.*]] = shl <2 x i8> [[TMP2]], <i8 2, i8 poison> ; CHECK-NEXT: ret <2 x i8> [[BW1]] ; - %shift1 = shl <2 x i8> %x, <i8 2, i8 undef> - %shift2 = shl <2 x i8> %y, <i8 2, i8 undef> - %bw2 = xor <2 x i8> %shift2, <i8 44, i8 undef> + %shift1 = shl <2 x i8> %x, <i8 2, i8 poison> + %shift2 = shl <2 x i8> %y, <i8 2, i8 poison> + %bw2 = xor <2 x i8> %shift2, <i8 44, i8 poison> %bw1 = and <2 x i8> %bw2, %shift1 ret <2 x i8> %bw1 } define <2 x i8> @shl_xor_and_fail(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @shl_xor_and_fail( -; CHECK-NEXT: [[SHIFT1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 2, i8 undef> -; CHECK-NEXT: [[SHIFT2:%.*]] = shl <2 x i8> [[Y:%.*]], <i8 undef, i8 2> -; CHECK-NEXT: [[BW2:%.*]] = xor <2 x i8> [[SHIFT2]], <i8 44, i8 undef> +; CHECK-NEXT: [[SHIFT1:%.*]] = shl <2 x i8> [[X:%.*]], <i8 2, i8 poison> +; CHECK-NEXT: [[SHIFT2:%.*]] = shl <2 x i8> [[Y:%.*]], <i8 poison, i8 2> +; CHECK-NEXT: [[BW2:%.*]] = xor <2 x i8> [[SHIFT2]], <i8 44, i8 poison> ; CHECK-NEXT: [[BW1:%.*]] = and <2 x i8> [[SHIFT1]], [[BW2]] ; CHECK-NEXT: ret <2 x i8> [[BW1]] ; - %shift1 = shl <2 x i8> %x, <i8 2, i8 undef> - %shift2 = shl <2 x i8> %y, <i8 undef, i8 2> - %bw2 = xor <2 x i8> %shift2, <i8 44, i8 undef> + %shift1 = shl <2 x i8> %x, <i8 2, i8 poison> + %shift2 = shl <2 x i8> %y, <i8 poison, i8 2> + %bw2 = xor <2 x 
i8> %shift2, <i8 44, i8 poison> %bw1 = and <2 x i8> %shift1, %bw2 ret <2 x i8> %bw1 } @@ -321,13 +321,13 @@ define <2 x i8> @lshr_add_and(<2 x i8> %x, <2 x i8> %y) { define <2 x i8> @lshr_add_or_fail_dif_masks(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @lshr_add_or_fail_dif_masks( ; CHECK-NEXT: [[SHIFT1:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 3, i8 4> -; CHECK-NEXT: [[SHIFT2:%.*]] = lshr <2 x i8> [[Y:%.*]], <i8 undef, i8 3> -; CHECK-NEXT: [[BW2:%.*]] = add <2 x i8> [[SHIFT2]], <i8 -1, i8 1> +; CHECK-NEXT: [[SHIFT2:%.*]] = lshr <2 x i8> [[Y:%.*]], <i8 poison, i8 3> +; CHECK-NEXT: [[BW2:%.*]] = add nsw <2 x i8> [[SHIFT2]], <i8 -1, i8 1> ; CHECK-NEXT: [[BW1:%.*]] = and <2 x i8> [[SHIFT1]], [[BW2]] ; CHECK-NEXT: ret <2 x i8> [[BW1]] ; %shift1 = lshr <2 x i8> %x, <i8 3, i8 4> - %shift2 = lshr <2 x i8> %y, <i8 undef, i8 3> + %shift2 = lshr <2 x i8> %y, <i8 poison, i8 3> %bw2 = add <2 x i8> %shift2, <i8 255, i8 1> %bw1 = and <2 x i8> %shift1, %bw2 ret <2 x i8> %bw1 @@ -659,8 +659,8 @@ define <4 x i8> @and_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %s ret <4 x i8> %and } -define <4 x i8> @and_ashr_not_vec_undef_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { -; CHECK-LABEL: @and_ashr_not_vec_undef_1( +define <4 x i8> @and_ashr_not_vec_poison_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { +; CHECK-LABEL: @and_ashr_not_vec_poison_1( ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1> ; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i8> [[TMP1]], [[X:%.*]] ; CHECK-NEXT: [[AND:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]] @@ -668,18 +668,18 @@ define <4 x i8> @and_ashr_not_vec_undef_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %sh ; %x.shift = ashr <4 x i8> %x, %shamt %y.shift = ashr <4 x i8> %y, %shamt - %y.shift.not = xor <4 x i8> %y.shift, <i8 -1, i8 undef, i8 undef, i8 undef> + %y.shift.not = xor <4 x i8> %y.shift, <i8 -1, i8 poison, i8 poison, i8 poison> %and = and <4 x i8> %x.shift, %y.shift.not ret <4 x i8> %and } -define <4 x i8> 
@and_ashr_not_vec_undef_2(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { -; CHECK-LABEL: @and_ashr_not_vec_undef_2( -; CHECK-NEXT: ret <4 x i8> zeroinitializer +define <4 x i8> @and_ashr_not_vec_poison_2(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { +; CHECK-LABEL: @and_ashr_not_vec_poison_2( +; CHECK-NEXT: ret <4 x i8> poison ; %x.shift = ashr <4 x i8> %x, %shamt %y.shift = ashr <4 x i8> %y, %shamt - %y.shift.not = xor <4 x i8> %y.shift, <i8 undef, i8 undef, i8 undef, i8 undef> + %y.shift.not = xor <4 x i8> %y.shift, <i8 poison, i8 poison, i8 poison, i8 poison> %and = and <4 x i8> %x.shift, %y.shift.not ret <4 x i8> %and } @@ -793,8 +793,8 @@ define <4 x i8> @or_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %sh ret <4 x i8> %or } -define <4 x i8> @or_ashr_not_vec_undef_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { -; CHECK-LABEL: @or_ashr_not_vec_undef_1( +define <4 x i8> @or_ashr_not_vec_poison_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { +; CHECK-LABEL: @or_ashr_not_vec_poison_1( ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1> ; CHECK-NEXT: [[TMP2:%.*]] = or <4 x i8> [[TMP1]], [[X:%.*]] ; CHECK-NEXT: [[OR:%.*]] = ashr <4 x i8> [[TMP2]], [[SHAMT:%.*]] @@ -802,18 +802,18 @@ define <4 x i8> @or_ashr_not_vec_undef_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %sha ; %x.shift = ashr <4 x i8> %x, %shamt %y.shift = ashr <4 x i8> %y, %shamt - %y.shift.not = xor <4 x i8> %y.shift, <i8 -1, i8 undef, i8 undef, i8 undef> + %y.shift.not = xor <4 x i8> %y.shift, <i8 -1, i8 poison, i8 poison, i8 poison> %or = or <4 x i8> %x.shift, %y.shift.not ret <4 x i8> %or } -define <4 x i8> @or_ashr_not_vec_undef_2(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { -; CHECK-LABEL: @or_ashr_not_vec_undef_2( -; CHECK-NEXT: ret <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1> +define <4 x i8> @or_ashr_not_vec_poison_2(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { +; CHECK-LABEL: @or_ashr_not_vec_poison_2( +; CHECK-NEXT: ret <4 x i8> poison ; %x.shift = ashr <4 x i8> %x, 
%shamt %y.shift = ashr <4 x i8> %y, %shamt - %y.shift.not = xor <4 x i8> %y.shift, <i8 undef, i8 undef, i8 undef, i8 undef> + %y.shift.not = xor <4 x i8> %y.shift, <i8 poison, i8 poison, i8 poison, i8 poison> %or = or <4 x i8> %x.shift, %y.shift.not ret <4 x i8> %or } @@ -926,8 +926,8 @@ define <4 x i8> @xor_ashr_not_vec_commuted(<4 x i8> %x, <4 x i8> %y, <4 x i8> %s ret <4 x i8> %xor } -define <4 x i8> @xor_ashr_not_vec_undef_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { -; CHECK-LABEL: @xor_ashr_not_vec_undef_1( +define <4 x i8> @xor_ashr_not_vec_poison_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { +; CHECK-LABEL: @xor_ashr_not_vec_poison_1( ; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i8> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: [[DOTNOT:%.*]] = ashr <4 x i8> [[TMP1]], [[SHAMT:%.*]] ; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i8> [[DOTNOT]], <i8 -1, i8 -1, i8 -1, i8 -1> @@ -935,18 +935,18 @@ define <4 x i8> @xor_ashr_not_vec_undef_1(<4 x i8> %x, <4 x i8> %y, <4 x i8> %sh ; %x.shift = ashr <4 x i8> %x, %shamt %y.shift = ashr <4 x i8> %y, %shamt - %y.shift.not = xor <4 x i8> %y.shift, <i8 -1, i8 undef, i8 undef, i8 undef> + %y.shift.not = xor <4 x i8> %y.shift, <i8 -1, i8 poison, i8 poison, i8 poison> %xor = xor <4 x i8> %x.shift, %y.shift.not ret <4 x i8> %xor } -define <4 x i8> @xor_ashr_not_vec_undef_2(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { -; CHECK-LABEL: @xor_ashr_not_vec_undef_2( -; CHECK-NEXT: ret <4 x i8> undef +define <4 x i8> @xor_ashr_not_vec_poison_2(<4 x i8> %x, <4 x i8> %y, <4 x i8> %shamt) { +; CHECK-LABEL: @xor_ashr_not_vec_poison_2( +; CHECK-NEXT: ret <4 x i8> poison ; %x.shift = ashr <4 x i8> %x, %shamt %y.shift = ashr <4 x i8> %y, %shamt - %y.shift.not = xor <4 x i8> %y.shift, <i8 undef, i8 undef, i8 undef, i8 undef> + %y.shift.not = xor <4 x i8> %y.shift, <i8 poison, i8 poison, i8 poison, i8 poison> %xor = xor <4 x i8> %x.shift, %y.shift.not ret <4 x i8> %xor } diff --git a/llvm/test/Transforms/InstCombine/binop-itofp.ll 
b/llvm/test/Transforms/InstCombine/binop-itofp.ll index d72a54e..097a819 100644 --- a/llvm/test/Transforms/InstCombine/binop-itofp.ll +++ b/llvm/test/Transforms/InstCombine/binop-itofp.ll @@ -21,7 +21,7 @@ define half @test_ui_ui_i8_add_fail_overflow(i8 noundef %x_in, i8 noundef %y_in) ; CHECK-LABEL: @test_ui_ui_i8_add_fail_overflow( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 127 ; CHECK-NEXT: [[Y:%.*]] = and i8 [[Y_IN:%.*]], -127 -; CHECK-NEXT: [[XF:%.*]] = uitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = uitofp i8 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -49,7 +49,7 @@ define half @test_ui_ui_i8_add_C(i8 noundef %x_in) { define half @test_ui_ui_i8_add_C_fail_no_repr(i8 noundef %x_in) { ; CHECK-LABEL: @test_ui_ui_i8_add_C_fail_no_repr( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 127 -; CHECK-NEXT: [[XF:%.*]] = uitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], 0xH57F8 ; CHECK-NEXT: ret half [[R]] ; @@ -62,7 +62,7 @@ define half @test_ui_ui_i8_add_C_fail_no_repr(i8 noundef %x_in) { define half @test_ui_ui_i8_add_C_fail_overflow(i8 noundef %x_in) { ; CHECK-LABEL: @test_ui_ui_i8_add_C_fail_overflow( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 127 -; CHECK-NEXT: [[XF:%.*]] = uitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], 0xH5808 ; CHECK-NEXT: ret half [[R]] ; @@ -110,7 +110,7 @@ define half @test_ui_si_i8_add(i8 noundef %x_in, i8 noundef %y_in) { ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 63 ; CHECK-NEXT: [[Y:%.*]] = and i8 [[Y_IN:%.*]], 63 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i8 %x_in, 63 @@ -140,7 +140,7 @@ define half 
@test_ui_si_i8_add_overflow(i8 noundef %x_in, i8 noundef %y_in) { define half @test_ui_ui_i8_sub_C(i8 noundef %x_in) { ; CHECK-LABEL: @test_ui_ui_i8_sub_C( ; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X_IN:%.*]], 127 -; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = or i8 %x_in, 128 @@ -166,7 +166,7 @@ define half @test_si_si_i8_sub(i8 noundef %x_in, i8 noundef %y_in) { ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 63 ; CHECK-NEXT: [[Y:%.*]] = or i8 [[Y_IN:%.*]], -64 ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = sitofp i8 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i8 %x_in, 63 @@ -181,7 +181,7 @@ define half @test_si_si_i8_sub_fail_overflow(i8 noundef %x_in, i8 noundef %y_in) ; CHECK-LABEL: @test_si_si_i8_sub_fail_overflow( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 63 ; CHECK-NEXT: [[Y:%.*]] = or i8 [[Y_IN:%.*]], -65 -; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = sitofp i8 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fsub half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -198,7 +198,7 @@ define half @test_si_si_i8_sub_C(i8 noundef %x_in) { ; CHECK-LABEL: @test_si_si_i8_sub_C( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 63 ; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[X]], 64 -; CHECK-NEXT: [[R:%.*]] = sitofp i8 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i8 %x_in, 63 @@ -283,7 +283,7 @@ define half @test_ui_ui_i8_mul_C(i8 noundef %x_in) { define half @test_ui_ui_i8_mul_C_fail_overlow(i8 noundef %x_in) { ; CHECK-LABEL: @test_ui_ui_i8_mul_C_fail_overlow( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 14 -; CHECK-NEXT: [[XF:%.*]] = uitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to 
half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], 0xH4CC0 ; CHECK-NEXT: ret half [[R]] ; @@ -315,7 +315,7 @@ define half @test_si_si_i8_mul_fail_maybe_zero(i8 noundef %x_in, i8 noundef %y_i ; CHECK-LABEL: @test_si_si_i8_mul_fail_maybe_zero( ; CHECK-NEXT: [[X:%.*]] = and i8 [[X_IN:%.*]], 7 ; CHECK-NEXT: [[Y:%.*]] = or i8 [[Y_IN:%.*]], -8 -; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = sitofp i8 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -332,7 +332,7 @@ define half @test_si_si_i8_mul_C_fail_no_repr(i8 noundef %x_in) { ; CHECK-LABEL: @test_si_si_i8_mul_C_fail_no_repr( ; CHECK-NEXT: [[XX:%.*]] = and i8 [[X_IN:%.*]], 6 ; CHECK-NEXT: [[X:%.*]] = or disjoint i8 [[XX]], 1 -; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], 0xHC780 ; CHECK-NEXT: ret half [[R]] ; @@ -347,7 +347,7 @@ define half @test_si_si_i8_mul_C_fail_overflow(i8 noundef %x_in) { ; CHECK-LABEL: @test_si_si_i8_mul_C_fail_overflow( ; CHECK-NEXT: [[XX:%.*]] = and i8 [[X_IN:%.*]], 6 ; CHECK-NEXT: [[X:%.*]] = or disjoint i8 [[XX]], 1 -; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], 0xHCCC0 ; CHECK-NEXT: ret half [[R]] ; @@ -365,7 +365,7 @@ define half @test_ui_si_i8_mul(i8 noundef %x_in, i8 noundef %y_in) { ; CHECK-NEXT: [[YY:%.*]] = and i8 [[Y_IN:%.*]], 7 ; CHECK-NEXT: [[Y:%.*]] = add nuw nsw i8 [[YY]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %xx = and i8 %x_in, 6 @@ -384,7 +384,7 @@ define half @test_ui_si_i8_mul_fail_maybe_zero(i8 noundef %x_in, i8 noundef %y_i ; CHECK-NEXT: [[X:%.*]] = add nuw nsw i8 
[[XX]], 1 ; CHECK-NEXT: [[Y:%.*]] = and i8 [[Y_IN:%.*]], 7 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i8 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = uitofp i8 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i8 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %xx = and i8 %x_in, 7 @@ -401,7 +401,7 @@ define half @test_ui_si_i8_mul_fail_signed(i8 noundef %x_in, i8 noundef %y_in) { ; CHECK-NEXT: [[XX:%.*]] = and i8 [[X_IN:%.*]], 7 ; CHECK-NEXT: [[X:%.*]] = add nuw nsw i8 [[XX]], 1 ; CHECK-NEXT: [[Y:%.*]] = or i8 [[Y_IN:%.*]], -4 -; CHECK-NEXT: [[XF:%.*]] = sitofp i8 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i8 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = uitofp i8 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -420,7 +420,7 @@ define half @test_ui_ui_i16_add(i16 noundef %x_in, i16 noundef %y_in) { ; CHECK-NEXT: [[X:%.*]] = and i16 [[X_IN:%.*]], 2047 ; CHECK-NEXT: [[Y:%.*]] = and i16 [[Y_IN:%.*]], 2047 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i16 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = uitofp i16 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i16 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i16 %x_in, 2047 @@ -435,8 +435,8 @@ define half @test_ui_ui_i16_add_fail_not_promotable(i16 noundef %x_in, i16 nound ; CHECK-LABEL: @test_ui_ui_i16_add_fail_not_promotable( ; CHECK-NEXT: [[X:%.*]] = and i16 [[X_IN:%.*]], 2049 ; CHECK-NEXT: [[Y:%.*]] = and i16 [[Y_IN:%.*]], 2047 -; CHECK-NEXT: [[XF:%.*]] = uitofp i16 [[X]] to half -; CHECK-NEXT: [[YF:%.*]] = uitofp i16 [[Y]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i16 [[X]] to half +; CHECK-NEXT: [[YF:%.*]] = uitofp nneg i16 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] ; @@ -463,7 +463,7 @@ define half @test_ui_ui_i16_add_C(i16 noundef %x_in) { define half @test_ui_ui_i16_add_C_fail_overflow(i16 noundef %x_in) { ; CHECK-LABEL: @test_ui_ui_i16_add_C_fail_overflow( ; CHECK-NEXT: [[X:%.*]] = and i16 
[[X_IN:%.*]], 2047 -; CHECK-NEXT: [[XF:%.*]] = uitofp i16 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i16 [[X]] to half ; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], 0xH7BD0 ; CHECK-NEXT: ret half [[R]] ; @@ -541,7 +541,7 @@ define half @test_si_si_i16_sub_fail_no_promotion(i16 noundef %x_in, i16 noundef ; CHECK-LABEL: @test_si_si_i16_sub_fail_no_promotion( ; CHECK-NEXT: [[X:%.*]] = and i16 [[X_IN:%.*]], 2047 ; CHECK-NEXT: [[Y:%.*]] = or i16 [[Y_IN:%.*]], -2049 -; CHECK-NEXT: [[XF:%.*]] = sitofp i16 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i16 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = sitofp i16 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fsub half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -575,7 +575,7 @@ define half @test_ui_si_i16_sub_fail_maybe_signed(i16 noundef %x_in, i16 noundef ; CHECK-NEXT: [[X:%.*]] = or i16 [[X_IN:%.*]], -2048 ; CHECK-NEXT: [[Y:%.*]] = and i16 [[Y_IN:%.*]], 2047 ; CHECK-NEXT: [[XF:%.*]] = uitofp i16 [[X]] to half -; CHECK-NEXT: [[YF:%.*]] = sitofp i16 [[Y]] to half +; CHECK-NEXT: [[YF:%.*]] = uitofp nneg i16 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fsub half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] ; @@ -607,8 +607,8 @@ define half @test_ui_ui_i16_mul_fail_no_promotion(i16 noundef %x_in, i16 noundef ; CHECK-LABEL: @test_ui_ui_i16_mul_fail_no_promotion( ; CHECK-NEXT: [[X:%.*]] = and i16 [[X_IN:%.*]], 4095 ; CHECK-NEXT: [[Y:%.*]] = and i16 [[Y_IN:%.*]], 3 -; CHECK-NEXT: [[XF:%.*]] = uitofp i16 [[X]] to half -; CHECK-NEXT: [[YF:%.*]] = uitofp i16 [[Y]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i16 [[X]] to half +; CHECK-NEXT: [[YF:%.*]] = uitofp nneg i16 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] ; @@ -643,7 +643,7 @@ define half @test_si_si_i16_mul_fail_overflow(i16 noundef %x_in, i16 noundef %y_ ; CHECK-NEXT: [[XX:%.*]] = and i16 [[X_IN:%.*]], 126 ; CHECK-NEXT: [[X:%.*]] = or disjoint i16 [[XX]], 1 ; CHECK-NEXT: [[Y:%.*]] = or i16 [[Y_IN:%.*]], -257 -; 
CHECK-NEXT: [[XF:%.*]] = sitofp i16 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i16 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = sitofp i16 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -690,7 +690,7 @@ define half @test_ui_si_i16_mul(i16 noundef %x_in, i16 noundef %y_in) { ; CHECK-NEXT: [[YY:%.*]] = and i16 [[Y_IN:%.*]], 126 ; CHECK-NEXT: [[Y:%.*]] = or disjoint i16 [[YY]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i16 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = uitofp i16 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i16 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %xx = and i16 %x_in, 126 @@ -723,7 +723,7 @@ define half @test_ui_ui_i12_add_fail_overflow(i12 noundef %x_in, i12 noundef %y_ ; CHECK-LABEL: @test_ui_ui_i12_add_fail_overflow( ; CHECK-NEXT: [[X:%.*]] = and i12 [[X_IN:%.*]], 2047 ; CHECK-NEXT: [[Y:%.*]] = and i12 [[Y_IN:%.*]], -2047 -; CHECK-NEXT: [[XF:%.*]] = uitofp i12 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i12 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = uitofp i12 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fadd half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -821,7 +821,7 @@ define half @test_si_si_i12_sub(i12 noundef %x_in, i12 noundef %y_in) { ; CHECK-NEXT: [[X:%.*]] = and i12 [[X_IN:%.*]], 1023 ; CHECK-NEXT: [[Y:%.*]] = or i12 [[Y_IN:%.*]], -1024 ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i12 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = sitofp i12 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i12 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i12 %x_in, 1023 @@ -850,7 +850,7 @@ define half @test_ui_ui_i12_mul(i12 noundef %x_in, i12 noundef %y_in) { ; CHECK-NEXT: [[X:%.*]] = and i12 [[X_IN:%.*]], 31 ; CHECK-NEXT: [[Y:%.*]] = and i12 [[Y_IN:%.*]], 63 ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i12 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = uitofp i12 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i12 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and 
i12 %x_in, 31 @@ -883,7 +883,7 @@ define half @test_ui_ui_i12_mul_C(i12 noundef %x_in) { ; CHECK-LABEL: @test_ui_ui_i12_mul_C( ; CHECK-NEXT: [[X:%.*]] = shl i12 [[X_IN:%.*]], 6 ; CHECK-NEXT: [[TMP1:%.*]] = and i12 [[X]], 1984 -; CHECK-NEXT: [[R:%.*]] = uitofp i12 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i12 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = and i12 %x_in, 31 @@ -915,7 +915,7 @@ define half @test_si_si_i12_mul_fail_overflow(i12 noundef %x_in, i12 noundef %y_ ; CHECK-NEXT: [[XX:%.*]] = and i12 [[X_IN:%.*]], 30 ; CHECK-NEXT: [[X:%.*]] = or disjoint i12 [[XX]], 1 ; CHECK-NEXT: [[Y:%.*]] = or i12 [[Y_IN:%.*]], -128 -; CHECK-NEXT: [[XF:%.*]] = sitofp i12 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i12 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = sitofp i12 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -933,7 +933,7 @@ define half @test_si_si_i12_mul_fail_maybe_non_zero(i12 noundef %x_in, i12 nound ; CHECK-LABEL: @test_si_si_i12_mul_fail_maybe_non_zero( ; CHECK-NEXT: [[X:%.*]] = and i12 [[X_IN:%.*]], 30 ; CHECK-NEXT: [[Y:%.*]] = or i12 [[Y_IN:%.*]], -128 -; CHECK-NEXT: [[XF:%.*]] = sitofp i12 [[X]] to half +; CHECK-NEXT: [[XF:%.*]] = uitofp nneg i12 [[X]] to half ; CHECK-NEXT: [[YF:%.*]] = sitofp i12 [[Y]] to half ; CHECK-NEXT: [[R:%.*]] = fmul half [[XF]], [[YF]] ; CHECK-NEXT: ret half [[R]] @@ -950,7 +950,7 @@ define half @test_si_si_i12_mul_C(i12 noundef %x_in) { ; CHECK-LABEL: @test_si_si_i12_mul_C( ; CHECK-NEXT: [[X:%.*]] = or i12 [[X_IN:%.*]], -64 ; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i12 [[X]], -16 -; CHECK-NEXT: [[R:%.*]] = sitofp i12 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i12 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %x = or i12 %x_in, -64 @@ -979,7 +979,7 @@ define half @test_ui_si_i12_mul_nsw(i12 noundef %x_in, i12 noundef %y_in) { ; CHECK-NEXT: [[YY:%.*]] = and i12 [[Y_IN:%.*]], 30 ; CHECK-NEXT: [[Y:%.*]] = or disjoint i12 [[YY]], 1 ; CHECK-NEXT: 
[[TMP1:%.*]] = mul nuw nsw i12 [[X]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = uitofp i12 [[TMP1]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i12 [[TMP1]] to half ; CHECK-NEXT: ret half [[R]] ; %xx = and i12 %x_in, 31 diff --git a/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll b/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll index 27a3c87..a16ad4d 100644 --- a/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll +++ b/llvm/test/Transforms/InstCombine/binop-of-displaced-shifts.ll @@ -202,41 +202,41 @@ define <2 x i8> @shl_or_non_splat(<2 x i8> %x) { ret <2 x i8> %binop } -define <2 x i8> @shl_or_undef_in_add(<2 x i8> %x) { -; CHECK-LABEL: define <2 x i8> @shl_or_undef_in_add +define <2 x i8> @shl_or_poison_in_add(<2 x i8> %x) { +; CHECK-LABEL: define <2 x i8> @shl_or_poison_in_add ; CHECK-SAME: (<2 x i8> [[X:%.*]]) { ; CHECK-NEXT: [[BINOP:%.*]] = shl <2 x i8> <i8 22, i8 poison>, [[X]] ; CHECK-NEXT: ret <2 x i8> [[BINOP]] ; %shift = shl <2 x i8> <i8 16, i8 16>, %x - %add = add <2 x i8> %x, <i8 1, i8 undef> + %add = add <2 x i8> %x, <i8 1, i8 poison> %shift2 = shl <2 x i8> <i8 3, i8 3>, %add %binop = or <2 x i8> %shift, %shift2 ret <2 x i8> %binop } -define <2 x i8> @shl_or_undef_in_shift1(<2 x i8> %x) { -; CHECK-LABEL: define <2 x i8> @shl_or_undef_in_shift1 +define <2 x i8> @shl_or_poison_in_shift1(<2 x i8> %x) { +; CHECK-LABEL: define <2 x i8> @shl_or_poison_in_shift1 ; CHECK-SAME: (<2 x i8> [[X:%.*]]) { -; CHECK-NEXT: [[BINOP:%.*]] = shl <2 x i8> <i8 22, i8 -1>, [[X]] +; CHECK-NEXT: [[BINOP:%.*]] = shl <2 x i8> <i8 22, i8 poison>, [[X]] ; CHECK-NEXT: ret <2 x i8> [[BINOP]] ; - %shift = shl <2 x i8> <i8 16, i8 undef>, %x + %shift = shl <2 x i8> <i8 16, i8 poison>, %x %add = add <2 x i8> %x, <i8 1, i8 1> %shift2 = shl <2 x i8> <i8 3, i8 3>, %add %binop = or <2 x i8> %shift, %shift2 ret <2 x i8> %binop } -define <2 x i8> @shl_or_undef_in_shift2(<2 x i8> %x) { -; CHECK-LABEL: define <2 x i8> @shl_or_undef_in_shift2 +define <2 x i8> 
@shl_or_poison_in_shift2(<2 x i8> %x) { +; CHECK-LABEL: define <2 x i8> @shl_or_poison_in_shift2 ; CHECK-SAME: (<2 x i8> [[X:%.*]]) { -; CHECK-NEXT: [[BINOP:%.*]] = shl <2 x i8> <i8 22, i8 16>, [[X]] +; CHECK-NEXT: [[BINOP:%.*]] = shl <2 x i8> <i8 22, i8 poison>, [[X]] ; CHECK-NEXT: ret <2 x i8> [[BINOP]] ; %shift = shl <2 x i8> <i8 16, i8 16>, %x %add = add <2 x i8> %x, <i8 1, i8 1> - %shift2 = shl <2 x i8> <i8 3, i8 undef>, %add + %shift2 = shl <2 x i8> <i8 3, i8 poison>, %add %binop = or <2 x i8> %shift, %shift2 ret <2 x i8> %binop } diff --git a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll index 4547008..c555970 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-between-zero-and-positive-threshold.ll @@ -338,22 +338,22 @@ define <2 x i32> @t18_ult_slt_vec_nonsplat(<2 x i32> %x, <2 x i32> %replacement_ ret <2 x i32> %r } -define <3 x i32> @t19_ult_slt_vec_undef0(<3 x i32> %x, <3 x i32> %replacement_low, <3 x i32> %replacement_high) { -; CHECK-LABEL: @t19_ult_slt_vec_undef0( +define <3 x i32> @t19_ult_slt_vec_poison0(<3 x i32> %x, <3 x i32> %replacement_low, <3 x i32> %replacement_high) { +; CHECK-LABEL: @t19_ult_slt_vec_poison0( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <3 x i32> [[X:%.*]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], <i32 65535, i32 65535, i32 65535> ; CHECK-NEXT: [[TMP3:%.*]] = select <3 x i1> [[TMP1]], <3 x i32> [[REPLACEMENT_LOW:%.*]], <3 x i32> [[X]] ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP2]], <3 x i32> [[REPLACEMENT_HIGH:%.*]], <3 x i32> [[TMP3]] ; CHECK-NEXT: ret <3 x i32> [[R]] ; - %t0 = icmp slt <3 x i32> %x, <i32 65536, i32 undef, i32 65536> + %t0 = icmp slt <3 x i32> %x, <i32 65536, i32 poison, i32 65536> %t1 
= select <3 x i1> %t0, <3 x i32> %replacement_low, <3 x i32> %replacement_high %t2 = icmp ult <3 x i32> %x, <i32 65536, i32 65536, i32 65536> %r = select <3 x i1> %t2, <3 x i32> %x, <3 x i32> %t1 ret <3 x i32> %r } -define <3 x i32> @t20_ult_slt_vec_undef1(<3 x i32> %x, <3 x i32> %replacement_low, <3 x i32> %replacement_high) { -; CHECK-LABEL: @t20_ult_slt_vec_undef1( +define <3 x i32> @t20_ult_slt_vec_poison1(<3 x i32> %x, <3 x i32> %replacement_low, <3 x i32> %replacement_high) { +; CHECK-LABEL: @t20_ult_slt_vec_poison1( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <3 x i32> [[X:%.*]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], <i32 65535, i32 65535, i32 65535> ; CHECK-NEXT: [[TMP3:%.*]] = select <3 x i1> [[TMP1]], <3 x i32> [[REPLACEMENT_LOW:%.*]], <3 x i32> [[X]] @@ -362,21 +362,21 @@ define <3 x i32> @t20_ult_slt_vec_undef1(<3 x i32> %x, <3 x i32> %replacement_lo ; %t0 = icmp slt <3 x i32> %x, <i32 65536, i32 65537, i32 65536> %t1 = select <3 x i1> %t0, <3 x i32> %replacement_low, <3 x i32> %replacement_high - %t2 = icmp ult <3 x i32> %x, <i32 65536, i32 undef, i32 65536> + %t2 = icmp ult <3 x i32> %x, <i32 65536, i32 poison, i32 65536> %r = select <3 x i1> %t2, <3 x i32> %x, <3 x i32> %t1 ret <3 x i32> %r } -define <3 x i32> @t21_ult_slt_vec_undef2(<3 x i32> %x, <3 x i32> %replacement_low, <3 x i32> %replacement_high) { -; CHECK-LABEL: @t21_ult_slt_vec_undef2( +define <3 x i32> @t21_ult_slt_vec_poison2(<3 x i32> %x, <3 x i32> %replacement_low, <3 x i32> %replacement_high) { +; CHECK-LABEL: @t21_ult_slt_vec_poison2( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <3 x i32> [[X:%.*]], zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <3 x i32> [[X]], <i32 65535, i32 65535, i32 65535> ; CHECK-NEXT: [[TMP3:%.*]] = select <3 x i1> [[TMP1]], <3 x i32> [[REPLACEMENT_LOW:%.*]], <3 x i32> [[X]] ; CHECK-NEXT: [[R:%.*]] = select <3 x i1> [[TMP2]], <3 x i32> [[REPLACEMENT_HIGH:%.*]], <3 x i32> [[TMP3]] ; CHECK-NEXT: ret <3 x i32> [[R]] ; - %t0 = icmp slt 
<3 x i32> %x, <i32 65536, i32 undef, i32 65536> + %t0 = icmp slt <3 x i32> %x, <i32 65536, i32 poison, i32 65536> %t1 = select <3 x i1> %t0, <3 x i32> %replacement_low, <3 x i32> %replacement_high - %t2 = icmp ult <3 x i32> %x, <i32 65536, i32 undef, i32 65536> + %t2 = icmp ult <3 x i32> %x, <i32 65536, i32 poison, i32 65536> %r = select <3 x i1> %t2, <3 x i32> %x, <3 x i32> %t1 ret <3 x i32> %r } diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll index 5b7a99d..7597706 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-eq-to-icmp-ule.ll @@ -79,12 +79,12 @@ define <2 x i1> @p2_vec_nonsplat_edgecase1(<2 x i8> %x) { ret <2 x i1> %ret } -define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) { -; CHECK-LABEL: @p3_vec_splat_undef( +define <3 x i1> @p3_vec_splat_poison(<3 x i8> %x) { +; CHECK-LABEL: @p3_vec_splat_poison( ; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[X:%.*]], <i8 4, i8 4, i8 4> ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3> + %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 3> %ret = icmp eq <3 x i8> %tmp0, %x ret <3 x i1> %ret } diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll index 160d968..95e6d5a 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll @@ -79,22 +79,22 @@ define <2 x i1> @p2_vec_nonsplat_edgecase1(<2 x i8> %x) { ret <2 x i1> %ret } -define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) { -; CHECK-LABEL: 
@p3_vec_splat_undef( +define <3 x i1> @p3_vec_splat_poison(<3 x i8> %x) { +; CHECK-LABEL: @p3_vec_splat_poison( ; CHECK-NEXT: [[RET:%.*]] = icmp ugt <3 x i8> [[X:%.*]], <i8 3, i8 3, i8 3> ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3> + %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 3> %ret = icmp ne <3 x i8> %tmp0, %x ret <3 x i1> %ret } -define <3 x i1> @p3_vec_nonsplat_undef(<3 x i8> %x) { -; CHECK-LABEL: @p3_vec_nonsplat_undef( +define <3 x i1> @p3_vec_nonsplat_poison(<3 x i8> %x) { +; CHECK-LABEL: @p3_vec_nonsplat_poison( ; CHECK-NEXT: [[RET:%.*]] = icmp ugt <3 x i8> [[X:%.*]], <i8 -1, i8 -1, i8 3> ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %tmp0 = and <3 x i8> %x, <i8 -1, i8 undef, i8 3> + %tmp0 = and <3 x i8> %x, <i8 -1, i8 poison, i8 3> %ret = icmp ne <3 x i8> %tmp0, %x ret <3 x i1> %ret } diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll index 6092104..ae503bf 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sge-to-icmp-sle.ll @@ -58,12 +58,12 @@ define <2 x i1> @p2_vec_nonsplat_edgecase(<2 x i8> %x) { ret <2 x i1> %ret } -define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) { -; CHECK-LABEL: @p3_vec_splat_undef( +define <3 x i1> @p3_vec_splat_poison(<3 x i8> %x) { +; CHECK-LABEL: @p3_vec_splat_poison( ; CHECK-NEXT: [[RET:%.*]] = icmp slt <3 x i8> [[X:%.*]], <i8 4, i8 4, i8 4> ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3> + %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 3> %ret = icmp sge <3 x i8> %tmp0, %x ret <3 x i1> %ret } @@ -175,11 +175,11 @@ define <2 x i1> @n3_vec(<2 x i8> %x) { define <3 x i1> @n4_vec(<3 x i8> %x) { ; CHECK-LABEL: @n4_vec( -; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i8> 
[[X:%.*]], <i8 3, i8 undef, i8 -1> +; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i8> [[X:%.*]], <i8 3, i8 poison, i8 -1> ; CHECK-NEXT: [[RET:%.*]] = icmp sge <3 x i8> [[TMP0]], [[X]] ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 -1> + %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 -1> %ret = icmp sge <3 x i8> %tmp0, %x ret <3 x i1> %ret } diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll index 6345e70..f1333fe 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sgt-to-icmp-sgt.ll @@ -72,26 +72,26 @@ define <2 x i1> @p2_vec_nonsplat_edgecase() { ret <2 x i1> %ret } -define <3 x i1> @p3_vec_splat_undef() { -; CHECK-LABEL: @p3_vec_splat_undef( +define <3 x i1> @p3_vec_splat_poison() { +; CHECK-LABEL: @p3_vec_splat_poison( ; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8() ; CHECK-NEXT: [[RET:%.*]] = icmp sgt <3 x i8> [[X]], <i8 3, i8 3, i8 3> ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %x = call <3 x i8> @gen3x8() - %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3> + %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 3> %ret = icmp sgt <3 x i8> %x, %tmp0 ret <3 x i1> %ret } -define <3 x i1> @p3_vec_nonsplat_undef() { -; CHECK-LABEL: @p3_vec_nonsplat_undef( +define <3 x i1> @p3_vec_nonsplat_poison() { +; CHECK-LABEL: @p3_vec_nonsplat_poison( ; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8() ; CHECK-NEXT: [[RET:%.*]] = icmp sgt <3 x i8> [[X]], <i8 15, i8 3, i8 15> ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %x = call <3 x i8> @gen3x8() - %tmp0 = and <3 x i8> %x, <i8 15, i8 3, i8 undef> + %tmp0 = and <3 x i8> %x, <i8 15, i8 3, i8 poison> %ret = icmp sgt <3 x i8> %x, %tmp0 ret <3 x i1> %ret } @@ -212,12 +212,12 @@ define <2 x i1> @n3_vec() { define <3 x i1> @n4_vec() { ; 
CHECK-LABEL: @n4_vec( ; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8() -; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i8> [[X]], <i8 3, i8 undef, i8 -1> +; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i8> [[X]], <i8 3, i8 poison, i8 -1> ; CHECK-NEXT: [[RET:%.*]] = icmp sgt <3 x i8> [[X]], [[TMP0]] ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %x = call <3 x i8> @gen3x8() - %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 -1> + %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 -1> %ret = icmp sgt <3 x i8> %x, %tmp0 ret <3 x i1> %ret } diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll index b7aec53..4bed21a 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-sle-to-icmp-sle.ll @@ -72,14 +72,14 @@ define <2 x i1> @p2_vec_nonsplat_edgecase() { ret <2 x i1> %ret } -define <3 x i1> @p3_vec_splat_undef() { -; CHECK-LABEL: @p3_vec_splat_undef( +define <3 x i1> @p3_vec_splat_poison() { +; CHECK-LABEL: @p3_vec_splat_poison( ; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8() ; CHECK-NEXT: [[RET:%.*]] = icmp slt <3 x i8> [[X]], <i8 4, i8 4, i8 4> ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %x = call <3 x i8> @gen3x8() - %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3> + %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 3> %ret = icmp sle <3 x i8> %x, %tmp0 ret <3 x i1> %ret } @@ -200,12 +200,12 @@ define <2 x i1> @n3_vec() { define <3 x i1> @n4_vec() { ; CHECK-LABEL: @n4_vec( ; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8() -; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i8> [[X]], <i8 3, i8 undef, i8 -1> +; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i8> [[X]], <i8 3, i8 poison, i8 -1> ; CHECK-NEXT: [[RET:%.*]] = icmp sle <3 x i8> [[X]], [[TMP0]] ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %x = call <3 x i8> @gen3x8() - %tmp0 = and <3 x i8> %x, <i8 3, 
i8 undef, i8 -1> + %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 -1> %ret = icmp sle <3 x i8> %x, %tmp0 ret <3 x i1> %ret } diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll index 56661d3..be6e3d0 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-slt-to-icmp-sgt.ll @@ -58,22 +58,22 @@ define <2 x i1> @p2_vec_nonsplat_edgecase(<2 x i8> %x) { ret <2 x i1> %ret } -define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) { -; CHECK-LABEL: @p3_vec_splat_undef( +define <3 x i1> @p3_vec_splat_poison(<3 x i8> %x) { +; CHECK-LABEL: @p3_vec_splat_poison( ; CHECK-NEXT: [[RET:%.*]] = icmp sgt <3 x i8> [[X:%.*]], <i8 3, i8 3, i8 3> ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3> + %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 3> %ret = icmp slt <3 x i8> %tmp0, %x ret <3 x i1> %ret } -define <3 x i1> @p3_vec_nonsplat_undef(<3 x i8> %x) { -; CHECK-LABEL: @p3_vec_nonsplat_undef( +define <3 x i1> @p3_vec_nonsplat_poison(<3 x i8> %x) { +; CHECK-LABEL: @p3_vec_nonsplat_poison( ; CHECK-NEXT: [[RET:%.*]] = icmp sgt <3 x i8> [[X:%.*]], <i8 15, i8 15, i8 3> ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %tmp0 = and <3 x i8> %x, <i8 undef, i8 15, i8 3> + %tmp0 = and <3 x i8> %x, <i8 poison, i8 15, i8 3> %ret = icmp slt <3 x i8> %tmp0, %x ret <3 x i1> %ret } @@ -185,11 +185,11 @@ define <2 x i1> @n3(<2 x i8> %x) { define <3 x i1> @n4(<3 x i8> %x) { ; CHECK-LABEL: @n4( -; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i8> [[X:%.*]], <i8 3, i8 undef, i8 -1> +; CHECK-NEXT: [[TMP0:%.*]] = and <3 x i8> [[X:%.*]], <i8 3, i8 poison, i8 -1> ; CHECK-NEXT: [[RET:%.*]] = icmp slt <3 x i8> [[TMP0]], [[X]] ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 -1> + 
%tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 -1> %ret = icmp slt <3 x i8> %tmp0, %x ret <3 x i1> %ret } diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll index a93e8f7..cfd48821 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-uge-to-icmp-ule.ll @@ -79,12 +79,12 @@ define <2 x i1> @p2_vec_nonsplat_edgecase1(<2 x i8> %x) { ret <2 x i1> %ret } -define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) { -; CHECK-LABEL: @p3_vec_splat_undef( +define <3 x i1> @p3_vec_splat_poison(<3 x i8> %x) { +; CHECK-LABEL: @p3_vec_splat_poison( ; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[X:%.*]], <i8 4, i8 4, i8 4> ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3> + %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 3> %ret = icmp uge <3 x i8> %tmp0, %x ret <3 x i1> %ret } diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll index 73ea4d4..6f6ba95 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ugt-to-icmp-ugt.ll @@ -95,26 +95,26 @@ define <2 x i1> @p2_vec_nonsplat_edgecase1() { ret <2 x i1> %ret } -define <3 x i1> @p3_vec_splat_undef() { -; CHECK-LABEL: @p3_vec_splat_undef( +define <3 x i1> @p3_vec_splat_poison() { +; CHECK-LABEL: @p3_vec_splat_poison( ; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8() ; CHECK-NEXT: [[RET:%.*]] = icmp ugt <3 x i8> [[X]], <i8 3, i8 3, i8 3> ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %x = call <3 x i8> @gen3x8() - %tmp0 = and <3 x i8> %x, <i8 
3, i8 undef, i8 3> + %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 3> %ret = icmp ugt <3 x i8> %x, %tmp0 ret <3 x i1> %ret } -define <3 x i1> @p3_vec_nonsplat_undef() { -; CHECK-LABEL: @p3_vec_nonsplat_undef( +define <3 x i1> @p3_vec_nonsplat_poison() { +; CHECK-LABEL: @p3_vec_nonsplat_poison( ; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8() ; CHECK-NEXT: [[RET:%.*]] = icmp ugt <3 x i8> [[X]], <i8 3, i8 3, i8 15> ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %x = call <3 x i8> @gen3x8() - %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 15> + %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 15> %ret = icmp ugt <3 x i8> %x, %tmp0 ret <3 x i1> %ret } diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll index 53886b5..54f0032 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ule-to-icmp-ule.ll @@ -95,14 +95,14 @@ define <2 x i1> @p2_vec_nonsplat_edgecase1() { ret <2 x i1> %ret } -define <3 x i1> @p3_vec_splat_undef() { -; CHECK-LABEL: @p3_vec_splat_undef( +define <3 x i1> @p3_vec_splat_poison() { +; CHECK-LABEL: @p3_vec_splat_poison( ; CHECK-NEXT: [[X:%.*]] = call <3 x i8> @gen3x8() ; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[X]], <i8 4, i8 4, i8 4> ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %x = call <3 x i8> @gen3x8() - %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3> + %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 3> %ret = icmp ule <3 x i8> %x, %tmp0 ret <3 x i1> %ret } diff --git a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll index d66be57..008fc6d 100644 --- 
a/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-constant-low-bit-mask-and-icmp-ult-to-icmp-ugt.ll @@ -80,22 +80,22 @@ define <2 x i1> @p2_vec_nonsplat_edgecase1(<2 x i8> %x) { ret <2 x i1> %ret } -define <3 x i1> @p3_vec_splat_undef(<3 x i8> %x) { -; CHECK-LABEL: @p3_vec_splat_undef( +define <3 x i1> @p3_vec_splat_poison(<3 x i8> %x) { +; CHECK-LABEL: @p3_vec_splat_poison( ; CHECK-NEXT: [[RET:%.*]] = icmp ugt <3 x i8> [[X:%.*]], <i8 3, i8 3, i8 3> ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %tmp0 = and <3 x i8> %x, <i8 3, i8 undef, i8 3> + %tmp0 = and <3 x i8> %x, <i8 3, i8 poison, i8 3> %ret = icmp ult <3 x i8> %tmp0, %x ret <3 x i1> %ret } -define <3 x i1> @p3_vec_nonsplat_undef(<3 x i8> %x) { -; CHECK-LABEL: @p3_vec_nonsplat_undef( +define <3 x i1> @p3_vec_nonsplat_poison(<3 x i8> %x) { +; CHECK-LABEL: @p3_vec_nonsplat_poison( ; CHECK-NEXT: [[RET:%.*]] = icmp ugt <3 x i8> [[X:%.*]], <i8 7, i8 31, i8 7> ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %tmp0 = and <3 x i8> %x, <i8 7, i8 31, i8 undef> + %tmp0 = and <3 x i8> %x, <i8 7, i8 31, i8 poison> %ret = icmp ult <3 x i8> %tmp0, %x ret <3 x i1> %ret } diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll index 38611d8..dc5658d 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-eq-to-icmp-ule.ll @@ -40,13 +40,13 @@ define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ret <2 x i1> %ret } -define <3 x i1> @p2_vec_undef(<3 x i8> %x, <3 x i8> %y) { -; CHECK-LABEL: @p2_vec_undef( -; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]] +define <3 x i1> @p2_vec_poison(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @p2_vec_poison( +; CHECK-NEXT: [[TMP0:%.*]] = 
lshr <3 x i8> <i8 -1, i8 poison, i8 -1>, [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = icmp uge <3 x i8> [[TMP0]], [[X:%.*]] ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %tmp0 = lshr <3 x i8> <i8 -1, i8 undef, i8 -1>, %y + %tmp0 = lshr <3 x i8> <i8 -1, i8 poison, i8 -1>, %y %tmp1 = and <3 x i8> %tmp0, %x %ret = icmp eq <3 x i8> %tmp1, %x ret <3 x i1> %ret diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll index 37d317b..8fbbd2b 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-and-icmp-ne-to-icmp-ugt.ll @@ -40,13 +40,13 @@ define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ret <2 x i1> %ret } -define <3 x i1> @p2_vec_undef(<3 x i8> %x, <3 x i8> %y) { -; CHECK-LABEL: @p2_vec_undef( -; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]] +define <3 x i1> @p2_vec_poison(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @p2_vec_poison( +; CHECK-NEXT: [[TMP0:%.*]] = lshr <3 x i8> <i8 -1, i8 poison, i8 -1>, [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[TMP0]], [[X:%.*]] ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %tmp0 = lshr <3 x i8> <i8 -1, i8 undef, i8 -1>, %y + %tmp0 = lshr <3 x i8> <i8 -1, i8 poison, i8 -1>, %y %tmp1 = and <3 x i8> %tmp0, %x %ret = icmp ne <3 x i8> %tmp1, %x ret <3 x i1> %ret diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll index dfd67ea..88487b3 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-eq-to-icmp-ule.ll @@ -44,40 +44,40 @@ define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ret <2 x i1> %ret } -define <3 x i1> 
@p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) { -; CHECK-LABEL: @p2_vec_undef0( +define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @p2_vec_poison0( ; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y + %t0 = shl <3 x i8> <i8 -1, i8 poison, i8 -1>, %y %t1 = xor <3 x i8> %t0, <i8 -1, i8 -1, i8 -1> %t2 = and <3 x i8> %t1, %x %ret = icmp eq <3 x i8> %t2, %x ret <3 x i1> %ret } -define <3 x i1> @p3_vec_undef0(<3 x i8> %x, <3 x i8> %y) { -; CHECK-LABEL: @p3_vec_undef0( +define <3 x i1> @p3_vec_poison0(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @p3_vec_poison0( ; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %t0 = shl <3 x i8> <i8 -1, i8 -1, i8 -1>, %y - %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1> + %t1 = xor <3 x i8> %t0, <i8 -1, i8 poison, i8 -1> %t2 = and <3 x i8> %t1, %x %ret = icmp eq <3 x i8> %t2, %x ret <3 x i1> %ret } -define <3 x i1> @p4_vec_undef2(<3 x i8> %x, <3 x i8> %y) { -; CHECK-LABEL: @p4_vec_undef2( +define <3 x i1> @p4_vec_poison2(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @p4_vec_poison2( ; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = icmp eq <3 x i8> [[X_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y - %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1> + %t0 = shl <3 x i8> <i8 -1, i8 poison, i8 -1>, %y + %t1 = xor <3 x i8> %t0, <i8 -1, i8 poison, i8 -1> %t2 = and <3 x i8> %t1, %x %ret = icmp eq <3 x i8> %t2, %x ret <3 x i1> %ret diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll 
b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll index 608e133..b717925 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v2-and-icmp-ne-to-icmp-ugt.ll @@ -44,40 +44,40 @@ define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ret <2 x i1> %ret } -define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) { -; CHECK-LABEL: @p2_vec_undef0( +define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @p2_vec_poison0( ; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y + %t0 = shl <3 x i8> <i8 -1, i8 poison, i8 -1>, %y %t1 = xor <3 x i8> %t0, <i8 -1, i8 -1, i8 -1> %t2 = and <3 x i8> %t1, %x %ret = icmp ne <3 x i8> %t2, %x ret <3 x i1> %ret } -define <3 x i1> @p3_vec_undef0(<3 x i8> %x, <3 x i8> %y) { -; CHECK-LABEL: @p3_vec_undef0( +define <3 x i1> @p3_vec_poison0(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @p3_vec_poison0( ; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[RET]] ; %t0 = shl <3 x i8> <i8 -1, i8 -1, i8 -1>, %y - %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1> + %t1 = xor <3 x i8> %t0, <i8 -1, i8 poison, i8 -1> %t2 = and <3 x i8> %t1, %x %ret = icmp ne <3 x i8> %t2, %x ret <3 x i1> %ret } -define <3 x i1> @p4_vec_undef2(<3 x i8> %x, <3 x i8> %y) { -; CHECK-LABEL: @p4_vec_undef2( +define <3 x i1> @p4_vec_poison2(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @p4_vec_poison2( ; CHECK-NEXT: [[X_HIGHBITS:%.*]] = lshr <3 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[RET:%.*]] = icmp ne <3 x i8> [[X_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %t0 = shl <3 x i8> <i8 -1, i8 
undef, i8 -1>, %y - %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1> + %t0 = shl <3 x i8> <i8 -1, i8 poison, i8 -1>, %y + %t1 = xor <3 x i8> %t0, <i8 -1, i8 poison, i8 -1> %t2 = and <3 x i8> %t1, %x %ret = icmp ne <3 x i8> %t2, %x ret <3 x i1> %ret diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll index d13129c..f48d284 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-eq-to-icmp-ule.ll @@ -54,15 +54,15 @@ define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ret <2 x i1> %ret } -define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) { -; CHECK-LABEL: @p2_vec_undef0( -; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]] +define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @p2_vec_poison0( +; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i8> <i8 -1, i8 poison, i8 -1>, [[Y:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) ; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i8> <i8 -1, i8 -1, i8 -1>, [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp uge <3 x i8> [[T1]], [[X:%.*]] ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y + %t0 = shl <3 x i8> <i8 -1, i8 poison, i8 -1>, %y call void @use3i8(<3 x i8> %t0) %t1 = lshr <3 x i8> %t0, %y %t2 = and <3 x i8> %t1, %x diff --git a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll index a1517b36..f4b3c67 100644 --- a/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll +++ b/llvm/test/Transforms/InstCombine/canonicalize-low-bit-mask-v4-and-icmp-ne-to-icmp-ugt.ll @@ -54,15 +54,15 @@ define <2 x i1> @p1_vec(<2 x i8> %x, <2 x i8> %y) { ret <2 
x i1> %ret } -define <3 x i1> @p2_vec_undef0(<3 x i8> %x, <3 x i8> %y) { -; CHECK-LABEL: @p2_vec_undef0( -; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, [[Y:%.*]] +define <3 x i1> @p2_vec_poison0(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @p2_vec_poison0( +; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i8> <i8 -1, i8 poison, i8 -1>, [[Y:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) ; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i8> <i8 -1, i8 -1, i8 -1>, [[Y]] ; CHECK-NEXT: [[RET:%.*]] = icmp ult <3 x i8> [[T1]], [[X:%.*]] ; CHECK-NEXT: ret <3 x i1> [[RET]] ; - %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %y + %t0 = shl <3 x i8> <i8 -1, i8 poison, i8 -1>, %y call void @use3i8(<3 x i8> %t0) %t1 = lshr <3 x i8> %t0, %y %t2 = and <3 x i8> %t1, %x diff --git a/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll b/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll index 9b51a76..7b6d07a 100644 --- a/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll +++ b/llvm/test/Transforms/InstCombine/cast-int-icmp-eq-0.ll @@ -603,7 +603,7 @@ define i1 @i16_cast_cmp_sgt_int_m1_sitofp_half(i16 %i) { ret i1 %cmp } -; Verify that vector types and vector constants including undef elements are transformed too. +; Verify that vector types and vector constants including poison elements are transformed too. define <3 x i1> @i32_cast_cmp_ne_int_0_sitofp_double_vec(<3 x i32> %i) { ; CHECK-LABEL: @i32_cast_cmp_ne_int_0_sitofp_double_vec( @@ -616,38 +616,38 @@ define <3 x i1> @i32_cast_cmp_ne_int_0_sitofp_double_vec(<3 x i32> %i) { ret <3 x i1> %cmp } -; TODO: Can we propagate the constant vector with undef element? +; TODO: Can we propagate the constant vector with poison element? 
-define <3 x i1> @i32_cast_cmp_eq_int_0_sitofp_float_vec_undef(<3 x i32> %i) { -; CHECK-LABEL: @i32_cast_cmp_eq_int_0_sitofp_float_vec_undef( +define <3 x i1> @i32_cast_cmp_eq_int_0_sitofp_float_vec_poison(<3 x i32> %i) { +; CHECK-LABEL: @i32_cast_cmp_eq_int_0_sitofp_float_vec_poison( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <3 x i32> [[I:%.*]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[CMP]] ; %f = sitofp <3 x i32> %i to <3 x float> %b = bitcast <3 x float> %f to <3 x i32> - %cmp = icmp eq <3 x i32> %b, <i32 0, i32 undef, i32 0> + %cmp = icmp eq <3 x i32> %b, <i32 0, i32 poison, i32 0> ret <3 x i1> %cmp } -define <3 x i1> @i64_cast_cmp_slt_int_1_sitofp_half_vec_undef(<3 x i64> %i) { -; CHECK-LABEL: @i64_cast_cmp_slt_int_1_sitofp_half_vec_undef( +define <3 x i1> @i64_cast_cmp_slt_int_1_sitofp_half_vec_poison(<3 x i64> %i) { +; CHECK-LABEL: @i64_cast_cmp_slt_int_1_sitofp_half_vec_poison( ; CHECK-NEXT: [[CMP:%.*]] = icmp slt <3 x i64> [[I:%.*]], <i64 1, i64 1, i64 1> ; CHECK-NEXT: ret <3 x i1> [[CMP]] ; %f = sitofp <3 x i64> %i to <3 x half> %b = bitcast <3 x half> %f to <3 x i16> - %cmp = icmp slt <3 x i16> %b, <i16 1, i16 undef, i16 1> + %cmp = icmp slt <3 x i16> %b, <i16 1, i16 poison, i16 1> ret <3 x i1> %cmp } -define <3 x i1> @i16_cast_cmp_sgt_int_m1_sitofp_float_vec_undef(<3 x i16> %i) { -; CHECK-LABEL: @i16_cast_cmp_sgt_int_m1_sitofp_float_vec_undef( +define <3 x i1> @i16_cast_cmp_sgt_int_m1_sitofp_float_vec_poison(<3 x i16> %i) { +; CHECK-LABEL: @i16_cast_cmp_sgt_int_m1_sitofp_float_vec_poison( ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <3 x i16> [[I:%.*]], <i16 -1, i16 -1, i16 -1> ; CHECK-NEXT: ret <3 x i1> [[CMP]] ; %f = sitofp <3 x i16> %i to <3 x float> %b = bitcast <3 x float> %f to <3 x i32> - %cmp = icmp sgt <3 x i32> %b, <i32 -1, i32 undef, i32 -1> + %cmp = icmp sgt <3 x i32> %b, <i32 -1, i32 poison, i32 -1> ret <3 x i1> %cmp } diff --git a/llvm/test/Transforms/InstCombine/cast-unsigned-icmp-eqcmp-0.ll 
b/llvm/test/Transforms/InstCombine/cast-unsigned-icmp-eqcmp-0.ll index 0752576..1565fb7c 100644 --- a/llvm/test/Transforms/InstCombine/cast-unsigned-icmp-eqcmp-0.ll +++ b/llvm/test/Transforms/InstCombine/cast-unsigned-icmp-eqcmp-0.ll @@ -27,14 +27,14 @@ define <2 x i1> @i32_cast_cmp_eq_int_0_uitofp_float_vec(<2 x i32> %i) { ret <2 x i1> %cmp } -define <3 x i1> @i32_cast_cmp_eq_int_0_uitofp_float_vec_undef(<3 x i32> %i) { -; CHECK-LABEL: @i32_cast_cmp_eq_int_0_uitofp_float_vec_undef( +define <3 x i1> @i32_cast_cmp_eq_int_0_uitofp_float_vec_poison(<3 x i32> %i) { +; CHECK-LABEL: @i32_cast_cmp_eq_int_0_uitofp_float_vec_poison( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <3 x i32> [[I:%.*]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[CMP]] ; %f = uitofp <3 x i32> %i to <3 x float> %b = bitcast <3 x float> %f to <3 x i32> - %cmp = icmp eq <3 x i32> %b, <i32 0, i32 undef, i32 0> + %cmp = icmp eq <3 x i32> %b, <i32 0, i32 poison, i32 0> ret <3 x i1> %cmp } @@ -60,14 +60,14 @@ define <2 x i1> @i32_cast_cmp_ne_int_0_uitofp_float_vec(<2 x i32> %i) { ret <2 x i1> %cmp } -define <3 x i1> @i32_cast_cmp_ne_int_0_uitofp_float_vec_undef(<3 x i32> %i) { -; CHECK-LABEL: @i32_cast_cmp_ne_int_0_uitofp_float_vec_undef( +define <3 x i1> @i32_cast_cmp_ne_int_0_uitofp_float_vec_poison(<3 x i32> %i) { +; CHECK-LABEL: @i32_cast_cmp_ne_int_0_uitofp_float_vec_poison( ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <3 x i32> [[I:%.*]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[CMP]] ; %f = uitofp <3 x i32> %i to <3 x float> %b = bitcast <3 x float> %f to <3 x i32> - %cmp = icmp ne <3 x i32> %b, <i32 0, i32 undef, i32 0> + %cmp = icmp ne <3 x i32> %b, <i32 0, i32 poison, i32 0> ret <3 x i1> %cmp } @@ -93,14 +93,14 @@ define <2 x i1> @i32_cast_cmp_eq_int_0_uitofp_double_vec(<2 x i32> %i) { ret <2 x i1> %cmp } -define <3 x i1> @i32_cast_cmp_eq_int_0_uitofp_double_vec_undef(<3 x i32> %i) { -; CHECK-LABEL: @i32_cast_cmp_eq_int_0_uitofp_double_vec_undef( +define <3 x i1> 
@i32_cast_cmp_eq_int_0_uitofp_double_vec_poison(<3 x i32> %i) { +; CHECK-LABEL: @i32_cast_cmp_eq_int_0_uitofp_double_vec_poison( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <3 x i32> [[I:%.*]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[CMP]] ; %f = uitofp <3 x i32> %i to <3 x double> %b = bitcast <3 x double> %f to <3 x i64> - %cmp = icmp eq <3 x i64> %b, <i64 0, i64 undef, i64 0> + %cmp = icmp eq <3 x i64> %b, <i64 0, i64 poison, i64 0> ret <3 x i1> %cmp } @@ -126,14 +126,14 @@ define <2 x i1> @i32_cast_cmp_ne_int_0_uitofp_double_vec(<2 x i32> %i) { ret <2 x i1> %cmp } -define <3 x i1> @i32_cast_cmp_ne_int_0_uitofp_double_vec_undef(<3 x i32> %i) { -; CHECK-LABEL: @i32_cast_cmp_ne_int_0_uitofp_double_vec_undef( +define <3 x i1> @i32_cast_cmp_ne_int_0_uitofp_double_vec_poison(<3 x i32> %i) { +; CHECK-LABEL: @i32_cast_cmp_ne_int_0_uitofp_double_vec_poison( ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <3 x i32> [[I:%.*]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[CMP]] ; %f = uitofp <3 x i32> %i to <3 x double> %b = bitcast <3 x double> %f to <3 x i64> - %cmp = icmp ne <3 x i64> %b, <i64 0, i64 undef, i64 0> + %cmp = icmp ne <3 x i64> %b, <i64 0, i64 poison, i64 0> ret <3 x i1> %cmp } @@ -159,14 +159,14 @@ define <2 x i1> @i32_cast_cmp_eq_int_0_uitofp_half_vec(<2 x i32> %i) { ret <2 x i1> %cmp } -define <3 x i1> @i32_cast_cmp_eq_int_0_uitofp_half_vec_undef(<3 x i32> %i) { -; CHECK-LABEL: @i32_cast_cmp_eq_int_0_uitofp_half_vec_undef( +define <3 x i1> @i32_cast_cmp_eq_int_0_uitofp_half_vec_poison(<3 x i32> %i) { +; CHECK-LABEL: @i32_cast_cmp_eq_int_0_uitofp_half_vec_poison( ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <3 x i32> [[I:%.*]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[CMP]] ; %f = uitofp <3 x i32> %i to <3 x half> %b = bitcast <3 x half> %f to <3 x i16> - %cmp = icmp eq <3 x i16> %b, <i16 0, i16 undef, i16 0> + %cmp = icmp eq <3 x i16> %b, <i16 0, i16 poison, i16 0> ret <3 x i1> %cmp } @@ -192,13 +192,13 @@ define <2 x i1> @i32_cast_cmp_ne_int_0_uitofp_half_vec(<2 x i32> %i) 
{ ret <2 x i1> %cmp } -define <3 x i1> @i32_cast_cmp_ne_int_0_uitofp_half_vec_undef(<3 x i32> %i) { -; CHECK-LABEL: @i32_cast_cmp_ne_int_0_uitofp_half_vec_undef( +define <3 x i1> @i32_cast_cmp_ne_int_0_uitofp_half_vec_poison(<3 x i32> %i) { +; CHECK-LABEL: @i32_cast_cmp_ne_int_0_uitofp_half_vec_poison( ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <3 x i32> [[I:%.*]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[CMP]] ; %f = uitofp <3 x i32> %i to <3 x half> %b = bitcast <3 x half> %f to <3 x i16> - %cmp = icmp ne <3 x i16> %b, <i16 0, i16 undef, i16 0> + %cmp = icmp ne <3 x i16> %b, <i16 0, i16 poison, i16 0> ret <3 x i1> %cmp } diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll index d9c93ba..04a3e89 100644 --- a/llvm/test/Transforms/InstCombine/cast.ll +++ b/llvm/test/Transforms/InstCombine/cast.ll @@ -508,18 +508,16 @@ define <2 x i16> @test40vec_nonuniform(<2 x i16> %a) { ret <2 x i16> %r } -define <2 x i16> @test40vec_undef(<2 x i16> %a) { -; ALL-LABEL: @test40vec_undef( -; ALL-NEXT: [[T:%.*]] = zext <2 x i16> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[T21:%.*]] = lshr <2 x i32> [[T]], <i32 9, i32 undef> -; ALL-NEXT: [[T5:%.*]] = shl <2 x i32> [[T]], <i32 8, i32 undef> -; ALL-NEXT: [[T32:%.*]] = or <2 x i32> [[T21]], [[T5]] -; ALL-NEXT: [[R:%.*]] = trunc <2 x i32> [[T32]] to <2 x i16> +define <2 x i16> @test40vec_poison(<2 x i16> %a) { +; ALL-LABEL: @test40vec_poison( +; ALL-NEXT: [[T21:%.*]] = lshr <2 x i16> [[A:%.*]], <i16 9, i16 poison> +; ALL-NEXT: [[T5:%.*]] = shl <2 x i16> [[A]], <i16 8, i16 poison> +; ALL-NEXT: [[R:%.*]] = or disjoint <2 x i16> [[T21]], [[T5]] ; ALL-NEXT: ret <2 x i16> [[R]] ; %t = zext <2 x i16> %a to <2 x i32> - %t21 = lshr <2 x i32> %t, <i32 9, i32 undef> - %t5 = shl <2 x i32> %t, <i32 8, i32 undef> + %t21 = lshr <2 x i32> %t, <i32 9, i32 poison> + %t5 = shl <2 x i32> %t, <i32 8, i32 poison> %t32 = or <2 x i32> %t21, %t5 %r = trunc <2 x i32> %t32 to <2 x i16> ret <2 x i16> %r @@ -1452,7 +1450,7 @@ 
define i32 @test89() { ; LE-LABEL: @test89( ; LE-NEXT: ret i32 6 ; - ret i32 bitcast (<2 x i16> <i16 6, i16 undef> to i32) + ret i32 bitcast (<2 x i16> <i16 6, i16 poison> to i32) } define <2 x i32> @test90() { @@ -1462,7 +1460,7 @@ define <2 x i32> @test90() { ; LE-LABEL: @test90( ; LE-NEXT: ret <2 x i32> <i32 0, i32 1006632960> ; - %t6 = bitcast <4 x half> <half undef, half undef, half undef, half 0xH3C00> to <2 x i32> + %t6 = bitcast <4 x half> <half poison, half poison, half poison, half 0xH3C00> to <2 x i32> ret <2 x i32> %t6 } @@ -1537,13 +1535,13 @@ define <2 x i8> @trunc_lshr_sext_uniform(<2 x i8> %A) { ret <2 x i8> %D } -define <2 x i8> @trunc_lshr_sext_uniform_undef(<2 x i8> %A) { -; ALL-LABEL: @trunc_lshr_sext_uniform_undef( -; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A:%.*]], <i8 6, i8 undef> +define <2 x i8> @trunc_lshr_sext_uniform_poison(<2 x i8> %A) { +; ALL-LABEL: @trunc_lshr_sext_uniform_poison( +; ALL-NEXT: [[D:%.*]] = ashr <2 x i8> [[A:%.*]], <i8 6, i8 poison> ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = sext <2 x i8> %A to <2 x i32> - %C = lshr <2 x i32> %B, <i32 6, i32 undef> + %C = lshr <2 x i32> %B, <i32 6, i32 poison> %D = trunc <2 x i32> %C to <2 x i8> ret <2 x i8> %D } @@ -1559,13 +1557,13 @@ define <2 x i8> @trunc_lshr_sext_nonuniform(<2 x i8> %A) { ret <2 x i8> %D } -define <3 x i8> @trunc_lshr_sext_nonuniform_undef(<3 x i8> %A) { -; ALL-LABEL: @trunc_lshr_sext_nonuniform_undef( -; ALL-NEXT: [[D:%.*]] = ashr <3 x i8> [[A:%.*]], <i8 6, i8 2, i8 undef> +define <3 x i8> @trunc_lshr_sext_nonuniform_poison(<3 x i8> %A) { +; ALL-LABEL: @trunc_lshr_sext_nonuniform_poison( +; ALL-NEXT: [[D:%.*]] = ashr <3 x i8> [[A:%.*]], <i8 6, i8 2, i8 poison> ; ALL-NEXT: ret <3 x i8> [[D]] ; %B = sext <3 x i8> %A to <3 x i32> - %C = lshr <3 x i32> %B, <i32 6, i32 2, i32 undef> + %C = lshr <3 x i32> %B, <i32 6, i32 2, i32 poison> %D = trunc <3 x i32> %C to <3 x i8> ret <3 x i8> %D } @@ -2014,15 +2012,13 @@ define <2 x i8> @trunc_lshr_zext_uniform(<2 x i8> %A) { ret <2 
x i8> %D } -define <2 x i8> @trunc_lshr_zext_uniform_undef(<2 x i8> %A) { -; ALL-LABEL: @trunc_lshr_zext_uniform_undef( -; ALL-NEXT: [[B:%.*]] = zext <2 x i8> [[A:%.*]] to <2 x i32> -; ALL-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], <i32 6, i32 undef> -; ALL-NEXT: [[D:%.*]] = trunc nuw <2 x i32> [[C]] to <2 x i8> +define <2 x i8> @trunc_lshr_zext_uniform_poison(<2 x i8> %A) { +; ALL-LABEL: @trunc_lshr_zext_uniform_poison( +; ALL-NEXT: [[D:%.*]] = lshr <2 x i8> [[A:%.*]], <i8 6, i8 poison> ; ALL-NEXT: ret <2 x i8> [[D]] ; %B = zext <2 x i8> %A to <2 x i32> - %C = lshr <2 x i32> %B, <i32 6, i32 undef> + %C = lshr <2 x i32> %B, <i32 6, i32 poison> %D = trunc <2 x i32> %C to <2 x i8> ret <2 x i8> %D } @@ -2038,15 +2034,13 @@ define <2 x i8> @trunc_lshr_zext_nonuniform(<2 x i8> %A) { ret <2 x i8> %D } -define <3 x i8> @trunc_lshr_zext_nonuniform_undef(<3 x i8> %A) { -; ALL-LABEL: @trunc_lshr_zext_nonuniform_undef( -; ALL-NEXT: [[B:%.*]] = zext <3 x i8> [[A:%.*]] to <3 x i32> -; ALL-NEXT: [[C:%.*]] = lshr <3 x i32> [[B]], <i32 6, i32 2, i32 undef> -; ALL-NEXT: [[D:%.*]] = trunc nuw <3 x i32> [[C]] to <3 x i8> +define <3 x i8> @trunc_lshr_zext_nonuniform_poison(<3 x i8> %A) { +; ALL-LABEL: @trunc_lshr_zext_nonuniform_poison( +; ALL-NEXT: [[D:%.*]] = lshr <3 x i8> [[A:%.*]], <i8 6, i8 2, i8 poison> ; ALL-NEXT: ret <3 x i8> [[D]] ; %B = zext <3 x i8> %A to <3 x i32> - %C = lshr <3 x i32> %B, <i32 6, i32 2, i32 undef> + %C = lshr <3 x i32> %B, <i32 6, i32 2, i32 poison> %D = trunc <3 x i32> %C to <3 x i8> ret <3 x i8> %D } diff --git a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll index 9da9eb3..1dd0b17 100644 --- a/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll +++ b/llvm/test/Transforms/InstCombine/clamp-to-minmax.ll @@ -472,7 +472,7 @@ define float @ui32_clamp_and_cast_to_float(i32 %x) { ; CHECK-LABEL: @ui32_clamp_and_cast_to_float( ; CHECK-NEXT: [[LO_CMP:%.*]] = icmp eq i32 [[X:%.*]], 0 ; CHECK-NEXT: 
[[MIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[X]], i32 255) -; CHECK-NEXT: [[MIN:%.*]] = uitofp i32 [[MIN1]] to float +; CHECK-NEXT: [[MIN:%.*]] = uitofp nneg i32 [[MIN1]] to float ; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], float 1.000000e+00, float [[MIN]] ; CHECK-NEXT: ret float [[R]] ; @@ -488,7 +488,7 @@ define float @ui64_clamp_and_cast_to_float(i64 %x) { ; CHECK-LABEL: @ui64_clamp_and_cast_to_float( ; CHECK-NEXT: [[LO_CMP:%.*]] = icmp eq i64 [[X:%.*]], 0 ; CHECK-NEXT: [[MIN1:%.*]] = call i64 @llvm.umin.i64(i64 [[X]], i64 255) -; CHECK-NEXT: [[MIN:%.*]] = uitofp i64 [[MIN1]] to float +; CHECK-NEXT: [[MIN:%.*]] = uitofp nneg i64 [[MIN1]] to float ; CHECK-NEXT: [[R:%.*]] = select i1 [[LO_CMP]], float 1.000000e+00, float [[MIN]] ; CHECK-NEXT: ret float [[R]] ; @@ -504,7 +504,7 @@ define float @mixed_clamp_to_float_1(i32 %x) { ; CHECK-LABEL: @mixed_clamp_to_float_1( ; CHECK-NEXT: [[SI_MIN:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255) ; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.smax.i32(i32 [[SI_MIN]], i32 1) -; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[R1]] to float +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[R1]] to float ; CHECK-NEXT: ret float [[R]] ; %si_min_cmp = icmp sgt i32 %x, 255 @@ -539,7 +539,7 @@ define float @mixed_clamp_to_float_2(i32 %x) { ; CHECK-LABEL: @mixed_clamp_to_float_2( ; CHECK-NEXT: [[SI_MIN:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 255) ; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.smax.i32(i32 [[SI_MIN]], i32 1) -; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[R1]] to float +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[R1]] to float ; CHECK-NEXT: ret float [[R]] ; %si_min_cmp = icmp sgt i32 %x, 255 @@ -572,7 +572,7 @@ define <2 x float> @mixed_clamp_to_float_vec(<2 x i32> %x) { ; CHECK-LABEL: @mixed_clamp_to_float_vec( ; CHECK-NEXT: [[SI_MIN:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[X:%.*]], <2 x i32> <i32 255, i32 255>) ; CHECK-NEXT: [[R1:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[SI_MIN]], <2 x i32> <i32 1, i32 
1>) -; CHECK-NEXT: [[R:%.*]] = sitofp <2 x i32> [[R1]] to <2 x float> +; CHECK-NEXT: [[R:%.*]] = uitofp nneg <2 x i32> [[R1]] to <2 x float> ; CHECK-NEXT: ret <2 x float> [[R]] ; %si_min_cmp = icmp sgt <2 x i32> %x, <i32 255, i32 255> diff --git a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll index 5d27f37..7086855 100644 --- a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll +++ b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll @@ -116,14 +116,14 @@ define <2 x i32> @ctpop3v(<2 x i32> %0) { ret <2 x i32> %5 } -define <2 x i32> @ctpop3v_undef(<2 x i32> %0) { -; CHECK-LABEL: @ctpop3v_undef( +define <2 x i32> @ctpop3v_poison(<2 x i32> %0) { +; CHECK-LABEL: @ctpop3v_poison( ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP0:%.*]], i1 false), !range [[RNG0]] ; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %2 = sub <2 x i32> zeroinitializer, %0 %3 = and <2 x i32> %2, %0 - %4 = add <2 x i32> %3, <i32 -1, i32 undef> + %4 = add <2 x i32> %3, <i32 -1, i32 poison> %5 = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %4) ret <2 x i32> %5 } diff --git a/llvm/test/Transforms/InstCombine/ctpop.ll b/llvm/test/Transforms/InstCombine/ctpop.ll index 2719472..b3653e5 100644 --- a/llvm/test/Transforms/InstCombine/ctpop.ll +++ b/llvm/test/Transforms/InstCombine/ctpop.ll @@ -155,28 +155,27 @@ define <2 x i32> @_parity_of_not_vec(<2 x i32> %x) { ret <2 x i32> %r } -define <2 x i32> @_parity_of_not_undef(<2 x i32> %x) { -; CHECK-LABEL: @_parity_of_not_undef( +define <2 x i32> @_parity_of_not_poison(<2 x i32> %x) { +; CHECK-LABEL: @_parity_of_not_poison( ; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]), !range [[RNG1]] ; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[TMP1]], <i32 1, i32 1> ; CHECK-NEXT: ret <2 x i32> [[R]] ; - %neg = xor <2 x i32> %x, <i32 undef ,i32 -1> + %neg = xor <2 x i32> %x, <i32 poison ,i32 -1> %cnt = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %neg) %r = and <2 x i32> 
%cnt, <i32 1 ,i32 1> ret <2 x i32> %r } -define <2 x i32> @_parity_of_not_undef2(<2 x i32> %x) { -; CHECK-LABEL: @_parity_of_not_undef2( -; CHECK-NEXT: [[NEG:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 -1> -; CHECK-NEXT: [[CNT:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[NEG]]), !range [[RNG1]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[CNT]], <i32 1, i32 undef> +define <2 x i32> @_parity_of_not_poison2(<2 x i32> %x) { +; CHECK-LABEL: @_parity_of_not_poison2( +; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]), !range [[RNG1]] +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[CNT]], <i32 1, i32 poison> ; CHECK-NEXT: ret <2 x i32> [[R]] ; %neg = xor <2 x i32> %x, <i32 -1 ,i32 -1> %cnt = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %neg) - %r = and <2 x i32> %cnt, <i32 1 ,i32 undef> + %r = and <2 x i32> %cnt, <i32 1 ,i32 poison> ret <2 x i32> %r } diff --git a/llvm/test/Transforms/InstCombine/fabs-as-int.ll b/llvm/test/Transforms/InstCombine/fabs-as-int.ll index f32c00e..4e49ff1 100644 --- a/llvm/test/Transforms/InstCombine/fabs-as-int.ll +++ b/llvm/test/Transforms/InstCombine/fabs-as-int.ll @@ -137,15 +137,15 @@ define <2 x i32> @not_fabs_as_int_v2f32_nonsplat(<2 x float> %x) { ret <2 x i32> %and } -define <3 x i32> @fabs_as_int_v3f32_undef(<3 x float> %x) { -; CHECK-LABEL: define <3 x i32> @fabs_as_int_v3f32_undef +define <3 x i32> @fabs_as_int_v3f32_poison(<3 x float> %x) { +; CHECK-LABEL: define <3 x i32> @fabs_as_int_v3f32_poison ; CHECK-SAME: (<3 x float> [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = call <3 x float> @llvm.fabs.v3f32(<3 x float> [[X]]) ; CHECK-NEXT: [[AND:%.*]] = bitcast <3 x float> [[TMP1]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[AND]] ; %bc = bitcast <3 x float> %x to <3 x i32> - %and = and <3 x i32> %bc, <i32 2147483647, i32 undef, i32 2147483647> + %and = and <3 x i32> %bc, <i32 2147483647, i32 poison, i32 2147483647> ret <3 x i32> %and } diff --git a/llvm/test/Transforms/InstCombine/fabs.ll 
b/llvm/test/Transforms/InstCombine/fabs.ll index 7e380c2..5ec6578 100644 --- a/llvm/test/Transforms/InstCombine/fabs.ll +++ b/llvm/test/Transforms/InstCombine/fabs.ll @@ -321,7 +321,7 @@ define <2 x float> @select_fcmp_nnan_ole_negzero(<2 x float> %x) { ; CHECK-NEXT: ret <2 x float> [[FABS]] ; %lezero = fcmp ole <2 x float> %x, <float -0.0, float -0.0> - %negx = fsub nnan <2 x float> <float 0.0, float undef>, %x + %negx = fsub nnan <2 x float> <float 0.0, float poison>, %x %fabs = select <2 x i1> %lezero, <2 x float> %negx, <2 x float> %x ret <2 x float> %fabs } @@ -332,7 +332,7 @@ define <2 x float> @select_nnan_fcmp_nnan_ole_negzero(<2 x float> %x) { ; CHECK-NEXT: ret <2 x float> [[FABS]] ; %lezero = fcmp ole <2 x float> %x, <float -0.0, float -0.0> - %negx = fsub nnan <2 x float> <float 0.0, float undef>, %x + %negx = fsub nnan <2 x float> <float 0.0, float poison>, %x %fabs = select nnan <2 x i1> %lezero, <2 x float> %negx, <2 x float> %x ret <2 x float> %fabs } diff --git a/llvm/test/Transforms/InstCombine/fast-math.ll b/llvm/test/Transforms/InstCombine/fast-math.ll index 129d781..83f2091 100644 --- a/llvm/test/Transforms/InstCombine/fast-math.ll +++ b/llvm/test/Transforms/InstCombine/fast-math.ll @@ -541,12 +541,12 @@ define float @fneg2(float %x) { ret float %sub } -define <2 x float> @fneg2_vec_undef(<2 x float> %x) { -; CHECK-LABEL: @fneg2_vec_undef( +define <2 x float> @fneg2_vec_poison(<2 x float> %x) { +; CHECK-LABEL: @fneg2_vec_poison( ; CHECK-NEXT: [[SUB:%.*]] = fneg nsz <2 x float> [[X:%.*]] ; CHECK-NEXT: ret <2 x float> [[SUB]] ; - %sub = fsub nsz <2 x float> <float undef, float 0.0>, %x + %sub = fsub nsz <2 x float> <float poison, float 0.0>, %x ret <2 x float> %sub } @@ -562,7 +562,7 @@ define float @fdiv1(float %x) { ; CHECK-NEXT: [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FD7303B60000000 ; CHECK-NEXT: ret float [[DIV1]] ; - %div = fdiv float %x, 0x3FF3333340000000 + %div = fdiv fast float %x, 0x3FF3333340000000 %div1 = fdiv fast float %div, 
0x4002666660000000 ret float %div1 ; 0x3FF3333340000000 = 1.2f @@ -603,7 +603,7 @@ define float @fdiv3(float %x) { ; CHECK-NEXT: [[DIV1:%.*]] = fdiv fast float [[TMP1]], 0x47EFFFFFE0000000 ; CHECK-NEXT: ret float [[DIV1]] ; - %div = fdiv float %x, 0x47EFFFFFE0000000 + %div = fdiv fast float %x, 0x47EFFFFFE0000000 %div1 = fdiv fast float %div, 0x4002666660000000 ret float %div1 } diff --git a/llvm/test/Transforms/InstCombine/fcmp-special.ll b/llvm/test/Transforms/InstCombine/fcmp-special.ll index 88bfe93..64bc86f 100644 --- a/llvm/test/Transforms/InstCombine/fcmp-special.ll +++ b/llvm/test/Transforms/InstCombine/fcmp-special.ll @@ -144,21 +144,21 @@ define <2 x i1> @uno_vec_with_nan(<2 x double> %x) { ret <2 x i1> %f } -define <2 x i1> @uno_vec_with_undef(<2 x double> %x) { -; CHECK-LABEL: @uno_vec_with_undef( +define <2 x i1> @uno_vec_with_poison(<2 x double> %x) { +; CHECK-LABEL: @uno_vec_with_poison( ; CHECK-NEXT: [[F:%.*]] = fcmp uno <2 x double> [[X:%.*]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[F]] ; - %f = fcmp uno <2 x double> %x, <double 3.0, double undef> + %f = fcmp uno <2 x double> %x, <double 3.0, double poison> ret <2 x i1> %f } -define <2 x i1> @ord_vec_with_undef(<2 x double> %x) { -; CHECK-LABEL: @ord_vec_with_undef( -; CHECK-NEXT: [[F:%.*]] = fcmp ord <2 x double> [[X:%.*]], <double 0.000000e+00, double undef> +define <2 x i1> @ord_vec_with_poison(<2 x double> %x) { +; CHECK-LABEL: @ord_vec_with_poison( +; CHECK-NEXT: [[F:%.*]] = fcmp ord <2 x double> [[X:%.*]], <double 0.000000e+00, double poison> ; CHECK-NEXT: ret <2 x i1> [[F]] ; - %f = fcmp ord <2 x double> %x, <double 0.0, double undef> + %f = fcmp ord <2 x double> %x, <double 0.0, double poison> ret <2 x i1> %f } @@ -224,12 +224,12 @@ define <2 x i1> @negative_zero_olt_vec(<2 x float> %x) { ret <2 x i1> %r } -define <2 x i1> @negative_zero_une_vec_undef(<2 x double> %x) { -; CHECK-LABEL: @negative_zero_une_vec_undef( +define <2 x i1> @negative_zero_une_vec_poison(<2 x double> %x) { +; 
CHECK-LABEL: @negative_zero_une_vec_poison( ; CHECK-NEXT: [[R:%.*]] = fcmp nnan une <2 x double> [[X:%.*]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; - %r = fcmp nnan une <2 x double> %x, <double -0.0, double undef> + %r = fcmp nnan une <2 x double> %x, <double -0.0, double poison> ret <2 x i1> %r } diff --git a/llvm/test/Transforms/InstCombine/fcmp.ll b/llvm/test/Transforms/InstCombine/fcmp.ll index f2701d1..389264e 100644 --- a/llvm/test/Transforms/InstCombine/fcmp.ll +++ b/llvm/test/Transforms/InstCombine/fcmp.ll @@ -102,12 +102,12 @@ define <2 x i1> @unary_fneg_constant_swap_pred_vec(<2 x float> %x) { ret <2 x i1> %cmp } -define <2 x i1> @fneg_constant_swap_pred_vec_undef(<2 x float> %x) { -; CHECK-LABEL: @fneg_constant_swap_pred_vec_undef( +define <2 x i1> @fneg_constant_swap_pred_vec_poison(<2 x float> %x) { +; CHECK-LABEL: @fneg_constant_swap_pred_vec_poison( ; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <2 x float> [[X:%.*]], <float -1.000000e+00, float -2.000000e+00> ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; - %neg = fsub <2 x float> <float undef, float -0.0>, %x + %neg = fsub <2 x float> <float poison, float -0.0>, %x %cmp = fcmp ogt <2 x float> %neg, <float 1.0, float 2.0> ret <2 x i1> %cmp } @@ -234,34 +234,34 @@ define <2 x i1> @fneg_unary_fneg_swap_pred_vec(<2 x float> %x, <2 x float> %y) { ret <2 x i1> %cmp } -define <2 x i1> @fneg_fneg_swap_pred_vec_undef(<2 x float> %x, <2 x float> %y) { -; CHECK-LABEL: @fneg_fneg_swap_pred_vec_undef( +define <2 x i1> @fneg_fneg_swap_pred_vec_poison(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @fneg_fneg_swap_pred_vec_poison( ; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt <2 x float> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; - %neg1 = fsub <2 x float> <float -0.0, float undef>, %x - %neg2 = fsub <2 x float> <float undef, float -0.0>, %y + %neg1 = fsub <2 x float> <float -0.0, float poison>, %x + %neg2 = fsub <2 x float> <float poison, float -0.0>, %y %cmp = fcmp olt <2 x float> %neg1, %neg2 ret <2 x i1> %cmp 
} -define <2 x i1> @unary_fneg_fneg_swap_pred_vec_undef(<2 x float> %x, <2 x float> %y) { -; CHECK-LABEL: @unary_fneg_fneg_swap_pred_vec_undef( +define <2 x i1> @unary_fneg_fneg_swap_pred_vec_poison(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @unary_fneg_fneg_swap_pred_vec_poison( ; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt <2 x float> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %neg1 = fneg <2 x float> %x - %neg2 = fsub <2 x float> <float undef, float -0.0>, %y + %neg2 = fsub <2 x float> <float poison, float -0.0>, %y %cmp = fcmp olt <2 x float> %neg1, %neg2 ret <2 x i1> %cmp } -define <2 x i1> @fneg_unary_fneg_swap_pred_vec_undef(<2 x float> %x, <2 x float> %y) { -; CHECK-LABEL: @fneg_unary_fneg_swap_pred_vec_undef( +define <2 x i1> @fneg_unary_fneg_swap_pred_vec_poison(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @fneg_unary_fneg_swap_pred_vec_poison( ; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt <2 x float> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; - %neg1 = fsub <2 x float> <float -0.0, float undef>, %x + %neg1 = fsub <2 x float> <float -0.0, float poison>, %x %neg2 = fneg <2 x float> %y %cmp = fcmp olt <2 x float> %neg1, %neg2 ret <2 x i1> %cmp @@ -1284,3 +1284,205 @@ define <1 x i1> @bitcast_1vec_eq0(i32 %x) { %cmp = fcmp oeq <1 x float> %f, zeroinitializer ret <1 x i1> %cmp } + +; Simplify fcmp (x + 0.0), y => fcmp x, y + +define i1 @fcmp_fadd_zero_ugt(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_zero_ugt( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[ADD:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, 0.000000e+00 + %cmp = fcmp ugt float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_zero_uge(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_zero_uge( +; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[ADD:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, 0.000000e+00 + %cmp = fcmp uge float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_zero_ogt(float %x, float %y) { +; 
CHECK-LABEL: @fcmp_fadd_zero_ogt( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[ADD:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, 0.000000e+00 + %cmp = fcmp ogt float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_zero_oge(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_zero_oge( +; CHECK-NEXT: [[CMP:%.*]] = fcmp oge float [[ADD:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, 0.000000e+00 + %cmp = fcmp oge float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_zero_ult(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_zero_ult( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ult float [[ADD:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, 0.000000e+00 + %cmp = fcmp ult float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_zero_ule(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_zero_ule( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ule float [[ADD:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, 0.000000e+00 + %cmp = fcmp ule float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_zero_olt(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_zero_olt( +; CHECK-NEXT: [[CMP:%.*]] = fcmp olt float [[ADD:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, 0.000000e+00 + %cmp = fcmp olt float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_zero_ole(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_zero_ole( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ole float [[ADD:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, 0.000000e+00 + %cmp = fcmp ole float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_zero_oeq(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_zero_oeq( +; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[ADD:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, 0.000000e+00 + %cmp = fcmp oeq float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_zero_one(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_zero_one( +; 
CHECK-NEXT: [[CMP:%.*]] = fcmp one float [[ADD:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, 0.000000e+00 + %cmp = fcmp one float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_zero_ueq(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_zero_ueq( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq float [[ADD:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, 0.000000e+00 + %cmp = fcmp ueq float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_zero_une(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_zero_une( +; CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[ADD:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, 0.000000e+00 + %cmp = fcmp une float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_zero_ord(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_zero_ord( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ord float [[ADD:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, 0.000000e+00 + %cmp = fcmp ord float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_zero_uno(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_zero_uno( +; CHECK-NEXT: [[CMP:%.*]] = fcmp uno float [[ADD:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, 0.000000e+00 + %cmp = fcmp uno float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_neg_zero(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_neg_zero( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, -0.000000e+00 + %cmp = fcmp ugt float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_zero_switched(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_zero_switched( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ult float [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %y, 0.000000e+00 + %cmp = fcmp ugt float %x, %add + ret i1 %cmp +} + +define <2 x i1> @fcmp_fadd_zero_vec(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @fcmp_fadd_zero_vec( +; CHECK-NEXT: 
[[CMP:%.*]] = fcmp ugt <2 x float> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; + %add = fadd <2 x float> %x, <float 0.0, float -0.0> + %cmp = fcmp ugt <2 x float> %add, %y + ret <2 x i1> %cmp +} + +define i1 @fcmp_fast_fadd_fast_zero(float %x, float %y) { +; CHECK-LABEL: @fcmp_fast_fadd_fast_zero( +; CHECK-NEXT: [[CMP:%.*]] = fcmp fast ugt float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd fast float %x, 0.000000e+00 + %cmp = fcmp fast ugt float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fast_fadd_zero(float %x, float %y) { +; CHECK-LABEL: @fcmp_fast_fadd_zero( +; CHECK-NEXT: [[CMP:%.*]] = fcmp fast ugt float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd float %x, 0.000000e+00 + %cmp = fcmp fast ugt float %add, %y + ret i1 %cmp +} + +define i1 @fcmp_fadd_fast_zero(float %x, float %y) { +; CHECK-LABEL: @fcmp_fadd_fast_zero( +; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt float [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %add = fadd fast float %x, 0.000000e+00 + %cmp = fcmp ugt float %add, %y + ret i1 %cmp +} diff --git a/llvm/test/Transforms/InstCombine/fdiv.ll b/llvm/test/Transforms/InstCombine/fdiv.ll index a0710c2..ca11685 100644 --- a/llvm/test/Transforms/InstCombine/fdiv.ll +++ b/llvm/test/Transforms/InstCombine/fdiv.ll @@ -141,12 +141,12 @@ define <2 x float> @not_exact_inverse_vec_arcp(<2 x float> %x) { ret <2 x float> %div } -define <2 x float> @not_exact_inverse_vec_arcp_with_undef_elt(<2 x float> %x) { -; CHECK-LABEL: @not_exact_inverse_vec_arcp_with_undef_elt( -; CHECK-NEXT: [[DIV:%.*]] = fdiv arcp <2 x float> [[X:%.*]], <float undef, float 3.000000e+00> +define <2 x float> @not_exact_inverse_vec_arcp_with_poison_elt(<2 x float> %x) { +; CHECK-LABEL: @not_exact_inverse_vec_arcp_with_poison_elt( +; CHECK-NEXT: [[DIV:%.*]] = fdiv arcp <2 x float> [[X:%.*]], <float poison, float 3.000000e+00> ; CHECK-NEXT: ret <2 x float> [[DIV]] ; - %div = fdiv arcp <2 x float> %x, <float undef, float 3.0> + 
%div = fdiv arcp <2 x float> %x, <float poison, float 3.0> ret <2 x float> %div } @@ -333,13 +333,13 @@ define <2 x float> @unary_fneg_fneg_vec(<2 x float> %x, <2 x float> %y) { ret <2 x float> %div } -define <2 x float> @fneg_fneg_vec_undef_elts(<2 x float> %x, <2 x float> %y) { -; CHECK-LABEL: @fneg_fneg_vec_undef_elts( +define <2 x float> @fneg_fneg_vec_poison_elts(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @fneg_fneg_vec_poison_elts( ; CHECK-NEXT: [[DIV:%.*]] = fdiv <2 x float> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x float> [[DIV]] ; - %xneg = fsub <2 x float> <float undef, float -0.0>, %x - %yneg = fsub <2 x float> <float -0.0, float undef>, %y + %xneg = fsub <2 x float> <float poison, float -0.0>, %x + %yneg = fsub <2 x float> <float -0.0, float poison>, %y %div = fdiv <2 x float> %xneg, %yneg ret <2 x float> %div } @@ -404,12 +404,12 @@ define <2 x float> @unary_fneg_dividend_constant_divisor_vec(<2 x float> %x) { ret <2 x float> %div } -define <2 x float> @fneg_dividend_constant_divisor_vec_undef_elt(<2 x float> %x) { -; CHECK-LABEL: @fneg_dividend_constant_divisor_vec_undef_elt( +define <2 x float> @fneg_dividend_constant_divisor_vec_poison_elt(<2 x float> %x) { +; CHECK-LABEL: @fneg_dividend_constant_divisor_vec_poison_elt( ; CHECK-NEXT: [[DIV:%.*]] = fdiv ninf <2 x float> [[X:%.*]], <float -3.000000e+00, float 8.000000e+00> ; CHECK-NEXT: ret <2 x float> [[DIV]] ; - %neg = fsub <2 x float> <float undef, float -0.0>, %x + %neg = fsub <2 x float> <float poison, float -0.0>, %x %div = fdiv ninf <2 x float> %neg, <float 3.0, float -8.0> ret <2 x float> %div } diff --git a/llvm/test/Transforms/InstCombine/fma.ll b/llvm/test/Transforms/InstCombine/fma.ll index 8b413ae..cf3d7f3 100644 --- a/llvm/test/Transforms/InstCombine/fma.ll +++ b/llvm/test/Transforms/InstCombine/fma.ll @@ -60,13 +60,13 @@ define <2 x float> @fma_unary_fneg_x_unary_fneg_y_vec(<2 x float> %x, <2 x float ret <2 x float> %fma } -define <2 x float> @fma_fneg_x_fneg_y_vec_undef(<2 x 
float> %x, <2 x float> %y, <2 x float> %z) { -; CHECK-LABEL: @fma_fneg_x_fneg_y_vec_undef( +define <2 x float> @fma_fneg_x_fneg_y_vec_poison(<2 x float> %x, <2 x float> %y, <2 x float> %z) { +; CHECK-LABEL: @fma_fneg_x_fneg_y_vec_poison( ; CHECK-NEXT: [[FMA:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[X:%.*]], <2 x float> [[Y:%.*]], <2 x float> [[Z:%.*]]) ; CHECK-NEXT: ret <2 x float> [[FMA]] ; - %xn = fsub <2 x float> <float -0.0, float undef>, %x - %yn = fsub <2 x float> <float undef, float -0.0>, %y + %xn = fsub <2 x float> <float -0.0, float poison>, %x + %yn = fsub <2 x float> <float poison, float -0.0>, %y %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %xn, <2 x float> %yn, <2 x float> %z) ret <2 x float> %fma } diff --git a/llvm/test/Transforms/InstCombine/fmul-pow.ll b/llvm/test/Transforms/InstCombine/fmul-pow.ll index 63458e1..84592d2 100644 --- a/llvm/test/Transforms/InstCombine/fmul-pow.ll +++ b/llvm/test/Transforms/InstCombine/fmul-pow.ll @@ -85,8 +85,8 @@ define double @pow_ab_recip_a_reassoc(double %a, double %b) { ; CHECK-NEXT: [[M:%.*]] = call reassoc double @llvm.pow.f64(double [[A:%.*]], double [[TMP1]]) ; CHECK-NEXT: ret double [[M]] ; - %r = fdiv double 1.0, %a - %p = call double @llvm.pow.f64(double %a, double %b) + %r = fdiv reassoc double 1.0, %a + %p = call reassoc double @llvm.pow.f64(double %a, double %b) %m = fmul reassoc double %r, %p ret double %m } @@ -99,8 +99,8 @@ define double @pow_ab_recip_a_reassoc_commute(double %a, double %b) { ; CHECK-NEXT: [[M:%.*]] = call reassoc double @llvm.pow.f64(double [[A:%.*]], double [[TMP1]]) ; CHECK-NEXT: ret double [[M]] ; - %r = fdiv double 1.0, %a - %p = call double @llvm.pow.f64(double %a, double %b) + %r = fdiv reassoc double 1.0, %a + %p = call reassoc double @llvm.pow.f64(double %a, double %b) %m = fmul reassoc double %p, %r ret double %m } @@ -109,14 +109,14 @@ define double @pow_ab_recip_a_reassoc_commute(double %a, double %b) { define double 
@pow_ab_recip_a_reassoc_use1(double %a, double %b) { ; CHECK-LABEL: @pow_ab_recip_a_reassoc_use1( -; CHECK-NEXT: [[R:%.*]] = fdiv double 1.000000e+00, [[A:%.*]] -; CHECK-NEXT: [[P:%.*]] = call double @llvm.pow.f64(double [[A]], double [[B:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fdiv reassoc double 1.000000e+00, [[A:%.*]] +; CHECK-NEXT: [[P:%.*]] = call reassoc double @llvm.pow.f64(double [[A]], double [[B:%.*]]) ; CHECK-NEXT: [[M:%.*]] = fmul reassoc double [[R]], [[P]] ; CHECK-NEXT: call void @use(double [[R]]) ; CHECK-NEXT: ret double [[M]] ; - %r = fdiv double 1.0, %a - %p = call double @llvm.pow.f64(double %a, double %b) + %r = fdiv reassoc double 1.0, %a + %p = call reassoc double @llvm.pow.f64(double %a, double %b) %m = fmul reassoc double %r, %p call void @use(double %r) ret double %m @@ -126,13 +126,13 @@ define double @pow_ab_recip_a_reassoc_use1(double %a, double %b) { define double @pow_ab_recip_a_reassoc_use2(double %a, double %b) { ; CHECK-LABEL: @pow_ab_recip_a_reassoc_use2( -; CHECK-NEXT: [[P:%.*]] = call double @llvm.pow.f64(double [[A:%.*]], double [[B:%.*]]) +; CHECK-NEXT: [[P:%.*]] = call reassoc double @llvm.pow.f64(double [[A:%.*]], double [[B:%.*]]) ; CHECK-NEXT: [[M:%.*]] = fdiv reassoc double [[P]], [[A]] ; CHECK-NEXT: call void @use(double [[P]]) ; CHECK-NEXT: ret double [[M]] ; - %r = fdiv double 1.0, %a - %p = call double @llvm.pow.f64(double %a, double %b) + %r = fdiv reassoc double 1.0, %a + %p = call reassoc double @llvm.pow.f64(double %a, double %b) %m = fmul reassoc double %r, %p call void @use(double %p) ret double %m @@ -142,15 +142,15 @@ define double @pow_ab_recip_a_reassoc_use2(double %a, double %b) { define double @pow_ab_recip_a_reassoc_use3(double %a, double %b) { ; CHECK-LABEL: @pow_ab_recip_a_reassoc_use3( -; CHECK-NEXT: [[R:%.*]] = fdiv double 1.000000e+00, [[A:%.*]] -; CHECK-NEXT: [[P:%.*]] = call double @llvm.pow.f64(double [[A]], double [[B:%.*]]) +; CHECK-NEXT: [[R:%.*]] = fdiv reassoc double 1.000000e+00, [[A:%.*]] +; 
CHECK-NEXT: [[P:%.*]] = call reassoc double @llvm.pow.f64(double [[A]], double [[B:%.*]]) ; CHECK-NEXT: [[M:%.*]] = fmul reassoc double [[R]], [[P]] ; CHECK-NEXT: call void @use(double [[R]]) ; CHECK-NEXT: call void @use(double [[P]]) ; CHECK-NEXT: ret double [[M]] ; - %r = fdiv double 1.0, %a - %p = call double @llvm.pow.f64(double %a, double %b) + %r = fdiv reassoc double 1.0, %a + %p = call reassoc double @llvm.pow.f64(double %a, double %b) %m = fmul reassoc double %r, %p call void @use(double %r) call void @use(double %p) diff --git a/llvm/test/Transforms/InstCombine/fmul.ll b/llvm/test/Transforms/InstCombine/fmul.ll index f6435f0..e9c86a1 100644 --- a/llvm/test/Transforms/InstCombine/fmul.ll +++ b/llvm/test/Transforms/InstCombine/fmul.ll @@ -42,12 +42,12 @@ define <2 x float> @unary_neg_constant_vec(<2 x float> %x) { ret <2 x float> %mul } -define <2 x float> @neg_constant_vec_undef(<2 x float> %x) { -; CHECK-LABEL: @neg_constant_vec_undef( +define <2 x float> @neg_constant_vec_poison(<2 x float> %x) { +; CHECK-LABEL: @neg_constant_vec_poison( ; CHECK-NEXT: [[MUL:%.*]] = fmul ninf <2 x float> [[X:%.*]], <float -2.000000e+00, float -3.000000e+00> ; CHECK-NEXT: ret <2 x float> [[MUL]] ; - %sub = fsub <2 x float> <float undef, float -0.0>, %x + %sub = fsub <2 x float> <float poison, float -0.0>, %x %mul = fmul ninf <2 x float> %sub, <float 2.0, float 3.0> ret <2 x float> %mul } @@ -162,34 +162,34 @@ define <2 x float> @neg_unary_neg_vec(<2 x float> %x, <2 x float> %y) { ret <2 x float> %mul } -define <2 x float> @neg_neg_vec_undef(<2 x float> %x, <2 x float> %y) { -; CHECK-LABEL: @neg_neg_vec_undef( +define <2 x float> @neg_neg_vec_poison(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @neg_neg_vec_poison( ; CHECK-NEXT: [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x float> [[MUL]] ; - %sub1 = fsub <2 x float> <float -0.0, float undef>, %x - %sub2 = fsub <2 x float> <float undef, float -0.0>, %y + %sub1 = fsub <2 x float> <float 
-0.0, float poison>, %x + %sub2 = fsub <2 x float> <float poison, float -0.0>, %y %mul = fmul arcp <2 x float> %sub1, %sub2 ret <2 x float> %mul } -define <2 x float> @unary_neg_neg_vec_undef(<2 x float> %x, <2 x float> %y) { -; CHECK-LABEL: @unary_neg_neg_vec_undef( +define <2 x float> @unary_neg_neg_vec_poison(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @unary_neg_neg_vec_poison( ; CHECK-NEXT: [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x float> [[MUL]] ; %neg = fneg <2 x float> %x - %sub = fsub <2 x float> <float undef, float -0.0>, %y + %sub = fsub <2 x float> <float poison, float -0.0>, %y %mul = fmul arcp <2 x float> %neg, %sub ret <2 x float> %mul } -define <2 x float> @neg_unary_neg_vec_undef(<2 x float> %x, <2 x float> %y) { -; CHECK-LABEL: @neg_unary_neg_vec_undef( +define <2 x float> @neg_unary_neg_vec_poison(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @neg_unary_neg_vec_poison( ; CHECK-NEXT: [[MUL:%.*]] = fmul arcp <2 x float> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x float> [[MUL]] ; - %sub = fsub <2 x float> <float -0.0, float undef>, %x + %sub = fsub <2 x float> <float -0.0, float poison>, %x %neg = fneg <2 x float> %y %mul = fmul arcp <2 x float> %sub, %neg ret <2 x float> %mul @@ -322,13 +322,13 @@ define <2 x float> @unary_neg_mul_vec(<2 x float> %x, <2 x float> %y) { ret <2 x float> %mul } -define <2 x float> @neg_mul_vec_undef(<2 x float> %x, <2 x float> %y) { -; CHECK-LABEL: @neg_mul_vec_undef( +define <2 x float> @neg_mul_vec_poison(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @neg_mul_vec_poison( ; CHECK-NEXT: [[SUB:%.*]] = fneg <2 x float> [[X:%.*]] ; CHECK-NEXT: [[MUL:%.*]] = fmul <2 x float> [[SUB]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x float> [[MUL]] ; - %sub = fsub <2 x float> <float undef, float -0.0>, %x + %sub = fsub <2 x float> <float poison, float -0.0>, %x %mul = fmul <2 x float> %sub, %y ret <2 x float> %mul } @@ -388,9 +388,9 @@ define void @test8(ptr %inout, i1 %c1) { entry: %0 = 
load i32, ptr %inout, align 4 %conv = uitofp i32 %0 to float - %vecinit = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, float %conv, i32 3 + %vecinit = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison>, float %conv, i32 3 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vecinit - %1 = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %1 = shufflevector <4 x float> %sub, <4 x float> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %mul = fmul <4 x float> zeroinitializer, %1 br label %for.cond @@ -633,15 +633,15 @@ define float @log2half(float %x, float %y) { define float @log2half_commute(float %x1, float %y) { ; CHECK-LABEL: @log2half_commute( +; CHECK-NEXT: [[X1:%.*]] = fmul fast float [[X2:%.*]], 0x3FC24924A0000000 ; CHECK-NEXT: [[TMP1:%.*]] = call fast float @llvm.log2.f32(float [[Y:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[X1:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[X1]] ; CHECK-NEXT: [[TMP3:%.*]] = fsub fast float [[TMP2]], [[X1]] -; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[TMP3]], 0x3FC24924A0000000 -; CHECK-NEXT: ret float [[MUL]] +; CHECK-NEXT: ret float [[TMP3]] ; - %x = fdiv float %x1, 7.0 ; thwart complexity-based canonicalization - %halfy = fmul float %y, 0.5 - %log2 = call float @llvm.log2.f32(float %halfy) + %x = fdiv fast float %x1, 7.0 ; thwart complexity-based canonicalization + %halfy = fmul fast float %y, 0.5 + %log2 = call fast float @llvm.log2.f32(float %halfy) %mul = fmul fast float %x, %log2 ret float %mul } @@ -653,11 +653,49 @@ define float @fdiv_constant_numerator_fmul(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fdiv reassoc float 1.200000e+07, [[X:%.*]] ; CHECK-NEXT: ret float [[T3]] ; + %t1 = fdiv reassoc float 2.0e+3, %x + %t3 = fmul reassoc float %t1, 6.0e+3 + ret float %t3 +} + +; C1/X * 
C2 => (C1*C2) / X with mixed fast-math flags + +define float @fdiv_constant_numerator_fmul_mixed(float %x) { +; CHECK-LABEL: @fdiv_constant_numerator_fmul_mixed( +; CHECK-NEXT: [[T3:%.*]] = fdiv reassoc float 1.200000e+07, [[X:%.*]] +; CHECK-NEXT: ret float [[T3]] +; + %t1 = fdiv reassoc float 2.0e+3, %x + %t3 = fmul fast float %t1, 6.0e+3 + ret float %t3 +} + +; C1/X * C2 => (C1*C2) / X with full fast-math flags + +define float @fdiv_constant_numerator_fmul_fast(float %x) { +; CHECK-LABEL: @fdiv_constant_numerator_fmul_fast( +; CHECK-NEXT: [[T3:%.*]] = fdiv fast float 1.200000e+07, [[X:%.*]] +; CHECK-NEXT: ret float [[T3]] +; + %t1 = fdiv fast float 2.0e+3, %x + %t3 = fmul fast float %t1, 6.0e+3 + ret float %t3 +} + +; C1/X * C2 => (C1*C2) / X with no fast-math flags on the fdiv + +define float @fdiv_constant_numerator_fmul_precdiv(float %x) { +; CHECK-LABEL: @fdiv_constant_numerator_fmul_precdiv( +; CHECK-NEXT: [[T1:%.*]] = fdiv float 2.000000e+03, [[X:%.*]] +; CHECK-NEXT: [[T4:%.*]] = fmul reassoc float [[T1]], 6.000000e+03 +; CHECK-NEXT: ret float [[T4]] +; %t1 = fdiv float 2.0e+3, %x %t3 = fmul reassoc float %t1, 6.0e+3 ret float %t3 } + ; C1/X * C2 => (C1*C2) / X is disabled if C1/X has multiple uses @fmul2_external = external global float @@ -682,7 +720,7 @@ define float @fdiv_constant_denominator_fmul(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fmul reassoc float [[X:%.*]], 3.000000e+00 ; CHECK-NEXT: ret float [[T3]] ; - %t1 = fdiv float %x, 2.0e+3 + %t1 = fdiv reassoc float %x, 2.0e+3 %t3 = fmul reassoc float %t1, 6.0e+3 ret float %t3 } @@ -692,7 +730,7 @@ define <4 x float> @fdiv_constant_denominator_fmul_vec(<4 x float> %x) { ; CHECK-NEXT: [[T3:%.*]] = fmul reassoc <4 x float> [[X:%.*]], <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00> ; CHECK-NEXT: ret <4 x float> [[T3]] ; - %t1 = fdiv <4 x float> %x, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3> + %t1 = fdiv reassoc <4 x float> %x, <float 2.0e+3, float 3.0e+3, 
float 2.0e+3, float 1.0e+3> %t3 = fmul reassoc <4 x float> %t1, <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3> ret <4 x float> %t3 } @@ -704,8 +742,8 @@ define <4 x float> @fdiv_constant_denominator_fmul_vec_constexpr(<4 x float> %x) ; CHECK-NEXT: [[T3:%.*]] = fmul reassoc <4 x float> [[X:%.*]], <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00> ; CHECK-NEXT: ret <4 x float> [[T3]] ; - %constExprMul = bitcast i128 trunc (i160 bitcast (<5 x float> <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3, float undef> to i160) to i128) to <4 x float> - %t1 = fdiv <4 x float> %x, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3> + %constExprMul = bitcast i128 trunc (i160 bitcast (<5 x float> <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3, float poison> to i160) to i128) to <4 x float> + %t1 = fdiv reassoc <4 x float> %x, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3> %t3 = fmul reassoc <4 x float> %t1, %constExprMul ret <4 x float> %t3 } @@ -734,7 +772,7 @@ define float @fdiv_constant_denominator_fmul_denorm(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fmul fast float [[X:%.*]], 0x3760620000000000 ; CHECK-NEXT: ret float [[T3]] ; - %t1 = fdiv float %x, 2.0e+3 + %t1 = fdiv fast float %x, 2.0e+3 %t3 = fmul fast float %t1, 0x3810000000000000 ret float %t3 } @@ -748,7 +786,7 @@ define float @fdiv_constant_denominator_fmul_denorm_try_harder(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fdiv reassoc float [[X:%.*]], 0x47E8000000000000 ; CHECK-NEXT: ret float [[T3]] ; - %t1 = fdiv float %x, 3.0 + %t1 = fdiv reassoc float %x, 3.0 %t3 = fmul reassoc float %t1, 0x3810000000000000 ret float %t3 } @@ -776,7 +814,7 @@ define float @fmul_fadd_distribute(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[TMP1]], 6.000000e+00 ; CHECK-NEXT: ret float [[T3]] ; - %t2 = fadd float %x, 2.0 + %t2 = fadd reassoc float %x, 2.0 %t3 = fmul reassoc float %t2, 3.0 ret float %t3 } @@ -787,7 +825,7 @@ define <2 x float> 
@fmul_fadd_distribute_vec(<2 x float> %x) { ; CHECK-NEXT: [[T3:%.*]] = fadd reassoc <2 x float> [[TMP1]], <float 1.200000e+07, float 1.200000e+07> ; CHECK-NEXT: ret <2 x float> [[T3]] ; - %t1 = fadd <2 x float> <float 2.0e+3, float 2.0e+3>, %x + %t1 = fadd reassoc <2 x float> <float 2.0e+3, float 2.0e+3>, %x %t3 = fmul reassoc <2 x float> %t1, <float 6.0e+3, float 6.0e+3> ret <2 x float> %t3 } @@ -798,7 +836,7 @@ define <vscale x 2 x float> @fmul_fadd_distribute_scalablevec(<vscale x 2 x floa ; CHECK-NEXT: [[T3:%.*]] = fadd reassoc <vscale x 2 x float> [[TMP1]], shufflevector (<vscale x 2 x float> insertelement (<vscale x 2 x float> poison, float 1.200000e+07, i64 0), <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer) ; CHECK-NEXT: ret <vscale x 2 x float> [[T3]] ; - %t1 = fadd <vscale x 2 x float> splat (float 2.0e+3), %x + %t1 = fadd reassoc <vscale x 2 x float> splat (float 2.0e+3), %x %t3 = fmul reassoc <vscale x 2 x float> %t1, splat (float 6.0e+3) @@ -813,7 +851,7 @@ define float @fmul_fsub_distribute1(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fadd reassoc float [[TMP1]], -6.000000e+00 ; CHECK-NEXT: ret float [[T3]] ; - %t2 = fsub float %x, 2.0 + %t2 = fsub reassoc float %x, 2.0 %t3 = fmul reassoc float %t2, 3.0 ret float %t3 } @@ -826,7 +864,7 @@ define float @fmul_fsub_distribute2(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fsub reassoc float 6.000000e+00, [[TMP1]] ; CHECK-NEXT: ret float [[T3]] ; - %t2 = fsub float 2.0, %x + %t2 = fsub reassoc float 2.0, %x %t3 = fmul reassoc float %t2, 3.0 ret float %t3 } @@ -840,8 +878,8 @@ define float @fmul_fadd_fmul_distribute(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fadd fast float [[TMP1]], 1.000000e+01 ; CHECK-NEXT: ret float [[T3]] ; - %t1 = fmul float %x, 6.0 - %t2 = fadd float %t1, 2.0 + %t1 = fmul fast float %x, 6.0 + %t2 = fadd fast float %t1, 2.0 %t3 = fmul fast float %t2, 5.0 ret float %t3 } @@ -872,8 +910,8 @@ define double @fmul_fadd_fdiv_distribute2(double %x) { ; CHECK-NEXT: [[T3:%.*]] = fadd reassoc 
double [[TMP1]], 0x34000000000000 ; CHECK-NEXT: ret double [[T3]] ; - %t1 = fdiv double %x, 3.0 - %t2 = fadd double %t1, 5.0 + %t1 = fdiv reassoc double %x, 3.0 + %t2 = fadd reassoc double %t1, 5.0 %t3 = fmul reassoc double %t2, 0x10000000000000 ret double %t3 } @@ -887,8 +925,8 @@ define double @fmul_fadd_fdiv_distribute3(double %x) { ; CHECK-NEXT: [[T3:%.*]] = fadd reassoc double [[TMP1]], 0x34000000000000 ; CHECK-NEXT: ret double [[T3]] ; - %t1 = fdiv double %x, 3.0 - %t2 = fadd double %t1, 5.0 + %t1 = fdiv reassoc double %x, 3.0 + %t2 = fadd reassoc double %t1, 5.0 %t3 = fmul reassoc double %t2, 0x10000000000000 ret double %t3 } @@ -902,8 +940,8 @@ define float @fmul_fsub_fmul_distribute(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fsub fast float 1.000000e+01, [[TMP1]] ; CHECK-NEXT: ret float [[T3]] ; - %t1 = fmul float %x, 6.0 - %t2 = fsub float 2.0, %t1 + %t1 = fmul fast float %x, 6.0 + %t2 = fsub fast float 2.0, %t1 %t3 = fmul fast float %t2, 5.0 ret float %t3 } @@ -932,8 +970,8 @@ define float @fmul_fsub_fmul_distribute2(float %x) { ; CHECK-NEXT: [[T3:%.*]] = fadd fast float [[TMP1]], -1.000000e+01 ; CHECK-NEXT: ret float [[T3]] ; - %t1 = fmul float %x, 6.0 - %t2 = fsub float %t1, 2.0 + %t1 = fmul fast float %x, 6.0 + %t2 = fsub fast float %t1, 2.0 %t3 = fmul fast float %t2, 5.0 ret float %t3 } @@ -986,8 +1024,8 @@ define double @fmul_fdivs_factor_common_denominator(double %x, double %y, double ; CHECK-NEXT: [[MUL:%.*]] = fdiv fast double [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret double [[MUL]] ; - %div1 = fdiv double %x, %z - %div2 = fdiv double %y, %z + %div1 = fdiv fast double %x, %z + %div2 = fdiv fast double %y, %z %mul = fmul fast double %div1, %div2 ret double %mul } @@ -999,8 +1037,8 @@ define double @fmul_fdivs_factor(double %x, double %y, double %z, double %w) { ; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc double [[TMP2]], [[Y:%.*]] ; CHECK-NEXT: ret double [[MUL]] ; - %div1 = fdiv double %x, %y - %div2 = fdiv double %z, %w + %div1 = fdiv reassoc double %x, %y 
+ %div2 = fdiv reassoc double %z, %w %mul = fmul reassoc double %div1, %div2 ret double %mul } @@ -1011,7 +1049,7 @@ define double @fmul_fdiv_factor(double %x, double %y, double %z) { ; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc double [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret double [[MUL]] ; - %div = fdiv double %x, %y + %div = fdiv reassoc double %x, %y %mul = fmul reassoc double %div, %z ret double %mul } @@ -1022,7 +1060,7 @@ define double @fmul_fdiv_factor_constant1(double %x, double %y) { ; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc double [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret double [[MUL]] ; - %div = fdiv double %x, %y + %div = fdiv reassoc double %x, %y %mul = fmul reassoc double %div, 42.0 ret double %mul } @@ -1033,7 +1071,7 @@ define <2 x float> @fmul_fdiv_factor_constant2(<2 x float> %x, <2 x float> %y) { ; CHECK-NEXT: [[MUL:%.*]] = fdiv reassoc <2 x float> [[TMP1]], <float 4.200000e+01, float 1.200000e+01> ; CHECK-NEXT: ret <2 x float> [[MUL]] ; - %div = fdiv <2 x float> %x, <float 42.0, float 12.0> + %div = fdiv reassoc <2 x float> %x, <float 42.0, float 12.0> %mul = fmul reassoc <2 x float> %div, %y ret <2 x float> %mul } @@ -1232,7 +1270,7 @@ define <vscale x 2 x float> @mul_scalable_splat_zero(<vscale x 2 x float> %z) { ; CHECK-LABEL: @mul_scalable_splat_zero( ; CHECK-NEXT: ret <vscale x 2 x float> zeroinitializer ; - %shuf = shufflevector <vscale x 2 x float> insertelement (<vscale x 2 x float> undef, float 0.0, i32 0), <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer + %shuf = shufflevector <vscale x 2 x float> insertelement (<vscale x 2 x float> poison, float 0.0, i32 0), <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer %t3 = fmul fast <vscale x 2 x float> %shuf, %z ret <vscale x 2 x float> %t3 } @@ -1355,7 +1393,7 @@ define <3 x float> @mul_neg_zero_nnan_ninf_vec(<3 x float> nofpclass(inf nan) %a ; CHECK-NEXT: ret <3 x float> [[RET]] ; entry: - %ret = fmul <3 x float> %a, <float -0.0, float undef, float poison> + %ret = fmul 
<3 x float> %a, <float -0.0, float poison, float poison> ret <3 x float> %ret } diff --git a/llvm/test/Transforms/InstCombine/fneg-as-int.ll b/llvm/test/Transforms/InstCombine/fneg-as-int.ll index d28e599..e3067b0 100644 --- a/llvm/test/Transforms/InstCombine/fneg-as-int.ll +++ b/llvm/test/Transforms/InstCombine/fneg-as-int.ll @@ -139,15 +139,15 @@ define <2 x i32> @not_fneg_as_int_v2f32_nonsplat(<2 x float> %x) { ret <2 x i32> %xor } -define <3 x i32> @fneg_as_int_v3f32_undef(<3 x float> %x) { -; CHECK-LABEL: define <3 x i32> @fneg_as_int_v3f32_undef +define <3 x i32> @fneg_as_int_v3f32_poison(<3 x float> %x) { +; CHECK-LABEL: define <3 x i32> @fneg_as_int_v3f32_poison ; CHECK-SAME: (<3 x float> [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = fneg <3 x float> [[X]] ; CHECK-NEXT: [[XOR:%.*]] = bitcast <3 x float> [[TMP1]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[XOR]] ; %bc = bitcast <3 x float> %x to <3 x i32> - %xor = xor <3 x i32> %bc, <i32 -2147483648, i32 undef, i32 -2147483648> + %xor = xor <3 x i32> %bc, <i32 -2147483648, i32 poison, i32 -2147483648> ret <3 x i32> %xor } diff --git a/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll b/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll index 9aa8d49..8c3e695 100644 --- a/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll +++ b/llvm/test/Transforms/InstCombine/fneg-fabs-as-int.ll @@ -158,8 +158,8 @@ define <2 x i32> @not_fneg_fabs_as_int_v2f32_nonsplat(<2 x float> %x) { ret <2 x i32> %or } -define <3 x i32> @fneg_fabs_as_int_v3f32_undef(<3 x float> %x) { -; CHECK-LABEL: define <3 x i32> @fneg_fabs_as_int_v3f32_undef +define <3 x i32> @fneg_fabs_as_int_v3f32_poison(<3 x float> %x) { +; CHECK-LABEL: define <3 x i32> @fneg_fabs_as_int_v3f32_poison ; CHECK-SAME: (<3 x float> [[X:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = call <3 x float> @llvm.fabs.v3f32(<3 x float> [[X]]) ; CHECK-NEXT: [[TMP2:%.*]] = fneg <3 x float> [[TMP1]] @@ -167,7 +167,7 @@ define <3 x i32> @fneg_fabs_as_int_v3f32_undef(<3 x float> %x) { ; 
CHECK-NEXT: ret <3 x i32> [[OR]] ; %bc = bitcast <3 x float> %x to <3 x i32> - %or = or <3 x i32> %bc, <i32 -2147483648, i32 undef, i32 -2147483648> + %or = or <3 x i32> %bc, <i32 -2147483648, i32 poison, i32 -2147483648> ret <3 x i32> %or } diff --git a/llvm/test/Transforms/InstCombine/fneg.ll b/llvm/test/Transforms/InstCombine/fneg.ll index ed68ba5..7c9289c 100644 --- a/llvm/test/Transforms/InstCombine/fneg.ll +++ b/llvm/test/Transforms/InstCombine/fneg.ll @@ -87,24 +87,24 @@ define float @fmul_fneg_extra_use(float %x) { ret float %r } -; Try a vector. Use special constants (NaN, INF, undef) because they don't change anything. +; Try a vector. Use special constants (NaN, INF, poison) because they don't change anything. define <4 x double> @fmul_fsub_vec(<4 x double> %x) { ; CHECK-LABEL: @fmul_fsub_vec( -; CHECK-NEXT: [[R:%.*]] = fmul <4 x double> [[X:%.*]], <double -4.200000e+01, double 0xFFF8000000000000, double 0xFFF0000000000000, double undef> +; CHECK-NEXT: [[R:%.*]] = fmul <4 x double> [[X:%.*]], <double -4.200000e+01, double 0xFFF8000000000000, double 0xFFF0000000000000, double poison> ; CHECK-NEXT: ret <4 x double> [[R]] ; - %m = fmul <4 x double> %x, <double 42.0, double 0x7FF8000000000000, double 0x7FF0000000000000, double undef> + %m = fmul <4 x double> %x, <double 42.0, double 0x7FF8000000000000, double 0x7FF0000000000000, double poison> %r = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m ret <4 x double> %r } define <4 x double> @fmul_fneg_vec(<4 x double> %x) { ; CHECK-LABEL: @fmul_fneg_vec( -; CHECK-NEXT: [[R:%.*]] = fmul <4 x double> [[X:%.*]], <double -4.200000e+01, double 0xFFF8000000000000, double 0xFFF0000000000000, double undef> +; CHECK-NEXT: [[R:%.*]] = fmul <4 x double> [[X:%.*]], <double -4.200000e+01, double 0xFFF8000000000000, double 0xFFF0000000000000, double poison> ; CHECK-NEXT: ret <4 x double> [[R]] ; - %m = fmul <4 x double> %x, <double 42.0, double 0x7FF8000000000000, double 0x7FF0000000000000, double 
undef> + %m = fmul <4 x double> %x, <double 42.0, double 0x7FF8000000000000, double 0x7FF0000000000000, double poison> %r = fneg <4 x double> %m ret <4 x double> %r } @@ -181,24 +181,24 @@ define float @fdiv_op1_constant_fneg_extra_use(float %x) { ret float %r } -; Try a vector. Use special constants (NaN, INF, undef) because they don't change anything. +; Try a vector. Use special constants (NaN, INF, poison) because they don't change anything. define <4 x double> @fdiv_op1_constant_fsub_vec(<4 x double> %x) { ; CHECK-LABEL: @fdiv_op1_constant_fsub_vec( -; CHECK-NEXT: [[R:%.*]] = fdiv <4 x double> [[X:%.*]], <double 4.200000e+01, double 0x7FF800000ABCD000, double 0x7FF0000000000000, double undef> +; CHECK-NEXT: [[R:%.*]] = fdiv <4 x double> [[X:%.*]], <double 4.200000e+01, double 0x7FF800000ABCD000, double 0x7FF0000000000000, double poison> ; CHECK-NEXT: ret <4 x double> [[R]] ; - %d = fdiv <4 x double> %x, <double -42.0, double 0xFFF800000ABCD000, double 0xFFF0000000000000, double undef> + %d = fdiv <4 x double> %x, <double -42.0, double 0xFFF800000ABCD000, double 0xFFF0000000000000, double poison> %r = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %d ret <4 x double> %r } define <4 x double> @fdiv_op1_constant_fneg_vec(<4 x double> %x) { ; CHECK-LABEL: @fdiv_op1_constant_fneg_vec( -; CHECK-NEXT: [[R:%.*]] = fdiv <4 x double> [[X:%.*]], <double 4.200000e+01, double 0x7FF800000ABCD000, double 0x7FF0000000000000, double undef> +; CHECK-NEXT: [[R:%.*]] = fdiv <4 x double> [[X:%.*]], <double 4.200000e+01, double 0x7FF800000ABCD000, double 0x7FF0000000000000, double poison> ; CHECK-NEXT: ret <4 x double> [[R]] ; - %d = fdiv <4 x double> %x, <double -42.0, double 0xFFF800000ABCD000, double 0xFFF0000000000000, double undef> + %d = fdiv <4 x double> %x, <double -42.0, double 0xFFF800000ABCD000, double 0xFFF0000000000000, double poison> %r = fneg <4 x double> %d ret <4 x double> %r } @@ -335,24 +335,24 @@ define float 
@fdiv_op0_constant_fneg_extra_use(float %x) { ret float %r } -; Try a vector. Use special constants (NaN, INF, undef) because they don't change anything. +; Try a vector. Use special constants (NaN, INF, poison) because they don't change anything. define <4 x double> @fdiv_op0_constant_fsub_vec(<4 x double> %x) { ; CHECK-LABEL: @fdiv_op0_constant_fsub_vec( -; CHECK-NEXT: [[R:%.*]] = fdiv <4 x double> <double 4.200000e+01, double 0xFFF8000000000000, double 0x7FF0000000000000, double undef>, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = fdiv <4 x double> <double 4.200000e+01, double 0xFFF8000000000000, double 0x7FF0000000000000, double poison>, [[X:%.*]] ; CHECK-NEXT: ret <4 x double> [[R]] ; - %d = fdiv <4 x double> <double -42.0, double 0x7FF8000000000000, double 0xFFF0000000000000, double undef>, %x + %d = fdiv <4 x double> <double -42.0, double 0x7FF8000000000000, double 0xFFF0000000000000, double poison>, %x %r = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %d ret <4 x double> %r } define <4 x double> @fdiv_op0_constant_fneg_vec(<4 x double> %x) { ; CHECK-LABEL: @fdiv_op0_constant_fneg_vec( -; CHECK-NEXT: [[R:%.*]] = fdiv <4 x double> <double 4.200000e+01, double 0xFFF8000000000000, double 0x7FF0000000000000, double undef>, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = fdiv <4 x double> <double 4.200000e+01, double 0xFFF8000000000000, double 0x7FF0000000000000, double poison>, [[X:%.*]] ; CHECK-NEXT: ret <4 x double> [[R]] ; - %d = fdiv <4 x double> <double -42.0, double 0x7FF8000000000000, double 0xFFF0000000000000, double undef>, %x + %d = fdiv <4 x double> <double -42.0, double 0x7FF8000000000000, double 0xFFF0000000000000, double poison>, %x %r = fneg <4 x double> %d ret <4 x double> %r } @@ -584,11 +584,11 @@ define <2 x float> @fneg_nsz_fadd_constant_vec(<2 x float> %x) { define <2 x float> @fake_fneg_nsz_fadd_constant_vec(<2 x float> %x) { ; CHECK-LABEL: @fake_fneg_nsz_fadd_constant_vec( -; CHECK-NEXT: [[R:%.*]] = fsub nsz <2 x float> <float 
-4.200000e+01, float undef>, [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = fsub nsz <2 x float> <float -4.200000e+01, float poison>, [[X:%.*]] ; CHECK-NEXT: ret <2 x float> [[R]] ; - %a = fadd <2 x float> %x, <float 42.0, float undef> - %r = fsub nsz <2 x float> <float undef, float -0.0>, %a + %a = fadd <2 x float> %x, <float 42.0, float poison> + %r = fsub nsz <2 x float> <float poison, float -0.0>, %a ret <2 x float> %r } diff --git a/llvm/test/Transforms/InstCombine/fold-inc-of-add-of-not-x-and-y-to-sub-x-from-y.ll b/llvm/test/Transforms/InstCombine/fold-inc-of-add-of-not-x-and-y-to-sub-x-from-y.ll index b482cfd..1fd570b 100644 --- a/llvm/test/Transforms/InstCombine/fold-inc-of-add-of-not-x-and-y-to-sub-x-from-y.ll +++ b/llvm/test/Transforms/InstCombine/fold-inc-of-add-of-not-x-and-y-to-sub-x-from-y.ll @@ -36,36 +36,36 @@ define <4 x i32> @t1_vec_splat(<4 x i32> %x, <4 x i32> %y) { ret <4 x i32> %t2 } -define <4 x i32> @t2_vec_undef0(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @t2_vec_undef0( +define <4 x i32> @t2_vec_poison0(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @t2_vec_poison0( ; CHECK-NEXT: [[T2:%.*]] = sub <4 x i32> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T2]] ; - %t0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 undef, i32 -1> + %t0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 poison, i32 -1> %t1 = add <4 x i32> %t0, %y %t2 = add <4 x i32> %t1, <i32 1, i32 1, i32 1, i32 1> ret <4 x i32> %t2 } -define <4 x i32> @t3_vec_undef1(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @t3_vec_undef1( +define <4 x i32> @t3_vec_poison1(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @t3_vec_poison1( ; CHECK-NEXT: [[T2:%.*]] = sub <4 x i32> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T2]] ; %t0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> %t1 = add <4 x i32> %t0, %y - %t2 = add <4 x i32> %t1, <i32 1, i32 1, i32 undef, i32 1> + %t2 = add <4 x i32> %t1, <i32 1, i32 1, i32 poison, i32 1> ret <4 x i32> %t2 } -define <4 x i32> @t4_vec_undef2(<4 x i32> %x, <4 x 
i32> %y) { -; CHECK-LABEL: @t4_vec_undef2( +define <4 x i32> @t4_vec_poison2(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @t4_vec_poison2( ; CHECK-NEXT: [[T2:%.*]] = sub <4 x i32> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T2]] ; - %t0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 undef, i32 -1> + %t0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 poison, i32 -1> %t1 = add <4 x i32> %t0, %y - %t2 = add <4 x i32> %t1, <i32 1, i32 1, i32 undef, i32 1> + %t2 = add <4 x i32> %t1, <i32 1, i32 1, i32 poison, i32 1> ret <4 x i32> %t2 } diff --git a/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll b/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll index 6f311f0..af580ba 100644 --- a/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll +++ b/llvm/test/Transforms/InstCombine/fold-sub-of-not-to-inc-of-add.ll @@ -50,13 +50,13 @@ define <4 x i32> @p1_vector_splat(<4 x i32> %x, <4 x i32> %y) { ret <4 x i32> %t1 } -define <4 x i32> @p2_vector_undef(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @p2_vector_undef( +define <4 x i32> @p2_vector_poison(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @p2_vector_poison( ; CHECK-NEXT: [[T0_NEG:%.*]] = add <4 x i32> [[X:%.*]], <i32 1, i32 1, i32 1, i32 1> ; CHECK-NEXT: [[T1:%.*]] = add <4 x i32> [[T0_NEG]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[T1]] ; - %t0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 undef, i32 -1> + %t0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 poison, i32 -1> %t1 = sub <4 x i32> %y, %t0 ret <4 x i32> %t1 } diff --git a/llvm/test/Transforms/InstCombine/fpcast.ll b/llvm/test/Transforms/InstCombine/fpcast.ll index ac4b88f..69daac7 100644 --- a/llvm/test/Transforms/InstCombine/fpcast.ll +++ b/llvm/test/Transforms/InstCombine/fpcast.ll @@ -51,13 +51,13 @@ define half @unary_fneg_fptrunc(float %a) { ret half %c } -define <2 x half> @fneg_fptrunc_vec_undef(<2 x float> %a) { -; CHECK-LABEL: @fneg_fptrunc_vec_undef( +define <2 x half> @fneg_fptrunc_vec_poison(<2 x float> %a) { +; 
CHECK-LABEL: @fneg_fptrunc_vec_poison( ; CHECK-NEXT: [[TMP1:%.*]] = fptrunc <2 x float> [[A:%.*]] to <2 x half> ; CHECK-NEXT: [[C:%.*]] = fneg <2 x half> [[TMP1]] ; CHECK-NEXT: ret <2 x half> [[C]] ; - %b = fsub <2 x float> <float -0.0, float undef>, %a + %b = fsub <2 x float> <float -0.0, float poison>, %a %c = fptrunc <2 x float> %b to <2 x half> ret <2 x half> %c } @@ -170,7 +170,7 @@ define half @sint_to_fptrunc(i32 %x) { define half @masked_sint_to_fptrunc1(i32 %x) { ; CHECK-LABEL: @masked_sint_to_fptrunc1( ; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215 -; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to half ; CHECK-NEXT: ret half [[R]] ; %m = and i32 %x, 16777215 @@ -182,7 +182,7 @@ define half @masked_sint_to_fptrunc1(i32 %x) { define half @masked_sint_to_fptrunc2(i32 %x) { ; CHECK-LABEL: @masked_sint_to_fptrunc2( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8 -; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to half ; CHECK-NEXT: ret half [[R]] ; %m = lshr i32 %x, 8 @@ -194,7 +194,7 @@ define half @masked_sint_to_fptrunc2(i32 %x) { define half @masked_sint_to_fptrunc3(i32 %x) { ; CHECK-LABEL: @masked_sint_to_fptrunc3( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 7 -; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[M]] to float +; CHECK-NEXT: [[F:%.*]] = uitofp nneg i32 [[M]] to float ; CHECK-NEXT: [[R:%.*]] = fptrunc float [[F]] to half ; CHECK-NEXT: ret half [[R]] ; @@ -218,7 +218,7 @@ define double @sint_to_fpext(i32 %x) { define double @masked_sint_to_fpext1(i32 %x) { ; CHECK-LABEL: @masked_sint_to_fpext1( ; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215 -; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to double +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to double ; CHECK-NEXT: ret double [[R]] ; %m = and i32 %x, 16777215 @@ -230,7 +230,7 @@ define double @masked_sint_to_fpext1(i32 %x) { define double @masked_sint_to_fpext2(i32 %x) { ; CHECK-LABEL: 
@masked_sint_to_fpext2( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8 -; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[M]] to double +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to double ; CHECK-NEXT: ret double [[R]] ; %m = lshr i32 %x, 8 @@ -242,7 +242,7 @@ define double @masked_sint_to_fpext2(i32 %x) { define double @masked_sint_to_fpext3(i32 %x) { ; CHECK-LABEL: @masked_sint_to_fpext3( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 7 -; CHECK-NEXT: [[F:%.*]] = sitofp i32 [[M]] to float +; CHECK-NEXT: [[F:%.*]] = uitofp nneg i32 [[M]] to float ; CHECK-NEXT: [[R:%.*]] = fpext float [[F]] to double ; CHECK-NEXT: ret double [[R]] ; @@ -266,7 +266,7 @@ define half @uint_to_fptrunc(i32 %x) { define half @masked_uint_to_fptrunc1(i32 %x) { ; CHECK-LABEL: @masked_uint_to_fptrunc1( ; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215 -; CHECK-NEXT: [[R:%.*]] = uitofp i32 [[M]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to half ; CHECK-NEXT: ret half [[R]] ; %m = and i32 %x, 16777215 @@ -278,7 +278,7 @@ define half @masked_uint_to_fptrunc1(i32 %x) { define half @masked_uint_to_fptrunc2(i32 %x) { ; CHECK-LABEL: @masked_uint_to_fptrunc2( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8 -; CHECK-NEXT: [[R:%.*]] = uitofp i32 [[M]] to half +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to half ; CHECK-NEXT: ret half [[R]] ; %m = lshr i32 %x, 8 @@ -290,7 +290,7 @@ define half @masked_uint_to_fptrunc2(i32 %x) { define half @masked_uint_to_fptrunc3(i32 %x) { ; CHECK-LABEL: @masked_uint_to_fptrunc3( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 7 -; CHECK-NEXT: [[F:%.*]] = uitofp i32 [[M]] to float +; CHECK-NEXT: [[F:%.*]] = uitofp nneg i32 [[M]] to float ; CHECK-NEXT: [[R:%.*]] = fptrunc float [[F]] to half ; CHECK-NEXT: ret half [[R]] ; @@ -314,7 +314,7 @@ define double @uint_to_fpext(i32 %x) { define double @masked_uint_to_fpext1(i32 %x) { ; CHECK-LABEL: @masked_uint_to_fpext1( ; CHECK-NEXT: [[M:%.*]] = and i32 [[X:%.*]], 16777215 -; CHECK-NEXT: [[R:%.*]] = uitofp 
i32 [[M]] to double +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to double ; CHECK-NEXT: ret double [[R]] ; %m = and i32 %x, 16777215 @@ -326,7 +326,7 @@ define double @masked_uint_to_fpext1(i32 %x) { define double @masked_uint_to_fpext2(i32 %x) { ; CHECK-LABEL: @masked_uint_to_fpext2( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 8 -; CHECK-NEXT: [[R:%.*]] = uitofp i32 [[M]] to double +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[M]] to double ; CHECK-NEXT: ret double [[R]] ; %m = lshr i32 %x, 8 @@ -338,7 +338,7 @@ define double @masked_uint_to_fpext2(i32 %x) { define double @masked_uint_to_fpext3(i32 %x) { ; CHECK-LABEL: @masked_uint_to_fpext3( ; CHECK-NEXT: [[M:%.*]] = lshr i32 [[X:%.*]], 7 -; CHECK-NEXT: [[F:%.*]] = uitofp i32 [[M]] to float +; CHECK-NEXT: [[F:%.*]] = uitofp nneg i32 [[M]] to float ; CHECK-NEXT: [[R:%.*]] = fpext float [[F]] to double ; CHECK-NEXT: ret double [[R]] ; diff --git a/llvm/test/Transforms/InstCombine/fsub.ll b/llvm/test/Transforms/InstCombine/fsub.ll index 6e13c33..f1e7086 100644 --- a/llvm/test/Transforms/InstCombine/fsub.ll +++ b/llvm/test/Transforms/InstCombine/fsub.ll @@ -153,12 +153,12 @@ define <2 x float> @constant_op1_vec(<2 x float> %x, <2 x float> %y) { ret <2 x float> %r } -define <2 x float> @constant_op1_vec_undef(<2 x float> %x, <2 x float> %y) { -; CHECK-LABEL: @constant_op1_vec_undef( -; CHECK-NEXT: [[R:%.*]] = fadd <2 x float> [[X:%.*]], <float undef, float 4.200000e+01> +define <2 x float> @constant_op1_vec_poison(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @constant_op1_vec_poison( +; CHECK-NEXT: [[R:%.*]] = fadd <2 x float> [[X:%.*]], <float poison, float 4.200000e+01> ; CHECK-NEXT: ret <2 x float> [[R]] ; - %r = fsub <2 x float> %x, <float undef, float -42.0> + %r = fsub <2 x float> %x, <float poison, float -42.0> ret <2 x float> %r } @@ -204,12 +204,12 @@ define <2 x float> @unary_neg_op1_vec(<2 x float> %x, <2 x float> %y) { ret <2 x float> %r } -define <2 x float> @neg_op1_vec_undef(<2 x float> %x, 
<2 x float> %y) { -; CHECK-LABEL: @neg_op1_vec_undef( +define <2 x float> @neg_op1_vec_poison(<2 x float> %x, <2 x float> %y) { +; CHECK-LABEL: @neg_op1_vec_poison( ; CHECK-NEXT: [[R:%.*]] = fadd <2 x float> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x float> [[R]] ; - %negy = fsub <2 x float> <float -0.0, float undef>, %y + %negy = fsub <2 x float> <float -0.0, float poison>, %y %r = fsub <2 x float> %x, %negy ret <2 x float> %r } diff --git a/llvm/test/Transforms/InstCombine/funnel.ll b/llvm/test/Transforms/InstCombine/funnel.ll index 162519e..a54e6e4 100644 --- a/llvm/test/Transforms/InstCombine/funnel.ll +++ b/llvm/test/Transforms/InstCombine/funnel.ll @@ -43,24 +43,24 @@ define <2 x i16> @fshl_v2i16_constant_splat(<2 x i16> %x, <2 x i16> %y) { ret <2 x i16> %r } -define <2 x i16> @fshl_v2i16_constant_splat_undef0(<2 x i16> %x, <2 x i16> %y) { -; CHECK-LABEL: @fshl_v2i16_constant_splat_undef0( +define <2 x i16> @fshl_v2i16_constant_splat_poison0(<2 x i16> %x, <2 x i16> %y) { +; CHECK-LABEL: @fshl_v2i16_constant_splat_poison0( ; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>) ; CHECK-NEXT: ret <2 x i16> [[R]] ; - %shl = shl <2 x i16> %x, <i16 undef, i16 1> + %shl = shl <2 x i16> %x, <i16 poison, i16 1> %shr = lshr <2 x i16> %y, <i16 15, i16 15> %r = or <2 x i16> %shl, %shr ret <2 x i16> %r } -define <2 x i16> @fshl_v2i16_constant_splat_undef1(<2 x i16> %x, <2 x i16> %y) { -; CHECK-LABEL: @fshl_v2i16_constant_splat_undef1( +define <2 x i16> @fshl_v2i16_constant_splat_poison1(<2 x i16> %x, <2 x i16> %y) { +; CHECK-LABEL: @fshl_v2i16_constant_splat_poison1( ; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[Y:%.*]], <2 x i16> <i16 1, i16 1>) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, <i16 1, i16 1> - %shr = lshr <2 x i16> %y, <i16 15, i16 undef> + %shr = lshr <2 x i16> %y, <i16 15, i16 poison> %r = or <2 x i16> %shl, %shr ret <2 x 
i16> %r } @@ -78,30 +78,30 @@ define <2 x i17> @fshr_v2i17_constant_splat(<2 x i17> %x, <2 x i17> %y) { ret <2 x i17> %r } -define <2 x i17> @fshr_v2i17_constant_splat_undef0(<2 x i17> %x, <2 x i17> %y) { -; CHECK-LABEL: @fshr_v2i17_constant_splat_undef0( +define <2 x i17> @fshr_v2i17_constant_splat_poison0(<2 x i17> %x, <2 x i17> %y) { +; CHECK-LABEL: @fshr_v2i17_constant_splat_poison0( ; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> <i17 5, i17 5>) ; CHECK-NEXT: ret <2 x i17> [[R]] ; - %shr = lshr <2 x i17> %x, <i17 12, i17 undef> - %shl = shl <2 x i17> %y, <i17 undef, i17 5> + %shr = lshr <2 x i17> %x, <i17 12, i17 poison> + %shl = shl <2 x i17> %y, <i17 poison, i17 5> %r = or <2 x i17> %shr, %shl ret <2 x i17> %r } -define <2 x i17> @fshr_v2i17_constant_splat_undef1(<2 x i17> %x, <2 x i17> %y) { -; CHECK-LABEL: @fshr_v2i17_constant_splat_undef1( +define <2 x i17> @fshr_v2i17_constant_splat_poison1(<2 x i17> %x, <2 x i17> %y) { +; CHECK-LABEL: @fshr_v2i17_constant_splat_poison1( ; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[Y:%.*]], <2 x i17> [[X:%.*]], <2 x i17> <i17 5, i17 5>) ; CHECK-NEXT: ret <2 x i17> [[R]] ; - %shr = lshr <2 x i17> %x, <i17 12, i17 undef> - %shl = shl <2 x i17> %y, <i17 5, i17 undef> + %shr = lshr <2 x i17> %x, <i17 12, i17 poison> + %shl = shl <2 x i17> %y, <i17 5, i17 poison> %r = or <2 x i17> %shr, %shl ret <2 x i17> %r } ; Allow arbitrary shift constants. -; Support undef elements. +; Support poison elements. 
define <2 x i32> @fshr_v2i32_constant_nonsplat(<2 x i32> %x, <2 x i32> %y) { ; CHECK-LABEL: @fshr_v2i32_constant_nonsplat( @@ -114,24 +114,24 @@ define <2 x i32> @fshr_v2i32_constant_nonsplat(<2 x i32> %x, <2 x i32> %y) { ret <2 x i32> %r } -define <2 x i32> @fshr_v2i32_constant_nonsplat_undef0(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @fshr_v2i32_constant_nonsplat_undef0( +define <2 x i32> @fshr_v2i32_constant_nonsplat_poison0(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @fshr_v2i32_constant_nonsplat_poison0( ; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> <i32 0, i32 13>) ; CHECK-NEXT: ret <2 x i32> [[R]] ; - %shr = lshr <2 x i32> %x, <i32 undef, i32 19> + %shr = lshr <2 x i32> %x, <i32 poison, i32 19> %shl = shl <2 x i32> %y, <i32 15, i32 13> %r = or <2 x i32> %shl, %shr ret <2 x i32> %r } -define <2 x i32> @fshr_v2i32_constant_nonsplat_undef1(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @fshr_v2i32_constant_nonsplat_undef1( -; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> <i32 15, i32 0>) +define <2 x i32> @fshr_v2i32_constant_nonsplat_poison1(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @fshr_v2i32_constant_nonsplat_poison1( +; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> <i32 15, i32 poison>) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %shr = lshr <2 x i32> %x, <i32 17, i32 19> - %shl = shl <2 x i32> %y, <i32 15, i32 undef> + %shl = shl <2 x i32> %y, <i32 15, i32 poison> %r = or <2 x i32> %shl, %shr ret <2 x i32> %r } @@ -147,13 +147,13 @@ define <2 x i36> @fshl_v2i36_constant_nonsplat(<2 x i36> %x, <2 x i36> %y) { ret <2 x i36> %r } -define <3 x i36> @fshl_v3i36_constant_nonsplat_undef0(<3 x i36> %x, <3 x i36> %y) { -; CHECK-LABEL: @fshl_v3i36_constant_nonsplat_undef0( -; CHECK-NEXT: [[R:%.*]] = call <3 x i36> @llvm.fshl.v3i36(<3 x i36> [[X:%.*]], <3 x i36> [[Y:%.*]], <3 x 
i36> <i36 21, i36 11, i36 0>) +define <3 x i36> @fshl_v3i36_constant_nonsplat_poison0(<3 x i36> %x, <3 x i36> %y) { +; CHECK-LABEL: @fshl_v3i36_constant_nonsplat_poison0( +; CHECK-NEXT: [[R:%.*]] = call <3 x i36> @llvm.fshl.v3i36(<3 x i36> [[X:%.*]], <3 x i36> [[Y:%.*]], <3 x i36> <i36 21, i36 11, i36 poison>) ; CHECK-NEXT: ret <3 x i36> [[R]] ; - %shl = shl <3 x i36> %x, <i36 21, i36 11, i36 undef> - %shr = lshr <3 x i36> %y, <i36 15, i36 25, i36 undef> + %shl = shl <3 x i36> %x, <i36 21, i36 11, i36 poison> + %shr = lshr <3 x i36> %y, <i36 15, i36 25, i36 poison> %r = or <3 x i36> %shl, %shr ret <3 x i36> %r } diff --git a/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll b/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll index 12a81f0..40caa57 100644 --- a/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll +++ b/llvm/test/Transforms/InstCombine/get-lowbitmask-upto-and-including-bit.ll @@ -41,36 +41,36 @@ define <2 x i8> @t2_vec(<2 x i8> %x) { %mask = or <2 x i8> %lowbitmask, %bitmask ret <2 x i8> %mask } -define <3 x i8> @t3_vec_undef0(<3 x i8> %x) { -; CHECK-LABEL: @t3_vec_undef0( +define <3 x i8> @t3_vec_poison0(<3 x i8> %x) { +; CHECK-LABEL: @t3_vec_poison0( ; CHECK-NEXT: [[TMP1:%.*]] = sub <3 x i8> <i8 7, i8 7, i8 7>, [[X:%.*]] ; CHECK-NEXT: [[MASK:%.*]] = lshr <3 x i8> <i8 -1, i8 -1, i8 -1>, [[TMP1]] ; CHECK-NEXT: ret <3 x i8> [[MASK]] ; - %bitmask = shl <3 x i8> <i8 1, i8 undef, i8 1>, %x + %bitmask = shl <3 x i8> <i8 1, i8 poison, i8 1>, %x %lowbitmask = add <3 x i8> %bitmask, <i8 -1, i8 -1, i8 -1> %mask = or <3 x i8> %lowbitmask, %bitmask ret <3 x i8> %mask } -define <3 x i8> @t4_vec_undef1(<3 x i8> %x) { -; CHECK-LABEL: @t4_vec_undef1( +define <3 x i8> @t4_vec_poison1(<3 x i8> %x) { +; CHECK-LABEL: @t4_vec_poison1( ; CHECK-NEXT: [[TMP1:%.*]] = sub <3 x i8> <i8 7, i8 7, i8 7>, [[X:%.*]] ; CHECK-NEXT: [[MASK:%.*]] = lshr <3 x i8> <i8 -1, i8 -1, i8 -1>, [[TMP1]] ; CHECK-NEXT: ret <3 x 
i8> [[MASK]] ; %bitmask = shl <3 x i8> <i8 1, i8 1, i8 1>, %x - %lowbitmask = add <3 x i8> %bitmask, <i8 -1, i8 undef, i8 -1> + %lowbitmask = add <3 x i8> %bitmask, <i8 -1, i8 poison, i8 -1> %mask = or <3 x i8> %lowbitmask, %bitmask ret <3 x i8> %mask } -define <3 x i8> @t5_vec_undef2(<3 x i8> %x) { -; CHECK-LABEL: @t5_vec_undef2( +define <3 x i8> @t5_vec_poison2(<3 x i8> %x) { +; CHECK-LABEL: @t5_vec_poison2( ; CHECK-NEXT: [[TMP1:%.*]] = sub <3 x i8> <i8 7, i8 7, i8 7>, [[X:%.*]] ; CHECK-NEXT: [[MASK:%.*]] = lshr <3 x i8> <i8 -1, i8 -1, i8 -1>, [[TMP1]] ; CHECK-NEXT: ret <3 x i8> [[MASK]] ; - %bitmask = shl <3 x i8> <i8 1, i8 1, i8 undef>, %x - %lowbitmask = add <3 x i8> %bitmask, <i8 -1, i8 undef, i8 -1> + %bitmask = shl <3 x i8> <i8 1, i8 1, i8 poison>, %x + %lowbitmask = add <3 x i8> %bitmask, <i8 -1, i8 poison, i8 -1> %mask = or <3 x i8> %lowbitmask, %bitmask ret <3 x i8> %mask } diff --git a/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll b/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll index c8f1459..e4cae13 100644 --- a/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll +++ b/llvm/test/Transforms/InstCombine/hoist-negation-out-of-bias-calculation.ll @@ -55,14 +55,14 @@ define <2 x i8> @t2_vec(<2 x i8> %x, <2 x i8> %y) { ret <2 x i8> %negbias } -define <2 x i8> @t3_vec_undef(<2 x i8> %x, <2 x i8> %y) { -; CHECK-LABEL: @t3_vec_undef( +define <2 x i8> @t3_vec_poison(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @t3_vec_poison( ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i8> [[Y:%.*]], <i8 -1, i8 -1> ; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[X:%.*]] ; CHECK-NEXT: [[NEGBIAS:%.*]] = sub <2 x i8> zeroinitializer, [[TMP2]] ; CHECK-NEXT: ret <2 x i8> [[NEGBIAS]] ; - %negy = sub <2 x i8> <i8 0, i8 undef>, %y + %negy = sub <2 x i8> <i8 0, i8 poison>, %y %unbiasedx = and <2 x i8> %negy, %x %negbias = sub <2 x i8> %unbiasedx, %x ret <2 x i8> %negbias diff --git 
a/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll b/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll index e024285..2217666 100644 --- a/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll +++ b/llvm/test/Transforms/InstCombine/hoist-not-from-ashr-operand.ll @@ -41,14 +41,14 @@ define <2 x i8> @t2_vec(<2 x i8> %x, <2 x i8> %y) { %ashr = ashr <2 x i8> %not_x, %y ret <2 x i8> %ashr } -; Note that we must sanitize undef elts of -1 constant to -1 or 0. -define <2 x i8> @t3_vec_undef(<2 x i8> %x, <2 x i8> %y) { -; CHECK-LABEL: @t3_vec_undef( +; Note that we must sanitize poison elts of -1 constant to -1 or 0. +define <2 x i8> @t3_vec_poison(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @t3_vec_poison( ; CHECK-NEXT: [[NOT_X_NOT:%.*]] = ashr <2 x i8> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[ASHR:%.*]] = xor <2 x i8> [[NOT_X_NOT]], <i8 -1, i8 -1> ; CHECK-NEXT: ret <2 x i8> [[ASHR]] ; - %not_x = xor <2 x i8> %x, <i8 -1, i8 undef> + %not_x = xor <2 x i8> %x, <i8 -1, i8 poison> %ashr = ashr <2 x i8> %not_x, %y ret <2 x i8> %ashr } diff --git a/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll index 5adf476..32ef626 100644 --- a/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-uge-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll @@ -56,8 +56,8 @@ define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[BITS:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) -; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS]] -; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> 
[[VAL_HIGHBITS]], zeroinitializer +; CHECK-NEXT: [[T1:%.*]] = add <3 x i8> [[T0]], <i8 -1, i8 -1, i8 -1> +; CHECK-NEXT: [[R:%.*]] = icmp uge <3 x i8> [[T1]], [[VAL:%.*]] ; CHECK-NEXT: ret <3 x i1> [[R]] ; %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %bits diff --git a/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll index 7f46038..27b02c8 100644 --- a/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-uge-of-not-of-shl-allones-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll @@ -40,38 +40,38 @@ define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) { ret <2 x i1> %r } -define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) { -; CHECK-LABEL: @p2_vec_undef0( +define <3 x i1> @p2_vec_poison0(<3 x i8> %val, <3 x i8> %bits) { +; CHECK-LABEL: @p2_vec_poison0( ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; - %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %bits + %t0 = shl <3 x i8> <i8 -1, i8 poison, i8 -1>, %bits %t1 = xor <3 x i8> %t0, <i8 -1, i8 -1, i8 -1> %r = icmp uge <3 x i8> %t1, %val ret <3 x i1> %r } -define <3 x i1> @p2_vec_undef1(<3 x i8> %val, <3 x i8> %bits) { -; CHECK-LABEL: @p2_vec_undef1( +define <3 x i1> @p2_vec_poison1(<3 x i8> %val, <3 x i8> %bits) { +; CHECK-LABEL: @p2_vec_poison1( ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; %t0 = shl <3 x i8> <i8 -1, i8 -1, i8 -1>, %bits - %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1> + %t1 = xor <3 x i8> %t0, <i8 -1, i8 
poison, i8 -1> %r = icmp uge <3 x i8> %t1, %val ret <3 x i1> %r } -define <3 x i1> @p2_vec_undef2(<3 x i8> %val, <3 x i8> %bits) { -; CHECK-LABEL: @p2_vec_undef2( +define <3 x i1> @p2_vec_poison2(<3 x i8> %val, <3 x i8> %bits) { +; CHECK-LABEL: @p2_vec_poison2( ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; - %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %bits - %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1> + %t0 = shl <3 x i8> <i8 -1, i8 poison, i8 -1>, %bits + %t1 = xor <3 x i8> %t0, <i8 -1, i8 poison, i8 -1> %r = icmp uge <3 x i8> %t1, %val ret <3 x i1> %r } diff --git a/llvm/test/Transforms/InstCombine/icmp-ugt-of-shl-1-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-ugt-of-shl-1-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll index 550e8bb..72cfb5a 100644 --- a/llvm/test/Transforms/InstCombine/icmp-ugt-of-shl-1-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-ugt-of-shl-1-by-bits-and-val-to-icmp-eq-of-lshr-val-by-bits-and-0.ll @@ -38,13 +38,13 @@ define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) { ret <2 x i1> %r } -define <3 x i1> @p2_vec_undef(<3 x i8> %val, <3 x i8> %bits) { -; CHECK-LABEL: @p2_vec_undef( +define <3 x i1> @p2_vec_poison(<3 x i8> %val, <3 x i8> %bits) { +; CHECK-LABEL: @p2_vec_poison( ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp eq <3 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; - %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %bits + %t0 = shl <3 x i8> <i8 1, i8 poison, i8 1>, %bits %r = icmp ugt <3 x i8> %t0, %val ret <3 x i1> %r } diff --git a/llvm/test/Transforms/InstCombine/icmp-ule-of-shl-1-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll 
b/llvm/test/Transforms/InstCombine/icmp-ule-of-shl-1-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll index 26b667d..79e6914 100644 --- a/llvm/test/Transforms/InstCombine/icmp-ule-of-shl-1-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-ule-of-shl-1-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll @@ -38,13 +38,13 @@ define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) { ret <2 x i1> %r } -define <3 x i1> @p2_vec_undef(<3 x i8> %val, <3 x i8> %bits) { -; CHECK-LABEL: @p2_vec_undef( +define <3 x i1> @p2_vec_poison(<3 x i8> %val, <3 x i8> %bits) { +; CHECK-LABEL: @p2_vec_poison( ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; - %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %bits + %t0 = shl <3 x i8> <i8 1, i8 poison, i8 1>, %bits %r = icmp ule <3 x i8> %t0, %val ret <3 x i1> %r } diff --git a/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll index dd353d4..25894a2 100644 --- a/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-ult-of-add-of-shl-one-by-bits-to-allones-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll @@ -56,8 +56,8 @@ define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) { ; CHECK-LABEL: @p2_vec_undef0( ; CHECK-NEXT: [[T0:%.*]] = shl <3 x i8> <i8 1, i8 undef, i8 1>, [[BITS:%.*]] ; CHECK-NEXT: call void @use3i8(<3 x i8> [[T0]]) -; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS]] -; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer +; CHECK-NEXT: [[T1:%.*]] = add <3 x i8> [[T0]], 
<i8 -1, i8 -1, i8 -1> +; CHECK-NEXT: [[R:%.*]] = icmp ult <3 x i8> [[T1]], [[VAL:%.*]] ; CHECK-NEXT: ret <3 x i1> [[R]] ; %t0 = shl <3 x i8> <i8 1, i8 undef, i8 1>, %bits diff --git a/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll b/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll index c7a45c5..8441033 100644 --- a/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll +++ b/llvm/test/Transforms/InstCombine/icmp-ult-of-not-of-shl-allones-by-bits-and-val-to-icmp-ne-of-lshr-val-by-bits-and-0.ll @@ -40,38 +40,38 @@ define <2 x i1> @p1_vec(<2 x i8> %val, <2 x i8> %bits) { ret <2 x i1> %r } -define <3 x i1> @p2_vec_undef0(<3 x i8> %val, <3 x i8> %bits) { -; CHECK-LABEL: @p2_vec_undef0( +define <3 x i1> @p2_vec_poison0(<3 x i8> %val, <3 x i8> %bits) { +; CHECK-LABEL: @p2_vec_poison0( ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; - %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %bits + %t0 = shl <3 x i8> <i8 -1, i8 poison, i8 -1>, %bits %t1 = xor <3 x i8> %t0, <i8 -1, i8 -1, i8 -1> %r = icmp ult <3 x i8> %t1, %val ret <3 x i1> %r } -define <3 x i1> @p2_vec_undef1(<3 x i8> %val, <3 x i8> %bits) { -; CHECK-LABEL: @p2_vec_undef1( +define <3 x i1> @p2_vec_poison1(<3 x i8> %val, <3 x i8> %bits) { +; CHECK-LABEL: @p2_vec_poison1( ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; %t0 = shl <3 x i8> <i8 -1, i8 -1, i8 -1>, %bits - %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1> + %t1 = xor <3 x i8> %t0, <i8 -1, i8 poison, i8 -1> %r = icmp ult <3 x i8> %t1, %val ret <3 x i1> %r } -define <3 x i1> 
@p2_vec_undef2(<3 x i8> %val, <3 x i8> %bits) { -; CHECK-LABEL: @p2_vec_undef2( +define <3 x i1> @p2_vec_poison2(<3 x i8> %val, <3 x i8> %bits) { +; CHECK-LABEL: @p2_vec_poison2( ; CHECK-NEXT: [[VAL_HIGHBITS:%.*]] = lshr <3 x i8> [[VAL:%.*]], [[BITS:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp ne <3 x i8> [[VAL_HIGHBITS]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[R]] ; - %t0 = shl <3 x i8> <i8 -1, i8 undef, i8 -1>, %bits - %t1 = xor <3 x i8> %t0, <i8 -1, i8 undef, i8 -1> + %t0 = shl <3 x i8> <i8 -1, i8 poison, i8 -1>, %bits + %t1 = xor <3 x i8> %t0, <i8 -1, i8 poison, i8 -1> %r = icmp ult <3 x i8> %t1, %val ret <3 x i1> %r } diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index 10ab1fe..31093c7 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -1790,14 +1790,14 @@ define <2 x i1> @icmp_add20_eq_add57_splat(<2 x i32> %x, <2 x i32> %y) { ret <2 x i1> %cmp } -define <2 x i1> @icmp_add20_eq_add57_undef(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @icmp_add20_eq_add57_undef( +define <2 x i1> @icmp_add20_eq_add57_poison(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @icmp_add20_eq_add57_poison( ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[Y:%.*]], <i32 37, i32 37> ; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %1 = add <2 x i32> %x, <i32 20, i32 20> - %2 = add <2 x i32> %y, <i32 57, i32 undef> + %2 = add <2 x i32> %y, <i32 57, i32 poison> %cmp = icmp eq <2 x i32> %1, %2 ret <2 x i1> %cmp } @@ -1838,14 +1838,14 @@ define <2 x i1> @icmp_sub57_ne_sub20_splat(<2 x i32> %x, <2 x i32> %y) { ret <2 x i1> %cmp } -define <2 x i1> @icmp_sub57_ne_sub20_vec_undef(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @icmp_sub57_ne_sub20_vec_undef( +define <2 x i1> @icmp_sub57_ne_sub20_vec_poison(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @icmp_sub57_ne_sub20_vec_poison( ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -37, 
i32 -37> ; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; - %1 = add <2 x i32> %x, <i32 -57, i32 undef> - %2 = add <2 x i32> %y, <i32 -20, i32 undef> + %1 = add <2 x i32> %x, <i32 -57, i32 poison> + %2 = add <2 x i32> %y, <i32 -20, i32 poison> %cmp = icmp ne <2 x i32> %1, %2 ret <2 x i1> %cmp } @@ -1926,14 +1926,14 @@ define <2 x i1> @icmp_add20_sge_add57_splat(<2 x i32> %x, <2 x i32> %y) { ret <2 x i1> %cmp } -define <2 x i1> @icmp_add20_sge_add57_undef(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @icmp_add20_sge_add57_undef( +define <2 x i1> @icmp_add20_sge_add57_poison(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @icmp_add20_sge_add57_poison( ; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[Y:%.*]], <i32 37, i32 37> ; CHECK-NEXT: [[CMP:%.*]] = icmp sle <2 x i32> [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %1 = add nsw <2 x i32> %x, <i32 20, i32 20> - %2 = add nsw <2 x i32> %y, <i32 57, i32 undef> + %2 = add nsw <2 x i32> %y, <i32 57, i32 poison> %cmp = icmp sge <2 x i32> %1, %2 ret <2 x i1> %cmp } @@ -1975,14 +1975,14 @@ define <2 x i1> @icmp_sub57_sge_sub20_splat(<2 x i32> %x, <2 x i32> %y) { ret <2 x i1> %cmp } -define <2 x i1> @icmp_sub57_sge_sub20_vec_undef(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @icmp_sub57_sge_sub20_vec_undef( +define <2 x i1> @icmp_sub57_sge_sub20_vec_poison(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @icmp_sub57_sge_sub20_vec_poison( ; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[X:%.*]], <i32 -37, i32 -37> ; CHECK-NEXT: [[CMP:%.*]] = icmp sge <2 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; - %1 = add nsw <2 x i32> %x, <i32 -57, i32 undef> - %2 = add nsw <2 x i32> %y, <i32 -20, i32 undef> + %1 = add nsw <2 x i32> %x, <i32 -57, i32 poison> + %2 = add nsw <2 x i32> %y, <i32 -20, i32 poison> %cmp = icmp sge <2 x i32> %1, %2 ret <2 x i1> %cmp } @@ -2557,13 +2557,13 @@ define <2 x i1> @or_icmp_eq_B_0_icmp_ult_A_B_uniform(<2 x i64> %a, <2 x i64> 
%b) ret <2 x i1> %3 } -define <2 x i1> @or_icmp_eq_B_0_icmp_ult_A_B_undef(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: @or_icmp_eq_B_0_icmp_ult_A_B_undef( +define <2 x i1> @or_icmp_eq_B_0_icmp_ult_A_B_poison(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: @or_icmp_eq_B_0_icmp_ult_A_B_poison( ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], <i64 -1, i64 -1> ; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <2 x i64> [[TMP1]], [[A:%.*]] ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; - %1 = icmp eq <2 x i64> %b, <i64 0, i64 undef> + %1 = icmp eq <2 x i64> %b, <i64 0, i64 poison> %2 = icmp ult <2 x i64> %a, %b %3 = or <2 x i1> %1, %2 ret <2 x i1> %3 @@ -2606,14 +2606,14 @@ define <2 x i1> @or_icmp_ne_A_0_icmp_ne_B_0_uniform(<2 x i64> %a, <2 x i64> %b) ret <2 x i1> %3 } -define <2 x i1> @or_icmp_ne_A_0_icmp_ne_B_0_undef(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: @or_icmp_ne_A_0_icmp_ne_B_0_undef( +define <2 x i1> @or_icmp_ne_A_0_icmp_ne_B_0_poison(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: @or_icmp_ne_A_0_icmp_ne_B_0_poison( ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i64> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; - %1 = icmp ne <2 x i64> %a, <i64 0, i64 undef> - %2 = icmp ne <2 x i64> %b, <i64 0, i64 undef> + %1 = icmp ne <2 x i64> %a, <i64 0, i64 poison> + %2 = icmp ne <2 x i64> %b, <i64 0, i64 poison> %3 = or <2 x i1> %1, %2 ret <2 x i1> %3 } @@ -2803,13 +2803,13 @@ define <2 x i1> @and_icmp_ne_B_0_icmp_uge_A_B_uniform(<2 x i64> %a, <2 x i64> %b ret <2 x i1> %3 } -define <2 x i1> @and_icmp_ne_B_0_icmp_uge_A_B_undef(<2 x i64> %a, <2 x i64> %b) { -; CHECK-LABEL: @and_icmp_ne_B_0_icmp_uge_A_B_undef( +define <2 x i1> @and_icmp_ne_B_0_icmp_uge_A_B_poison(<2 x i64> %a, <2 x i64> %b) { +; CHECK-LABEL: @and_icmp_ne_B_0_icmp_uge_A_B_poison( ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i64> [[B:%.*]], <i64 -1, i64 -1> ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <2 x i64> [[TMP1]], [[A:%.*]] ; CHECK-NEXT: ret <2 x i1> 
[[TMP2]] ; - %1 = icmp ne <2 x i64> %b, <i64 0, i64 undef> + %1 = icmp ne <2 x i64> %b, <i64 0, i64 poison> %2 = icmp uge <2 x i64> %a, %b %3 = and <2 x i1> %1, %2 ret <2 x i1> %3 @@ -3272,13 +3272,13 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_nonuniform(<2 x i32> %x) { ret <2 x i1> %ret } -define <2 x i1> @icmp_and_or_lshr_cst_vec_undef(<2 x i32> %x) { -; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_undef( +define <2 x i1> @icmp_and_or_lshr_cst_vec_poison(<2 x i32> %x) { +; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_poison( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], <i32 3, i32 poison> ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[RET]] ; - %shf = lshr <2 x i32> %x, <i32 1, i32 undef> + %shf = lshr <2 x i32> %x, <i32 1, i32 poison> %or = or <2 x i32> %shf, %x %and = and <2 x i32> %or, <i32 1, i32 1> %ret = icmp ne <2 x i32> %and, zeroinitializer @@ -3315,15 +3315,15 @@ define <2 x i1> @icmp_and_or_lshr_cst_vec_nonuniform_commute(<2 x i32> %xp) { ret <2 x i1> %ret } -define <2 x i1> @icmp_and_or_lshr_cst_vec_undef_commute(<2 x i32> %xp) { -; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_undef_commute( +define <2 x i1> @icmp_and_or_lshr_cst_vec_poison_commute(<2 x i32> %xp) { +; CHECK-LABEL: @icmp_and_or_lshr_cst_vec_poison_commute( ; CHECK-NEXT: [[X:%.*]] = srem <2 x i32> [[XP:%.*]], <i32 42, i32 42> ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X]], <i32 3, i32 poison> ; CHECK-NEXT: [[RET:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[RET]] ; %x = srem <2 x i32> %xp, <i32 42, i32 -42> ; prevent complexity-based canonicalization - %shf = lshr <2 x i32> %x, <i32 1, i32 undef> + %shf = lshr <2 x i32> %x, <i32 1, i32 poison> %or = or <2 x i32> %x, %shf %and = and <2 x i32> %or, <i32 1, i32 1> %ret = icmp ne <2 x i32> %and, zeroinitializer @@ -4360,7 +4360,7 @@ define <2 x i1> @signbit_false_logic(<2 x i5> %x) { ; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i5> [[X:%.*]], zeroinitializer ; 
CHECK-NEXT: ret <2 x i1> [[R]] ; - %dec = add <2 x i5> %x, <i5 -1, i5 undef> + %dec = add <2 x i5> %x, <i5 -1, i5 poison> %not = xor <2 x i5> %x, <i5 -1, i5 -1> %and = and <2 x i5> %dec, %not %r = icmp sgt <2 x i5> %and, <i5 -1, i5 -1> diff --git a/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll b/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll index 7cef922..c7e0553 100644 --- a/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll +++ b/llvm/test/Transforms/InstCombine/integer-round-up-pow2-alignment.ll @@ -86,9 +86,9 @@ define <2 x i8> @t4_splat(<2 x i8> %x) { ret <2 x i8> %x.roundedup } -; Splat-with-undef -define <2 x i8> @t5_splat_undef_0b0001(<2 x i8> %x) { -; CHECK-LABEL: @t5_splat_undef_0b0001( +; Splat-with-poison +define <2 x i8> @t5_splat_poison_0b0001(<2 x i8> %x) { +; CHECK-LABEL: @t5_splat_poison_0b0001( ; CHECK-NEXT: [[X_BIASED1:%.*]] = add <2 x i8> [[X:%.*]], <i8 15, i8 15> ; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED1]], <i8 -16, i8 -16> ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] @@ -96,43 +96,43 @@ define <2 x i8> @t5_splat_undef_0b0001(<2 x i8> %x) { %x.lowbits = and <2 x i8> %x, <i8 15, i8 15> %x.lowbits.are.zero = icmp eq <2 x i8> %x.lowbits, <i8 0, i8 0> %x.biased = add <2 x i8> %x, <i8 16, i8 16> - %x.biased.highbits = and <2 x i8> %x.biased, <i8 -16, i8 undef> + %x.biased.highbits = and <2 x i8> %x.biased, <i8 -16, i8 poison> %x.roundedup = select <2 x i1> %x.lowbits.are.zero, <2 x i8> %x, <2 x i8> %x.biased.highbits ret <2 x i8> %x.roundedup } -define <2 x i8> @t5_splat_undef_0b0010(<2 x i8> %x) { -; CHECK-LABEL: @t5_splat_undef_0b0010( +define <2 x i8> @t5_splat_poison_0b0010(<2 x i8> %x) { +; CHECK-LABEL: @t5_splat_poison_0b0010( ; CHECK-NEXT: [[X_BIASED1:%.*]] = add <2 x i8> [[X:%.*]], <i8 15, i8 15> ; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED1]], <i8 -16, i8 -16> ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; %x.lowbits = and <2 x i8> %x, <i8 15, i8 15> 
%x.lowbits.are.zero = icmp eq <2 x i8> %x.lowbits, <i8 0, i8 0> - %x.biased = add <2 x i8> %x, <i8 16, i8 undef> + %x.biased = add <2 x i8> %x, <i8 16, i8 poison> %x.biased.highbits = and <2 x i8> %x.biased, <i8 -16, i8 -16> %x.roundedup = select <2 x i1> %x.lowbits.are.zero, <2 x i8> %x, <2 x i8> %x.biased.highbits ret <2 x i8> %x.roundedup } -define <2 x i8> @t5_splat_undef_0b0100(<2 x i8> %x) { -; CHECK-LABEL: @t5_splat_undef_0b0100( +define <2 x i8> @t5_splat_poison_0b0100(<2 x i8> %x) { +; CHECK-LABEL: @t5_splat_poison_0b0100( ; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], <i8 15, i8 15> ; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], <i8 -16, i8 -16> ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; %x.lowbits = and <2 x i8> %x, <i8 15, i8 15> - %x.lowbits.are.zero = icmp eq <2 x i8> %x.lowbits, <i8 0, i8 undef> + %x.lowbits.are.zero = icmp eq <2 x i8> %x.lowbits, <i8 0, i8 poison> %x.biased = add <2 x i8> %x, <i8 16, i8 16> %x.biased.highbits = and <2 x i8> %x.biased, <i8 -16, i8 -16> %x.roundedup = select <2 x i1> %x.lowbits.are.zero, <2 x i8> %x, <2 x i8> %x.biased.highbits ret <2 x i8> %x.roundedup } -define <2 x i8> @t5_splat_undef_0b1000(<2 x i8> %x) { -; CHECK-LABEL: @t5_splat_undef_0b1000( +define <2 x i8> @t5_splat_poison_0b1000(<2 x i8> %x) { +; CHECK-LABEL: @t5_splat_poison_0b1000( ; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X:%.*]], <i8 15, i8 15> ; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = and <2 x i8> [[X_BIASED]], <i8 -16, i8 -16> ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; - %x.lowbits = and <2 x i8> %x, <i8 15, i8 undef> + %x.lowbits = and <2 x i8> %x, <i8 15, i8 poison> %x.lowbits.are.zero = icmp eq <2 x i8> %x.lowbits, <i8 0, i8 0> %x.biased = add <2 x i8> %x, <i8 16, i8 16> %x.biased.highbits = and <2 x i8> %x.biased, <i8 -16, i8 -16> @@ -177,64 +177,64 @@ define <2 x i8> @t7_nonsplat_bias(<2 x i8> %x) { } ; Splat-in-disguise vector tests -define <2 x i8> @t8_nonsplat_masked_by_undef_0b0001(<2 x i8> %x) { -; CHECK-LABEL: 
@t8_nonsplat_masked_by_undef_0b0001( +define <2 x i8> @t8_nonsplat_masked_by_poison_0b0001(<2 x i8> %x) { +; CHECK-LABEL: @t8_nonsplat_masked_by_poison_0b0001( ; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]], <i8 15, i8 31> ; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i8> [[X_LOWBITS]], zeroinitializer ; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X]], <i8 16, i8 32> -; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]], <i8 -16, i8 undef> +; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]], <i8 -16, i8 poison> ; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i8> [[X]], <2 x i8> [[X_BIASED_HIGHBITS]] ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; %x.lowbits = and <2 x i8> %x, <i8 15, i8 31> %x.lowbits.are.zero = icmp eq <2 x i8> %x.lowbits, <i8 0, i8 0> %x.biased = add <2 x i8> %x, <i8 16, i8 32> - %x.biased.highbits = and <2 x i8> %x.biased, <i8 -16, i8 undef> + %x.biased.highbits = and <2 x i8> %x.biased, <i8 -16, i8 poison> %x.roundedup = select <2 x i1> %x.lowbits.are.zero, <2 x i8> %x, <2 x i8> %x.biased.highbits ret <2 x i8> %x.roundedup } -define <2 x i8> @t8_nonsplat_masked_by_undef_0b0010(<2 x i8> %x) { -; CHECK-LABEL: @t8_nonsplat_masked_by_undef_0b0010( +define <2 x i8> @t8_nonsplat_masked_by_poison_0b0010(<2 x i8> %x) { +; CHECK-LABEL: @t8_nonsplat_masked_by_poison_0b0010( ; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]], <i8 15, i8 31> ; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i8> [[X_LOWBITS]], zeroinitializer -; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X]], <i8 16, i8 undef> +; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X]], <i8 16, i8 poison> ; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]], <i8 -16, i8 -32> ; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i8> [[X]], <2 x i8> [[X_BIASED_HIGHBITS]] ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; %x.lowbits = and <2 x i8> %x, <i8 15, i8 31> 
%x.lowbits.are.zero = icmp eq <2 x i8> %x.lowbits, <i8 0, i8 0> - %x.biased = add <2 x i8> %x, <i8 16, i8 undef> + %x.biased = add <2 x i8> %x, <i8 16, i8 poison> %x.biased.highbits = and <2 x i8> %x.biased, <i8 -16, i8 -32> %x.roundedup = select <2 x i1> %x.lowbits.are.zero, <2 x i8> %x, <2 x i8> %x.biased.highbits ret <2 x i8> %x.roundedup } -define <2 x i8> @t8_nonsplat_masked_by_undef_0b0100(<2 x i8> %x) { -; CHECK-LABEL: @t8_nonsplat_masked_by_undef_0b0100( +define <2 x i8> @t8_nonsplat_masked_by_poison_0b0100(<2 x i8> %x) { +; CHECK-LABEL: @t8_nonsplat_masked_by_poison_0b0100( ; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]], <i8 15, i8 31> -; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i8> [[X_LOWBITS]], <i8 0, i8 undef> +; CHECK-NEXT: [[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i8> [[X_LOWBITS]], <i8 0, i8 poison> ; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X]], <i8 16, i8 32> ; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]], <i8 -16, i8 -32> ; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i8> [[X]], <2 x i8> [[X_BIASED_HIGHBITS]] ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; %x.lowbits = and <2 x i8> %x, <i8 15, i8 31> - %x.lowbits.are.zero = icmp eq <2 x i8> %x.lowbits, <i8 0, i8 undef> + %x.lowbits.are.zero = icmp eq <2 x i8> %x.lowbits, <i8 0, i8 poison> %x.biased = add <2 x i8> %x, <i8 16, i8 32> %x.biased.highbits = and <2 x i8> %x.biased, <i8 -16, i8 -32> %x.roundedup = select <2 x i1> %x.lowbits.are.zero, <2 x i8> %x, <2 x i8> %x.biased.highbits ret <2 x i8> %x.roundedup } -define <2 x i8> @t8_nonsplat_masked_by_undef_0b1000(<2 x i8> %x) { -; CHECK-LABEL: @t8_nonsplat_masked_by_undef_0b1000( -; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]], <i8 15, i8 undef> +define <2 x i8> @t8_nonsplat_masked_by_poison_0b1000(<2 x i8> %x) { +; CHECK-LABEL: @t8_nonsplat_masked_by_poison_0b1000( +; CHECK-NEXT: [[X_LOWBITS:%.*]] = and <2 x i8> [[X:%.*]], <i8 15, i8 poison> ; CHECK-NEXT: 
[[X_LOWBITS_ARE_ZERO:%.*]] = icmp eq <2 x i8> [[X_LOWBITS]], zeroinitializer ; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i8> [[X]], <i8 16, i8 32> ; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i8> [[X_BIASED]], <i8 -16, i8 -32> ; CHECK-NEXT: [[X_ROUNDEDUP:%.*]] = select <2 x i1> [[X_LOWBITS_ARE_ZERO]], <2 x i8> [[X]], <2 x i8> [[X_BIASED_HIGHBITS]] ; CHECK-NEXT: ret <2 x i8> [[X_ROUNDEDUP]] ; - %x.lowbits = and <2 x i8> %x, <i8 15, i8 undef> + %x.lowbits = and <2 x i8> %x, <i8 15, i8 poison> %x.lowbits.are.zero = icmp eq <2 x i8> %x.lowbits, <i8 0, i8 0> %x.biased = add <2 x i8> %x, <i8 16, i8 32> %x.biased.highbits = and <2 x i8> %x.biased, <i8 -16, i8 -32> @@ -442,28 +442,28 @@ define i8 @t17_oneuse(i8 %x) { define <2 x i4> @t18_replacement_0b0001(<2 x i4> %x) { ; CHECK-LABEL: @t18_replacement_0b0001( ; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X:%.*]], <i4 3, i4 3> -; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], <i4 -4, i4 undef> +; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], <i4 -4, i4 poison> ; CHECK-NEXT: call void @use.v2i4(<2 x i4> [[X_BIASED_HIGHBITS]]) ; CHECK-NEXT: ret <2 x i4> [[X_BIASED_HIGHBITS]] ; %x.lowbits = and <2 x i4> %x, <i4 3, i4 3> %x.lowbits.are.zero = icmp eq <2 x i4> %x.lowbits, <i4 0, i4 0> %x.biased = add <2 x i4> %x, <i4 3, i4 3> - %x.biased.highbits = and <2 x i4> %x.biased, <i4 -4, i4 undef> + %x.biased.highbits = and <2 x i4> %x.biased, <i4 -4, i4 poison> call void @use.v2i4(<2 x i4> %x.biased.highbits) %x.roundedup = select <2 x i1> %x.lowbits.are.zero, <2 x i4> %x, <2 x i4> %x.biased.highbits ret <2 x i4> %x.roundedup } define <2 x i4> @t18_replacement_0b0010(<2 x i4> %x) { ; CHECK-LABEL: @t18_replacement_0b0010( -; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X:%.*]], <i4 3, i4 undef> +; CHECK-NEXT: [[X_BIASED:%.*]] = add <2 x i4> [[X:%.*]], <i4 3, i4 poison> ; CHECK-NEXT: [[X_BIASED_HIGHBITS:%.*]] = and <2 x i4> [[X_BIASED]], <i4 -4, i4 -4> ; CHECK-NEXT: call void @use.v2i4(<2 
x i4> [[X_BIASED_HIGHBITS]]) ; CHECK-NEXT: ret <2 x i4> [[X_BIASED_HIGHBITS]] ; %x.lowbits = and <2 x i4> %x, <i4 3, i4 3> %x.lowbits.are.zero = icmp eq <2 x i4> %x.lowbits, <i4 0, i4 0> - %x.biased = add <2 x i4> %x, <i4 3, i4 undef> + %x.biased = add <2 x i4> %x, <i4 3, i4 poison> %x.biased.highbits = and <2 x i4> %x.biased, <i4 -4, i4 -4> call void @use.v2i4(<2 x i4> %x.biased.highbits) %x.roundedup = select <2 x i1> %x.lowbits.are.zero, <2 x i4> %x, <2 x i4> %x.biased.highbits @@ -477,7 +477,7 @@ define <2 x i4> @t18_replacement_0b0100(<2 x i4> %x) { ; CHECK-NEXT: ret <2 x i4> [[X_BIASED_HIGHBITS]] ; %x.lowbits = and <2 x i4> %x, <i4 3, i4 3> - %x.lowbits.are.zero = icmp eq <2 x i4> %x.lowbits, <i4 0, i4 undef> + %x.lowbits.are.zero = icmp eq <2 x i4> %x.lowbits, <i4 0, i4 poison> %x.biased = add <2 x i4> %x, <i4 3, i4 3> %x.biased.highbits = and <2 x i4> %x.biased, <i4 -4, i4 -4> call void @use.v2i4(<2 x i4> %x.biased.highbits) @@ -491,7 +491,7 @@ define <2 x i4> @t18_replacement_0b1000(<2 x i4> %x) { ; CHECK-NEXT: call void @use.v2i4(<2 x i4> [[X_BIASED_HIGHBITS]]) ; CHECK-NEXT: ret <2 x i4> [[X_BIASED_HIGHBITS]] ; - %x.lowbits = and <2 x i4> %x, <i4 3, i4 undef> + %x.lowbits = and <2 x i4> %x, <i4 3, i4 poison> %x.lowbits.are.zero = icmp eq <2 x i4> %x.lowbits, <i4 0, i4 0> %x.biased = add <2 x i4> %x, <i4 3, i4 3> %x.biased.highbits = and <2 x i4> %x.biased, <i4 -4, i4 -4> diff --git a/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll b/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll index 4861132..a76662c 100644 --- a/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll +++ b/llvm/test/Transforms/InstCombine/invert-variable-mask-in-masked-merge-vector.ll @@ -20,14 +20,14 @@ define <2 x i4> @vector (<2 x i4> %x, <2 x i4> %y, <2 x i4> %m) { ret <2 x i4> %r } -define <3 x i4> @vector_undef (<3 x i4> %x, <3 x i4> %y, <3 x i4> %m) { -; CHECK-LABEL: @vector_undef( +define <3 
x i4> @vector_poison (<3 x i4> %x, <3 x i4> %y, <3 x i4> %m) { +; CHECK-LABEL: @vector_poison( ; CHECK-NEXT: [[N0:%.*]] = xor <3 x i4> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i4> [[N0]], [[M:%.*]] ; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <3 x i4> [[R]] ; - %im = xor <3 x i4> %m, <i4 -1, i4 undef, i4 -1> + %im = xor <3 x i4> %m, <i4 -1, i4 poison, i4 -1> %n0 = xor <3 x i4> %x, %y %n1 = and <3 x i4> %n0, %im %r = xor <3 x i4> %n1, %y @@ -78,17 +78,17 @@ define <2 x i4> @in_constant_varx_6_invmask_nonsplat(<2 x i4> %x, <2 x i4> %mask ret <2 x i4> %r } -define <3 x i4> @in_constant_varx_6_invmask_undef(<3 x i4> %x, <3 x i4> %mask) { -; CHECK-LABEL: @in_constant_varx_6_invmask_undef( -; CHECK-NEXT: [[N0:%.*]] = xor <3 x i4> [[X:%.*]], <i4 6, i4 undef, i4 7> +define <3 x i4> @in_constant_varx_6_invmask_poison(<3 x i4> %x, <3 x i4> %mask) { +; CHECK-LABEL: @in_constant_varx_6_invmask_poison( +; CHECK-NEXT: [[N0:%.*]] = xor <3 x i4> [[X:%.*]], <i4 6, i4 poison, i4 7> ; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i4> [[N0]], [[MASK:%.*]] ; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[TMP1]], [[X]] ; CHECK-NEXT: ret <3 x i4> [[R]] ; - %notmask = xor <3 x i4> %mask, <i4 -1, i4 undef, i4 -1> - %n0 = xor <3 x i4> %x, <i4 6, i4 undef, i4 7> ; %x + %notmask = xor <3 x i4> %mask, <i4 -1, i4 poison, i4 -1> + %n0 = xor <3 x i4> %x, <i4 6, i4 poison, i4 7> ; %x %n1 = and <3 x i4> %n0, %notmask - %r = xor <3 x i4> %n1, <i4 6, i4 undef, i4 7> + %r = xor <3 x i4> %n1, <i4 6, i4 poison, i4 7> ret <3 x i4> %r } @@ -133,15 +133,15 @@ define <2 x i4> @in_constant_6_vary_invmask_nonsplat(<2 x i4> %y, <2 x i4> %mask ret <2 x i4> %r } -define <3 x i4> @in_constant_6_vary_invmask_undef(<3 x i4> %y, <3 x i4> %mask) { -; CHECK-LABEL: @in_constant_6_vary_invmask_undef( -; CHECK-NEXT: [[N0:%.*]] = xor <3 x i4> [[Y:%.*]], <i4 6, i4 undef, i4 6> +define <3 x i4> @in_constant_6_vary_invmask_poison(<3 x i4> %y, <3 x i4> %mask) { +; CHECK-LABEL: 
@in_constant_6_vary_invmask_poison( +; CHECK-NEXT: [[N0:%.*]] = xor <3 x i4> [[Y:%.*]], <i4 6, i4 poison, i4 6> ; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i4> [[N0]], [[MASK:%.*]] -; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[TMP1]], <i4 6, i4 undef, i4 6> +; CHECK-NEXT: [[R:%.*]] = xor <3 x i4> [[TMP1]], <i4 6, i4 poison, i4 6> ; CHECK-NEXT: ret <3 x i4> [[R]] ; - %notmask = xor <3 x i4> %mask, <i4 -1, i4 undef, i4 -1> - %n0 = xor <3 x i4> %y, <i4 6, i4 undef, i4 6> ; %x + %notmask = xor <3 x i4> %mask, <i4 -1, i4 poison, i4 -1> + %n0 = xor <3 x i4> %y, <i4 6, i4 poison, i4 6> ; %x %n1 = and <3 x i4> %n0, %notmask %r = xor <3 x i4> %n1, %y ret <3 x i4> %r diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll index 85a2133..8b4249b 100644 --- a/llvm/test/Transforms/InstCombine/known-bits.ll +++ b/llvm/test/Transforms/InstCombine/known-bits.ll @@ -1223,7 +1223,7 @@ define i8 @known_reduce_and(<2 x i8> %xx) { ; CHECK-NEXT: ret i8 1 ; %x = or <2 x i8> %xx, <i8 5, i8 3> - %v = call i8 @llvm.vector.reduce.or(<2 x i8> %x) + %v = call i8 @llvm.vector.reduce.and(<2 x i8> %x) %r = and i8 %v, 1 ret i8 %r } @@ -1231,12 +1231,12 @@ define i8 @known_reduce_and(<2 x i8> %xx) { define i8 @known_reduce_and_fail(<2 x i8> %xx) { ; CHECK-LABEL: @known_reduce_and_fail( ; CHECK-NEXT: [[X:%.*]] = or <2 x i8> [[XX:%.*]], <i8 5, i8 3> -; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.or.v2i8(<2 x i8> [[X]]) +; CHECK-NEXT: [[V:%.*]] = call i8 @llvm.vector.reduce.and.v2i8(<2 x i8> [[X]]) ; CHECK-NEXT: [[R:%.*]] = and i8 [[V]], 2 ; CHECK-NEXT: ret i8 [[R]] ; %x = or <2 x i8> %xx, <i8 5, i8 3> - %v = call i8 @llvm.vector.reduce.or(<2 x i8> %x) + %v = call i8 @llvm.vector.reduce.and(<2 x i8> %x) %r = and i8 %v, 2 ret i8 %r } diff --git a/llvm/test/Transforms/InstCombine/known-fpclass-reduce-signbit.ll b/llvm/test/Transforms/InstCombine/known-fpclass-reduce-signbit.ll new file mode 100644 index 0000000..f46ea9d --- /dev/null +++ 
b/llvm/test/Transforms/InstCombine/known-fpclass-reduce-signbit.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -S -passes=instcombine | FileCheck %s + +define i1 @vector_reduce_maximum_signbit(<4 x double> nofpclass(nan nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_maximum_signbit +; CHECK-SAME: (<4 x double> nofpclass(nan nzero) [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + +define i1 @vector_reduce_maximum_signbit_fail_maybe_nan(<4 x double> nofpclass(nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_maximum_signbit_fail_maybe_nan +; CHECK-SAME: (<4 x double> nofpclass(nzero) [[X:%.*]]) { +; CHECK-NEXT: [[X_ABS:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[X]]) +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> [[X_ABS]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[OP]], 0.000000e+00 +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + + +define i1 @vector_reduce_minimum_signbit(<4 x double> nofpclass(nan nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_minimum_signbit +; CHECK-SAME: (<4 x double> nofpclass(nan nzero) [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + +define i1 @vector_reduce_minimum_signbit_fail_maybe_nan(<4 x double> nofpclass(nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_minimum_signbit_fail_maybe_nan +; CHECK-SAME: (<4 x double> nofpclass(nzero) 
[[X:%.*]]) { +; CHECK-NEXT: [[X_ABS:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[X]]) +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> [[X_ABS]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[OP]], 0.000000e+00 +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + +define i1 @vector_reduce_max_signbit(<4 x double> nofpclass(nan nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_max_signbit +; CHECK-SAME: (<4 x double> nofpclass(nan nzero) [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + +define i1 @vector_reduce_max_signbit_fail_maybe_nan(<4 x double> nofpclass(nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_max_signbit_fail_maybe_nan +; CHECK-SAME: (<4 x double> nofpclass(nzero) [[X:%.*]]) { +; CHECK-NEXT: [[X_ABS:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[X]]) +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[X_ABS]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[OP]], 0.000000e+00 +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + + +define i1 @vector_reduce_min_signbit(<4 x double> nofpclass(nan nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_min_signbit +; CHECK-SAME: (<4 x double> nofpclass(nan nzero) [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + 
+define i1 @vector_reduce_min_signbit_fail_maybe_nan(<4 x double> nofpclass(nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_min_signbit_fail_maybe_nan +; CHECK-SAME: (<4 x double> nofpclass(nzero) [[X:%.*]]) { +; CHECK-NEXT: [[X_ABS:%.*]] = call <4 x double> @llvm.fabs.v4f64(<4 x double> [[X]]) +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[X_ABS]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double [[OP]], 0.000000e+00 +; CHECK-NEXT: ret i1 [[CMP]] +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + + + +define i1 @vector_reduce_min_signbit_nnan_from_fmf(<4 x double> nofpclass(nzero) %x) { +; CHECK-LABEL: define i1 @vector_reduce_min_signbit_nnan_from_fmf +; CHECK-SAME: (<4 x double> nofpclass(nzero) [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %x.abs = call <4 x double> @llvm.fabs.v4f64(<4 x double> %x) + %op = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %x.abs) + %cmp = fcmp oge double %op, 0.0 + ret i1 %cmp +} + + diff --git a/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll index 847a794..5d058b2 100644 --- a/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/lshr-and-negC-icmpeq-zero.ll @@ -81,39 +81,39 @@ define <4 x i1> @vec_4xi32_lshr_and_negC_eq(<4 x i32> %x, <4 x i32> %y) { ret <4 x i1> %r } -define <4 x i1> @vec_lshr_and_negC_eq_undef1(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @vec_lshr_and_negC_eq_undef1( +define <4 x i1> @vec_lshr_and_negC_eq_poison1(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @vec_lshr_and_negC_eq_poison1( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], <i32 8, i32 8, i32 8, i32 8> ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> 
%x, %y - %and = and <4 x i32> %lshr, <i32 4294967288, i32 undef, i32 4294967288, i32 4294967288> ; ~7 + %and = and <4 x i32> %lshr, <i32 4294967288, i32 poison, i32 4294967288, i32 4294967288> ; ~7 %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 0> ret <4 x i1> %r } -define <4 x i1> @vec_lshr_and_negC_eq_undef2(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @vec_lshr_and_negC_eq_undef2( +define <4 x i1> @vec_lshr_and_negC_eq_poison2(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @vec_lshr_and_negC_eq_poison2( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], <i32 8, i32 8, i32 8, i32 8> ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y %and = and <4 x i32> %lshr, <i32 4294967288, i32 4294967288, i32 4294967288, i32 4294967288> ; ~7 - %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 undef> + %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 poison> ret <4 x i1> %r } -define <4 x i1> @vec_lshr_and_negC_eq_undef3(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @vec_lshr_and_negC_eq_undef3( +define <4 x i1> @vec_lshr_and_negC_eq_poison3(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @vec_lshr_and_negC_eq_poison3( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[LSHR]], <i32 8, i32 8, i32 8, i32 8> ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y - %and = and <4 x i32> %lshr, <i32 4294967288, i32 4294967288, i32 undef, i32 4294967288> ; ~7 - %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 undef> + %and = and <4 x i32> %lshr, <i32 4294967288, i32 4294967288, i32 poison, i32 4294967288> ; ~7 + %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 poison> ret <4 x i1> %r } diff --git a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll index 39f4e58..0166680 100644 --- 
a/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/lshr-and-signbit-icmpeq-zero.ll @@ -81,39 +81,39 @@ define <4 x i1> @vec_4xi32_lshr_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) { ret <4 x i1> %r } -define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_undef1(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_undef1( +define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison1(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_poison1( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], <i32 -1, i32 -1, i32 -1, i32 -1> ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y - %and = and <4 x i32> %lshr, <i32 2147483648, i32 undef, i32 2147483648, i32 2147483648> + %and = and <4 x i32> %lshr, <i32 2147483648, i32 poison, i32 2147483648, i32 2147483648> %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 0> ret <4 x i1> %r } -define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_undef2(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_undef2( +define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison2(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_poison2( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], <i32 -1, i32 -1, i32 -1, i32 -1> ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y %and = and <4 x i32> %lshr, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648> - %r = icmp eq <4 x i32> %and, <i32 undef, i32 0, i32 0, i32 0> + %r = icmp eq <4 x i32> %and, <i32 poison, i32 0, i32 0, i32 0> ret <4 x i1> %r } -define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_undef3(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @vec_4xi32_lshr_and_signbit_eq_undef3( +define <4 x i1> @vec_4xi32_lshr_and_signbit_eq_poison3(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: 
@vec_4xi32_lshr_and_signbit_eq_poison3( ; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[LSHR]], <i32 -1, i32 -1, i32 -1, i32 -1> ; CHECK-NEXT: ret <4 x i1> [[R]] ; %lshr = lshr <4 x i32> %x, %y - %and = and <4 x i32> %lshr, <i32 2147483648, i32 undef, i32 2147483648, i32 2147483648> - %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 undef> + %and = and <4 x i32> %lshr, <i32 2147483648, i32 poison, i32 2147483648, i32 2147483648> + %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 poison> ret <4 x i1> %r } diff --git a/llvm/test/Transforms/InstCombine/masked-merge-add.ll b/llvm/test/Transforms/InstCombine/masked-merge-add.ll index f655153..0484369 100644 --- a/llvm/test/Transforms/InstCombine/masked-merge-add.ll +++ b/llvm/test/Transforms/InstCombine/masked-merge-add.ll @@ -51,7 +51,7 @@ define <3 x i32> @p_vec_undef(<3 x i32> %x, <3 x i32> %y, <3 x i32> noundef %m) ; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], [[M:%.*]] ; CHECK-NEXT: [[NEG:%.*]] = xor <3 x i32> [[M]], <i32 -1, i32 undef, i32 -1> ; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[NEG]], [[Y:%.*]] -; CHECK-NEXT: [[RET:%.*]] = or disjoint <3 x i32> [[AND]], [[AND1]] +; CHECK-NEXT: [[RET:%.*]] = add <3 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %and = and <3 x i32> %x, %m @@ -61,6 +61,21 @@ define <3 x i32> @p_vec_undef(<3 x i32> %x, <3 x i32> %y, <3 x i32> noundef %m) ret <3 x i32> %ret } +define <3 x i32> @p_vec_poison(<3 x i32> %x, <3 x i32> %y, <3 x i32> noundef %m) { +; CHECK-LABEL: @p_vec_poison( +; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], [[M:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = xor <3 x i32> [[M]], <i32 -1, i32 poison, i32 -1> +; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[NEG]], [[Y:%.*]] +; CHECK-NEXT: [[RET:%.*]] = or disjoint <3 x i32> [[AND]], [[AND1]] +; CHECK-NEXT: ret <3 x i32> [[RET]] +; + %and = and <3 x i32> %x, %m + %neg = xor <3 x i32> %m, <i32 -1, i32 poison, i32 -1> + %and1 = 
and <3 x i32> %neg, %y + %ret = add <3 x i32> %and, %and1 + ret <3 x i32> %ret +} + ; ============================================================================ ; ; Constant mask. ; ============================================================================ ; diff --git a/llvm/test/Transforms/InstCombine/masked-merge-or.ll b/llvm/test/Transforms/InstCombine/masked-merge-or.ll index b49ec07..0531a53 100644 --- a/llvm/test/Transforms/InstCombine/masked-merge-or.ll +++ b/llvm/test/Transforms/InstCombine/masked-merge-or.ll @@ -51,7 +51,7 @@ define <3 x i32> @p_vec_undef(<3 x i32> %x, <3 x i32> %y, <3 x i32> noundef %m) ; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], [[M:%.*]] ; CHECK-NEXT: [[NEG:%.*]] = xor <3 x i32> [[M]], <i32 -1, i32 undef, i32 -1> ; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[NEG]], [[Y:%.*]] -; CHECK-NEXT: [[RET:%.*]] = or disjoint <3 x i32> [[AND]], [[AND1]] +; CHECK-NEXT: [[RET:%.*]] = or <3 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %and = and <3 x i32> %x, %m @@ -61,6 +61,21 @@ define <3 x i32> @p_vec_undef(<3 x i32> %x, <3 x i32> %y, <3 x i32> noundef %m) ret <3 x i32> %ret } +define <3 x i32> @p_vec_poison(<3 x i32> %x, <3 x i32> %y, <3 x i32> noundef %m) { +; CHECK-LABEL: @p_vec_poison( +; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], [[M:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = xor <3 x i32> [[M]], <i32 -1, i32 poison, i32 -1> +; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[NEG]], [[Y:%.*]] +; CHECK-NEXT: [[RET:%.*]] = or disjoint <3 x i32> [[AND]], [[AND1]] +; CHECK-NEXT: ret <3 x i32> [[RET]] +; + %and = and <3 x i32> %x, %m + %neg = xor <3 x i32> %m, <i32 -1, i32 poison, i32 -1> + %and1 = and <3 x i32> %neg, %y + %ret = or <3 x i32> %and, %and1 + ret <3 x i32> %ret +} + ; ============================================================================ ; ; Constant mask. 
; ============================================================================ ; diff --git a/llvm/test/Transforms/InstCombine/masked-merge-xor.ll b/llvm/test/Transforms/InstCombine/masked-merge-xor.ll index a6d201b..74cc762 100644 --- a/llvm/test/Transforms/InstCombine/masked-merge-xor.ll +++ b/llvm/test/Transforms/InstCombine/masked-merge-xor.ll @@ -51,7 +51,7 @@ define <3 x i32> @p_vec_undef(<3 x i32> %x, <3 x i32> %y, <3 x i32> noundef %m) ; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], [[M:%.*]] ; CHECK-NEXT: [[NEG:%.*]] = xor <3 x i32> [[M]], <i32 -1, i32 undef, i32 -1> ; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[NEG]], [[Y:%.*]] -; CHECK-NEXT: [[RET:%.*]] = or disjoint <3 x i32> [[AND]], [[AND1]] +; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[AND]], [[AND1]] ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %and = and <3 x i32> %x, %m @@ -61,6 +61,21 @@ define <3 x i32> @p_vec_undef(<3 x i32> %x, <3 x i32> %y, <3 x i32> noundef %m) ret <3 x i32> %ret } +define <3 x i32> @p_vec_poison(<3 x i32> %x, <3 x i32> %y, <3 x i32> noundef %m) { +; CHECK-LABEL: @p_vec_poison( +; CHECK-NEXT: [[AND:%.*]] = and <3 x i32> [[X:%.*]], [[M:%.*]] +; CHECK-NEXT: [[NEG:%.*]] = xor <3 x i32> [[M]], <i32 -1, i32 poison, i32 -1> +; CHECK-NEXT: [[AND1:%.*]] = and <3 x i32> [[NEG]], [[Y:%.*]] +; CHECK-NEXT: [[RET:%.*]] = or disjoint <3 x i32> [[AND]], [[AND1]] +; CHECK-NEXT: ret <3 x i32> [[RET]] +; + %and = and <3 x i32> %x, %m + %neg = xor <3 x i32> %m, <i32 -1, i32 poison, i32 -1> + %and1 = and <3 x i32> %neg, %y + %ret = xor <3 x i32> %and, %and1 + ret <3 x i32> %ret +} + ; ============================================================================ ; ; Constant mask. 
; ============================================================================ ; diff --git a/llvm/test/Transforms/InstCombine/min-positive.ll b/llvm/test/Transforms/InstCombine/min-positive.ll index 1fb212b..d2c2e90 100644 --- a/llvm/test/Transforms/InstCombine/min-positive.ll +++ b/llvm/test/Transforms/InstCombine/min-positive.ll @@ -67,16 +67,16 @@ define <2 x i1> @smin_commute_vec(<2 x i32> %x, <2 x i32> %other) { ret <2 x i1> %test } -define <2 x i1> @smin_commute_vec_undef_elts(<2 x i32> %x, <2 x i32> %other) { -; CHECK-LABEL: @smin_commute_vec_undef_elts( -; CHECK-NEXT: [[TEST:%.*]] = icmp sgt <2 x i32> [[OTHER:%.*]], <i32 0, i32 undef> +define <2 x i1> @smin_commute_vec_poison_elts(<2 x i32> %x, <2 x i32> %other) { +; CHECK-LABEL: @smin_commute_vec_poison_elts( +; CHECK-NEXT: [[TEST:%.*]] = icmp sgt <2 x i32> [[OTHER:%.*]], <i32 0, i32 poison> ; CHECK-NEXT: ret <2 x i1> [[TEST]] ; %notneg = and <2 x i32> %x, <i32 7, i32 7> %positive = or <2 x i32> %notneg, <i32 1, i32 1> %cmp = icmp slt <2 x i32> %other, %positive %sel = select <2 x i1> %cmp, <2 x i32> %other, <2 x i32> %positive - %test = icmp sgt <2 x i32> %sel, <i32 0, i32 undef> + %test = icmp sgt <2 x i32> %sel, <i32 0, i32 poison> ret <2 x i1> %test } ; %positive might be zero diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll index 8391fe3..8b47dc7a 100644 --- a/llvm/test/Transforms/InstCombine/minmax-fold.ll +++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll @@ -131,7 +131,7 @@ define i64 @t9(i32 %a) { define float @t10(i32 %x) { ; CHECK-LABEL: @t10( ; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 255) -; CHECK-NEXT: [[R:%.*]] = sitofp i32 [[R1]] to float +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i32 [[R1]] to float ; CHECK-NEXT: ret float [[R]] ; %f_x = sitofp i32 %x to float @@ -143,7 +143,7 @@ define float @t10(i32 %x) { define float @t11(i64 %x) { ; CHECK-LABEL: @t11( ; CHECK-NEXT: [[R1:%.*]] = call i64 
@llvm.smax.i64(i64 [[X:%.*]], i64 255) -; CHECK-NEXT: [[R:%.*]] = sitofp i64 [[R1]] to float +; CHECK-NEXT: [[R:%.*]] = uitofp nneg i64 [[R1]] to float ; CHECK-NEXT: ret float [[R]] ; %f_x = sitofp i64 %x to float @@ -526,7 +526,7 @@ falselabel: define double @PR31751_umin1(i32 %x) { ; CHECK-LABEL: @PR31751_umin1( ; CHECK-NEXT: [[SEL:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 2147483647) -; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: [[CONV:%.*]] = uitofp nneg i32 [[SEL]] to double ; CHECK-NEXT: ret double [[CONV]] ; %cmp = icmp slt i32 %x, 0 @@ -538,7 +538,7 @@ define double @PR31751_umin1(i32 %x) { define double @PR31751_umin2(i32 %x) { ; CHECK-LABEL: @PR31751_umin2( ; CHECK-NEXT: [[SEL:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 2147483647) -; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: [[CONV:%.*]] = uitofp nneg i32 [[SEL]] to double ; CHECK-NEXT: ret double [[CONV]] ; %cmp = icmp ult i32 %x, 2147483647 @@ -550,7 +550,7 @@ define double @PR31751_umin2(i32 %x) { define double @PR31751_umin3(i32 %x) { ; CHECK-LABEL: @PR31751_umin3( ; CHECK-NEXT: [[SEL:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 2147483647) -; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[SEL]] to double +; CHECK-NEXT: [[CONV:%.*]] = uitofp nneg i32 [[SEL]] to double ; CHECK-NEXT: ret double [[CONV]] ; %cmp = icmp ugt i32 %x, 2147483647 @@ -1360,14 +1360,15 @@ define i8 @PR14613_smax(i8 %x) { define i8 @PR46271(<2 x i8> %x) { ; CHECK-LABEL: @PR46271( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i8> @llvm.smax.v2i8(<2 x i8> [[X:%.*]], <2 x i8> <i8 -1, i8 -1>) +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i8> [[X:%.*]], <i8 poison, i8 -1> +; CHECK-NEXT: [[A_INV:%.*]] = icmp slt <2 x i8> [[X]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = select <2 x i1> [[A_INV]], <2 x i8> <i8 poison, i8 0>, <2 x i8> [[TMP3]] ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i8> [[TMP1]], i64 1 -; CHECK-NEXT: [[R:%.*]] = xor i8 [[TMP2]], -1 -; CHECK-NEXT: ret 
i8 [[R]] +; CHECK-NEXT: ret i8 [[TMP2]] ; %a = icmp sgt <2 x i8> %x, <i8 -1, i8 -1> - %b = select <2 x i1> %a, <2 x i8> %x, <2 x i8> <i8 undef, i8 -1> - %not = xor <2 x i8> %b, <i8 undef, i8 -1> + %b = select <2 x i1> %a, <2 x i8> %x, <2 x i8> <i8 poison, i8 -1> + %not = xor <2 x i8> %b, <i8 poison, i8 -1> %r = extractelement <2 x i8> %not, i32 1 ret i8 %r } diff --git a/llvm/test/Transforms/InstCombine/minmax-fp.ll b/llvm/test/Transforms/InstCombine/minmax-fp.ll index f89e8a1..b9e46ca 100644 --- a/llvm/test/Transforms/InstCombine/minmax-fp.ll +++ b/llvm/test/Transforms/InstCombine/minmax-fp.ll @@ -257,7 +257,7 @@ define double @t16(i32 %x) { define double @t17(i32 %x) { ; CHECK-LABEL: @t17( ; CHECK-NEXT: [[SEL1:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 2) -; CHECK-NEXT: [[SEL:%.*]] = sitofp i32 [[SEL1]] to double +; CHECK-NEXT: [[SEL:%.*]] = uitofp nneg i32 [[SEL1]] to double ; CHECK-NEXT: ret double [[SEL]] ; %cmp = icmp sgt i32 %x, 2 diff --git a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll index ae2e115..a76f0f8 100644 --- a/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/minmax-intrinsics.ll @@ -393,7 +393,7 @@ define i8 @smax_of_nots(i8 %x, i8 %y) { ret i8 %m } -; Vectors are ok (including undef lanes of not ops) +; Vectors are ok (including poison lanes of not ops) define <3 x i8> @smin_of_nots(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @smin_of_nots( @@ -401,8 +401,8 @@ define <3 x i8> @smin_of_nots(<3 x i8> %x, <3 x i8> %y) { ; CHECK-NEXT: [[M:%.*]] = xor <3 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1> ; CHECK-NEXT: ret <3 x i8> [[M]] ; - %notx = xor <3 x i8> %x, <i8 -1, i8 undef, i8 -1> - %noty = xor <3 x i8> %y, <i8 -1, i8 -1, i8 undef> + %notx = xor <3 x i8> %x, <i8 -1, i8 poison, i8 -1> + %noty = xor <3 x i8> %y, <i8 -1, i8 -1, i8 poison> %m = call <3 x i8> @llvm.smin.v3i8(<3 x i8> %notx, <3 x i8> %noty) ret <3 x i8> %m } @@ -473,16 +473,16 
@@ define i8 @smax_of_not_and_const(i8 %x) { ret i8 %m } -; Vectors are ok (including undef lanes of not ops and min/max constant operand) +; Vectors are ok (including poison lanes of not ops and min/max constant operand) define <3 x i8> @smin_of_not_and_const(<3 x i8> %x) { ; CHECK-LABEL: @smin_of_not_and_const( -; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> <i8 -43, i8 undef, i8 -44>) +; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i8> @llvm.smax.v3i8(<3 x i8> [[X:%.*]], <3 x i8> <i8 -43, i8 poison, i8 -44>) ; CHECK-NEXT: [[M:%.*]] = xor <3 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1> ; CHECK-NEXT: ret <3 x i8> [[M]] ; - %notx = xor <3 x i8> %x, <i8 -1, i8 -1, i8 undef> - %m = call <3 x i8> @llvm.smin.v3i8(<3 x i8> <i8 42, i8 undef, i8 43>, <3 x i8> %notx) + %notx = xor <3 x i8> %x, <i8 -1, i8 -1, i8 poison> + %m = call <3 x i8> @llvm.smin.v3i8(<3 x i8> <i8 42, i8 poison, i8 43>, <3 x i8> %notx) ret <3 x i8> %m } @@ -706,7 +706,7 @@ define <3 x i8> @smax_negation_vec(<3 x i8> %x) { ; CHECK-NEXT: [[R:%.*]] = call <3 x i8> @llvm.abs.v3i8(<3 x i8> [[X:%.*]], i1 false) ; CHECK-NEXT: ret <3 x i8> [[R]] ; - %s = sub <3 x i8> <i8 0, i8 undef, i8 0>, %x + %s = sub <3 x i8> <i8 0, i8 poison, i8 0>, %x %r = call <3 x i8> @llvm.smax.v3i8(<3 x i8> %x, <3 x i8> %s) ret <3 x i8> %r } @@ -912,7 +912,7 @@ define <3 x i8> @umin_non_zero_idiom4(<3 x i8> %a) { ; CHECK-NEXT: [[RES:%.*]] = zext <3 x i1> [[TMP1]] to <3 x i8> ; CHECK-NEXT: ret <3 x i8> [[RES]] ; - %res = call <3 x i8> @llvm.umin.v3i8(<3 x i8> %a, <3 x i8> <i8 1, i8 undef, i8 undef>) + %res = call <3 x i8> @llvm.umin.v3i8(<3 x i8> %a, <3 x i8> <i8 1, i8 poison, i8 poison>) ret <3 x i8> %res } @@ -2118,15 +2118,15 @@ define i8 @umin_offset_uses(i8 %x) { ret i8 %m } -; TODO: This could transform, but undef element must not propagate to the new add. 
+; TODO: This could transform -define <3 x i8> @umax_vector_splat_undef(<3 x i8> %x) { -; CHECK-LABEL: @umax_vector_splat_undef( -; CHECK-NEXT: [[A:%.*]] = add nuw <3 x i8> [[X:%.*]], <i8 undef, i8 64, i8 64> +define <3 x i8> @umax_vector_splat_poison(<3 x i8> %x) { +; CHECK-LABEL: @umax_vector_splat_poison( +; CHECK-NEXT: [[A:%.*]] = add nuw <3 x i8> [[X:%.*]], <i8 poison, i8 64, i8 64> ; CHECK-NEXT: [[R:%.*]] = call <3 x i8> @llvm.umax.v3i8(<3 x i8> [[A]], <3 x i8> <i8 13, i8 -126, i8 -126>) ; CHECK-NEXT: ret <3 x i8> [[R]] ; - %a = add nuw <3 x i8> %x, <i8 undef, i8 64, i8 64> + %a = add nuw <3 x i8> %x, <i8 poison, i8 64, i8 64> %r = call <3 x i8> @llvm.umax.v3i8(<3 x i8> %a, <3 x i8> <i8 13, i8 130, i8 130>) ret <3 x i8> %r } @@ -2506,8 +2506,8 @@ entry: ret i8 %val } -define <3 x i8> @fold_umax_with_knownbits_info_undef_in_splat(<3 x i8> %a, <3 x i8> %b) { -; CHECK-LABEL: @fold_umax_with_knownbits_info_undef_in_splat( +define <3 x i8> @fold_umax_with_knownbits_info_poison_in_splat(<3 x i8> %a, <3 x i8> %b) { +; CHECK-LABEL: @fold_umax_with_knownbits_info_poison_in_splat( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A1:%.*]] = or <3 x i8> [[A:%.*]], <i8 1, i8 1, i8 1> ; CHECK-NEXT: [[A2:%.*]] = shl <3 x i8> [[B:%.*]], <i8 1, i8 1, i8 1> @@ -2518,7 +2518,7 @@ entry: %a1 = or <3 x i8> %a, <i8 1, i8 1, i8 1> %a2 = shl <3 x i8> %b, <i8 1, i8 1, i8 1> %sub = sub <3 x i8> %a1, %a2 - %val = call <3 x i8> @llvm.umax.v3i8(<3 x i8> %sub, <3 x i8> <i8 1, i8 undef, i8 1>) + %val = call <3 x i8> @llvm.umax.v3i8(<3 x i8> %sub, <3 x i8> <i8 1, i8 poison, i8 1>) ret <3 x i8> %val } @@ -2535,8 +2535,8 @@ entry: ret i8 %val } -define <3 x i8> @fold_umin_with_knownbits_info_undef_in_splat(<3 x i8> %a, <3 x i8> %b) { -; CHECK-LABEL: @fold_umin_with_knownbits_info_undef_in_splat( +define <3 x i8> @fold_umin_with_knownbits_info_poison_in_splat(<3 x i8> %a, <3 x i8> %b) { +; CHECK-LABEL: @fold_umin_with_knownbits_info_poison_in_splat( ; CHECK-NEXT: entry: ; CHECK-NEXT: ret <3 x i8> <i8 3, 
i8 3, i8 3> ; @@ -2544,7 +2544,7 @@ entry: %a1 = or <3 x i8> %a, <i8 3, i8 3, i8 3> %a2 = shl <3 x i8> %b, <i8 2, i8 2, i8 2> %sub = sub <3 x i8> %a1, %a2 - %val = call <3 x i8> @llvm.umin.v3i8(<3 x i8> %sub, <3 x i8> <i8 3, i8 undef, i8 3>) + %val = call <3 x i8> @llvm.umin.v3i8(<3 x i8> %sub, <3 x i8> <i8 3, i8 poison, i8 3>) ret <3 x i8> %val } @@ -2581,3 +2581,92 @@ entry: %val = call i8 @llvm.umin.i8(i8 %sub, i8 3) ret i8 %val } + +define i8 @test_umax_and(i8 %x, i8 %y) { +; CHECK-LABEL: @test_umax_and( +; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umax.i8(i8 [[X1:%.*]], i8 [[Y1:%.*]]) +; CHECK-NEXT: [[RES1:%.*]] = and i8 [[RES]], -64 +; CHECK-NEXT: ret i8 [[RES1]] +; + %x1 = and i8 %x, -64 + %y1 = and i8 %y, -64 + %res = call i8 @llvm.umax.i8(i8 %x1, i8 %y1) + ret i8 %res +} + +define i8 @test_umin_and(i8 %x, i8 %y) { +; CHECK-LABEL: @test_umin_and( +; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[X1:%.*]], i8 [[Y1:%.*]]) +; CHECK-NEXT: [[RES1:%.*]] = and i8 [[RES]], -64 +; CHECK-NEXT: ret i8 [[RES1]] +; + %x1 = and i8 %x, -64 + %y1 = and i8 %y, -64 + %res = call i8 @llvm.umin.i8(i8 %x1, i8 %y1) + ret i8 %res +} + +define i8 @test_smax_and(i8 %x, i8 %y) { +; CHECK-LABEL: @test_smax_and( +; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.smax.i8(i8 [[X1:%.*]], i8 [[Y1:%.*]]) +; CHECK-NEXT: [[RES1:%.*]] = and i8 [[RES]], -64 +; CHECK-NEXT: ret i8 [[RES1]] +; + %x1 = and i8 %x, -64 + %y1 = and i8 %y, -64 + %res = call i8 @llvm.smax.i8(i8 %x1, i8 %y1) + ret i8 %res +} + +define i8 @test_smin_and(i8 %x, i8 %y) { +; CHECK-LABEL: @test_smin_and( +; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.smin.i8(i8 [[X1:%.*]], i8 [[Y1:%.*]]) +; CHECK-NEXT: [[RES1:%.*]] = and i8 [[RES]], -64 +; CHECK-NEXT: ret i8 [[RES1]] +; + %x1 = and i8 %x, -64 + %y1 = and i8 %y, -64 + %res = call i8 @llvm.smin.i8(i8 %x1, i8 %y1) + ret i8 %res +} + +define i8 @test_smin_and_mismatch(i8 %x, i8 %y) { +; CHECK-LABEL: @test_smin_and_mismatch( +; CHECK-NEXT: [[X1:%.*]] = and i8 [[X:%.*]], -64 +; 
CHECK-NEXT: [[Y1:%.*]] = and i8 [[Y:%.*]], -32 +; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.smin.i8(i8 [[X1]], i8 [[Y1]]) +; CHECK-NEXT: ret i8 [[RES]] +; + %x1 = and i8 %x, -64 + %y1 = and i8 %y, -32 + %res = call i8 @llvm.smin.i8(i8 %x1, i8 %y1) + ret i8 %res +} + +define i8 @test_smin_and_non_negated_pow2(i8 %x, i8 %y) { +; CHECK-LABEL: @test_smin_and_non_negated_pow2( +; CHECK-NEXT: [[X1:%.*]] = and i8 [[X:%.*]], 31 +; CHECK-NEXT: [[Y1:%.*]] = and i8 [[Y:%.*]], 31 +; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.smin.i8(i8 [[X1]], i8 [[Y1]]) +; CHECK-NEXT: ret i8 [[RES]] +; + %x1 = and i8 %x, 31 + %y1 = and i8 %y, 31 + %res = call i8 @llvm.smin.i8(i8 %x1, i8 %y1) + ret i8 %res +} + +define i8 @test_smin_and_multiuse(i8 %x, i8 %y) { +; CHECK-LABEL: @test_smin_and_multiuse( +; CHECK-NEXT: [[X1:%.*]] = and i8 [[X:%.*]], 31 +; CHECK-NEXT: [[Y1:%.*]] = and i8 [[Y:%.*]], 31 +; CHECK-NEXT: call void @use(i8 [[Y1]]) +; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.smin.i8(i8 [[X1]], i8 [[Y1]]) +; CHECK-NEXT: ret i8 [[RES]] +; + %x1 = and i8 %x, 31 + %y1 = and i8 %y, 31 + call void @use(i8 %y1) + %res = call i8 @llvm.smin.i8(i8 %x1, i8 %y1) + ret i8 %res +} diff --git a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll index 8fe4261..f47c557 100644 --- a/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/mul-inseltpoison.ll @@ -784,7 +784,7 @@ define <2 x i8> @negate_if_false_commute(<2 x i8> %px, <2 x i1> %cond) { ; CHECK-NEXT: ret <2 x i8> [[R]] ; %x = sdiv <2 x i8> <i8 42, i8 5>, %px ; thwart complexity-based canonicalization - %sel = select <2 x i1> %cond, <2 x i8> <i8 1, i8 undef>, <2 x i8> <i8 -1, i8 -1> + %sel = select <2 x i1> %cond, <2 x i8> <i8 1, i8 poison>, <2 x i8> <i8 -1, i8 -1> %r = mul <2 x i8> %x, %sel ret <2 x i8> %r } @@ -931,7 +931,7 @@ define <vscale x 2 x i64> @mul_scalable_splat_zero(<vscale x 2 x i64> %z) { ; CHECK-LABEL: @mul_scalable_splat_zero( ; CHECK-NEXT: 
ret <vscale x 2 x i64> zeroinitializer ; - %shuf = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer + %shuf = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer %t3 = mul <vscale x 2 x i64> %shuf, %z ret <vscale x 2 x i64> %t3 } @@ -973,14 +973,14 @@ define <2 x i32> @mulsub1_vec_nonuniform(<2 x i32> %a0, <2 x i32> %a1) { ret <2 x i32> %mul } -define <2 x i32> @mulsub1_vec_nonuniform_undef(<2 x i32> %a0, <2 x i32> %a1) { -; CHECK-LABEL: @mulsub1_vec_nonuniform_undef( +define <2 x i32> @mulsub1_vec_nonuniform_poison(<2 x i32> %a0, <2 x i32> %a1) { +; CHECK-LABEL: @mulsub1_vec_nonuniform_poison( ; CHECK-NEXT: [[SUB_NEG:%.*]] = sub <2 x i32> [[A0:%.*]], [[A1:%.*]] ; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], <i32 2, i32 0> ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %sub = sub <2 x i32> %a1, %a0 - %mul = mul <2 x i32> %sub, <i32 -4, i32 undef> + %mul = mul <2 x i32> %sub, <i32 -4, i32 poison> ret <2 x i32> %mul } @@ -1017,14 +1017,14 @@ define <2 x i32> @mulsub2_vec_nonuniform(<2 x i32> %a0) { ret <2 x i32> %mul } -define <2 x i32> @mulsub2_vec_nonuniform_undef(<2 x i32> %a0) { -; CHECK-LABEL: @mulsub2_vec_nonuniform_undef( +define <2 x i32> @mulsub2_vec_nonuniform_poison(<2 x i32> %a0) { +; CHECK-LABEL: @mulsub2_vec_nonuniform_poison( ; CHECK-NEXT: [[SUB_NEG:%.*]] = add <2 x i32> [[A0:%.*]], <i32 -16, i32 -32> ; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], <i32 2, i32 0> ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %sub = sub <2 x i32> <i32 16, i32 32>, %a0 - %mul = mul <2 x i32> %sub, <i32 -4, i32 undef> + %mul = mul <2 x i32> %sub, <i32 -4, i32 poison> ret <2 x i32> %mul } @@ -1061,14 +1061,14 @@ define <2 x i32> @muladd2_vec_nonuniform(<2 x i32> %a0) { ret <2 x i32> %mul } -define <2 x i32> @muladd2_vec_nonuniform_undef(<2 x i32> %a0) { -; CHECK-LABEL: 
@muladd2_vec_nonuniform_undef( +define <2 x i32> @muladd2_vec_nonuniform_poison(<2 x i32> %a0) { +; CHECK-LABEL: @muladd2_vec_nonuniform_poison( ; CHECK-NEXT: [[ADD_NEG:%.*]] = sub <2 x i32> <i32 -16, i32 -32>, [[A0:%.*]] ; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[ADD_NEG]], <i32 2, i32 0> ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %add = add <2 x i32> %a0, <i32 16, i32 32> - %mul = mul <2 x i32> %add, <i32 -4, i32 undef> + %mul = mul <2 x i32> %add, <i32 -4, i32 poison> ret <2 x i32> %mul } diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll index d4a689c6..227ca4a 100644 --- a/llvm/test/Transforms/InstCombine/mul.ll +++ b/llvm/test/Transforms/InstCombine/mul.ll @@ -1496,7 +1496,7 @@ define <2 x i8> @negate_if_false_commute(<2 x i8> %px, <2 x i1> %cond) { ; CHECK-NEXT: ret <2 x i8> [[R]] ; %x = sdiv <2 x i8> <i8 42, i8 5>, %px ; thwart complexity-based canonicalization - %sel = select <2 x i1> %cond, <2 x i8> <i8 1, i8 undef>, <2 x i8> <i8 -1, i8 -1> + %sel = select <2 x i1> %cond, <2 x i8> <i8 1, i8 poison>, <2 x i8> <i8 -1, i8 -1> %r = mul <2 x i8> %x, %sel ret <2 x i8> %r } @@ -1643,7 +1643,7 @@ define <vscale x 2 x i64> @mul_scalable_splat_zero(<vscale x 2 x i64> %z) { ; CHECK-LABEL: @mul_scalable_splat_zero( ; CHECK-NEXT: ret <vscale x 2 x i64> zeroinitializer ; - %shuf = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 0, i32 0), <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer + %shuf = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer %t3 = mul <vscale x 2 x i64> %shuf, %z ret <vscale x 2 x i64> %t3 } @@ -1752,14 +1752,14 @@ define <2 x i32> @mulsub1_vec_nonuniform(<2 x i32> %a0, <2 x i32> %a1) { ret <2 x i32> %mul } -define <2 x i32> @mulsub1_vec_nonuniform_undef(<2 x i32> %a0, <2 x i32> %a1) { -; CHECK-LABEL: @mulsub1_vec_nonuniform_undef( +define <2 x i32> 
@mulsub1_vec_nonuniform_poison(<2 x i32> %a0, <2 x i32> %a1) { +; CHECK-LABEL: @mulsub1_vec_nonuniform_poison( ; CHECK-NEXT: [[SUB_NEG:%.*]] = sub <2 x i32> [[A0:%.*]], [[A1:%.*]] ; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], <i32 2, i32 0> ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %sub = sub <2 x i32> %a1, %a0 - %mul = mul <2 x i32> %sub, <i32 -4, i32 undef> + %mul = mul <2 x i32> %sub, <i32 -4, i32 poison> ret <2 x i32> %mul } @@ -1796,14 +1796,14 @@ define <2 x i32> @mulsub2_vec_nonuniform(<2 x i32> %a0) { ret <2 x i32> %mul } -define <2 x i32> @mulsub2_vec_nonuniform_undef(<2 x i32> %a0) { -; CHECK-LABEL: @mulsub2_vec_nonuniform_undef( +define <2 x i32> @mulsub2_vec_nonuniform_poison(<2 x i32> %a0) { +; CHECK-LABEL: @mulsub2_vec_nonuniform_poison( ; CHECK-NEXT: [[SUB_NEG:%.*]] = add <2 x i32> [[A0:%.*]], <i32 -16, i32 -32> ; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[SUB_NEG]], <i32 2, i32 0> ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %sub = sub <2 x i32> <i32 16, i32 32>, %a0 - %mul = mul <2 x i32> %sub, <i32 -4, i32 undef> + %mul = mul <2 x i32> %sub, <i32 -4, i32 poison> ret <2 x i32> %mul } @@ -1819,15 +1819,15 @@ define i8 @mulsub_nsw(i8 %a1, i8 %a2) { } ; It would be safe to keep the nsw on the shl here, but only because the mul -; to shl transform happens to replace undef with 0. -define <2 x i8> @mulsub_nsw_undef(<2 x i8> %a1, <2 x i8> %a2) { -; CHECK-LABEL: @mulsub_nsw_undef( +; to shl transform happens to replace poison with 0. 
+define <2 x i8> @mulsub_nsw_poison(<2 x i8> %a1, <2 x i8> %a2) { +; CHECK-LABEL: @mulsub_nsw_poison( ; CHECK-NEXT: [[A_NEG:%.*]] = sub nsw <2 x i8> [[A2:%.*]], [[A1:%.*]] ; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i8> [[A_NEG]], <i8 1, i8 0> ; CHECK-NEXT: ret <2 x i8> [[MUL]] ; %a = sub nsw <2 x i8> %a1, %a2 - %mul = mul nsw <2 x i8> %a, <i8 -2, i8 undef> + %mul = mul nsw <2 x i8> %a, <i8 -2, i8 poison> ret <2 x i8> %mul } @@ -1864,14 +1864,14 @@ define <2 x i32> @muladd2_vec_nonuniform(<2 x i32> %a0) { ret <2 x i32> %mul } -define <2 x i32> @muladd2_vec_nonuniform_undef(<2 x i32> %a0) { -; CHECK-LABEL: @muladd2_vec_nonuniform_undef( +define <2 x i32> @muladd2_vec_nonuniform_poison(<2 x i32> %a0) { +; CHECK-LABEL: @muladd2_vec_nonuniform_poison( ; CHECK-NEXT: [[ADD_NEG:%.*]] = sub <2 x i32> <i32 -16, i32 -32>, [[A0:%.*]] ; CHECK-NEXT: [[MUL:%.*]] = shl <2 x i32> [[ADD_NEG]], <i32 2, i32 0> ; CHECK-NEXT: ret <2 x i32> [[MUL]] ; %add = add <2 x i32> %a0, <i32 16, i32 32> - %mul = mul <2 x i32> %add, <i32 -4, i32 undef> + %mul = mul <2 x i32> %add, <i32 -4, i32 poison> ret <2 x i32> %mul } diff --git a/llvm/test/Transforms/InstCombine/not-add.ll b/llvm/test/Transforms/InstCombine/not-add.ll index 877f558..9ba37b6 100644 --- a/llvm/test/Transforms/InstCombine/not-add.ll +++ b/llvm/test/Transforms/InstCombine/not-add.ll @@ -115,26 +115,26 @@ define <4 x i32> @vector_test(<4 x i32> %x, <4 x i32> %y) { ret <4 x i32> %nota } -define <4 x i32> @vector_test_undef(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @vector_test_undef( +define <4 x i32> @vector_test_poison(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @vector_test_poison( ; CHECK-NEXT: [[NOTA:%.*]] = sub <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[NOTA]] ; - %notx = xor <4 x i32> %x, <i32 -1, i32 undef, i32 undef, i32 -1> + %notx = xor <4 x i32> %x, <i32 -1, i32 poison, i32 poison, i32 -1> %a = add <4 x i32> %notx, %y - %nota = xor <4 x i32> %a, <i32 -1, i32 -1, i32 undef, i32 undef> + %nota = xor <4 x 
i32> %a, <i32 -1, i32 -1, i32 poison, i32 poison> ret <4 x i32> %nota } -define <4 x i32> @vector_test_undef_nsw_nuw(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @vector_test_undef_nsw_nuw( +define <4 x i32> @vector_test_poison_nsw_nuw(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @vector_test_poison_nsw_nuw( ; CHECK-NEXT: [[NOTA:%.*]] = sub nuw nsw <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[NOTA]] ; - %notx = xor <4 x i32> %x, <i32 -1, i32 undef, i32 undef, i32 -1> + %notx = xor <4 x i32> %x, <i32 -1, i32 poison, i32 poison, i32 -1> %a = add nsw nuw <4 x i32> %notx, %y - %nota = xor <4 x i32> %a, <i32 -1, i32 -1, i32 undef, i32 undef> + %nota = xor <4 x i32> %a, <i32 -1, i32 -1, i32 poison, i32 poison> ret <4 x i32> %nota } diff --git a/llvm/test/Transforms/InstCombine/not.ll b/llvm/test/Transforms/InstCombine/not.ll index 98b5d980..0c2c619 100644 --- a/llvm/test/Transforms/InstCombine/not.ll +++ b/llvm/test/Transforms/InstCombine/not.ll @@ -430,9 +430,9 @@ define <3 x i5> @not_or_neg_commute_vec(<3 x i5> %x, <3 x i5> %p) { ; CHECK-NEXT: ret <3 x i5> [[NOT]] ; %y = mul <3 x i5> %p, <i5 1, i5 2, i5 3> ; thwart complexity-based-canonicalization - %s = sub <3 x i5> <i5 0, i5 0, i5 undef>, %x + %s = sub <3 x i5> <i5 0, i5 0, i5 poison>, %x %o = or <3 x i5> %y, %s - %not = xor <3 x i5> %o, <i5 -1, i5 undef, i5 -1> + %not = xor <3 x i5> %o, <i5 -1, i5 poison, i5 -1> ret <3 x i5> %not } diff --git a/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll index c16633e..3fd4a17 100644 --- a/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll +++ b/llvm/test/Transforms/InstCombine/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll @@ -95,41 +95,41 @@ define <4 x i1> @p5_vector_urem_by_const__nonsplat(<4 x i32> %x, <4 x i32> %y) { ret <4 x i1> %t2 } -define <4 x i1> 
@p6_vector_urem_by_const__nonsplat_undef0(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @p6_vector_urem_by_const__nonsplat_undef0( -; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], <i32 128, i32 128, i32 undef, i32 128> -; CHECK-NEXT: [[T1:%.*]] = urem <4 x i32> [[T0]], <i32 6, i32 6, i32 6, i32 6> -; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T1]], zeroinitializer +; The poison value in the vector makes the whole function UB. + +define <4 x i1> @p6_vector_urem_by_const__nonsplat_poison0(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @p6_vector_urem_by_const__nonsplat_poison0( +; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], <i32 128, i32 128, i32 poison, i32 128> +; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T0]], zeroinitializer ; CHECK-NEXT: ret <4 x i1> [[T2]] ; - %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 undef, i32 128> + %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 poison, i32 128> %t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two %t2 = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0> ret <4 x i1> %t2 } -define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @p7_vector_urem_by_const__nonsplat_undef2( +define <4 x i1> @p7_vector_urem_by_const__nonsplat_poison2(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @p7_vector_urem_by_const__nonsplat_poison2( ; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], <i32 128, i32 128, i32 128, i32 128> -; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T0]], <i32 0, i32 0, i32 undef, i32 0> +; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T0]], <i32 0, i32 0, i32 poison, i32 0> ; CHECK-NEXT: ret <4 x i1> [[T2]] ; %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 128, i32 128> ; clearly a power-of-two or zero %t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two - %t2 = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0> + %t2 = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 poison, i32 0> 
ret <4 x i1> %t2 } -define <4 x i1> @p8_vector_urem_by_const__nonsplat_undef3(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @p8_vector_urem_by_const__nonsplat_undef3( -; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], <i32 128, i32 128, i32 undef, i32 128> -; CHECK-NEXT: [[T1:%.*]] = urem <4 x i32> [[T0]], <i32 6, i32 6, i32 6, i32 6> -; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T1]], <i32 0, i32 0, i32 undef, i32 0> +define <4 x i1> @p8_vector_urem_by_const__nonsplat_poison3(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @p8_vector_urem_by_const__nonsplat_poison3( +; CHECK-NEXT: [[T0:%.*]] = and <4 x i32> [[X:%.*]], <i32 128, i32 128, i32 poison, i32 128> +; CHECK-NEXT: [[T2:%.*]] = icmp eq <4 x i32> [[T0]], <i32 0, i32 0, i32 poison, i32 0> ; CHECK-NEXT: ret <4 x i1> [[T2]] ; - %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 undef, i32 128> + %t0 = and <4 x i32> %x, <i32 128, i32 128, i32 poison, i32 128> %t1 = urem <4 x i32> %t0, <i32 6, i32 6, i32 6, i32 6> ; '6' is clearly not a power of two - %t2 = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 undef, i32 0> + %t2 = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 poison, i32 0> ret <4 x i1> %t2 } diff --git a/llvm/test/Transforms/InstCombine/operand-complexity.ll b/llvm/test/Transforms/InstCombine/operand-complexity.ll index 62cfc76..541a152 100644 --- a/llvm/test/Transforms/InstCombine/operand-complexity.ll +++ b/llvm/test/Transforms/InstCombine/operand-complexity.ll @@ -29,15 +29,15 @@ define <2 x i8> @neg_vec(<2 x i8> %x) { ret <2 x i8> %r } -define <2 x i8> @neg_vec_undef(<2 x i8> %x) { -; CHECK-LABEL: @neg_vec_undef( +define <2 x i8> @neg_vec_poison(<2 x i8> %x) { +; CHECK-LABEL: @neg_vec_poison( ; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i8> [[X:%.*]], <i8 42, i8 -42> -; CHECK-NEXT: [[NEGX:%.*]] = sub <2 x i8> <i8 0, i8 undef>, [[X]] +; CHECK-NEXT: [[NEGX:%.*]] = sub <2 x i8> <i8 0, i8 poison>, [[X]] ; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[BO]], [[NEGX]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %bo = udiv <2 x i8> %x, <i8 
42, i8 -42> - %negx = sub <2 x i8> <i8 0, i8 undef>, %x + %negx = sub <2 x i8> <i8 0, i8 poison>, %x %r = xor <2 x i8> %negx, %bo ret <2 x i8> %r } @@ -70,15 +70,15 @@ define <2 x i8> @not_vec(<2 x i8> %x) { ret <2 x i8> %r } -define <2 x i8> @not_vec_undef(<2 x i8> %x) { -; CHECK-LABEL: @not_vec_undef( +define <2 x i8> @not_vec_poison(<2 x i8> %x) { +; CHECK-LABEL: @not_vec_poison( ; CHECK-NEXT: [[BO:%.*]] = udiv <2 x i8> [[X:%.*]], <i8 42, i8 -42> -; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i8> [[X]], <i8 -1, i8 undef> +; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i8> [[X]], <i8 -1, i8 poison> ; CHECK-NEXT: [[R:%.*]] = mul <2 x i8> [[BO]], [[NOTX]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %bo = udiv <2 x i8> %x, <i8 42, i8 -42> - %notx = xor <2 x i8> <i8 -1, i8 undef>, %x + %notx = xor <2 x i8> <i8 -1, i8 poison>, %x %r = mul <2 x i8> %notx, %bo ret <2 x i8> %r } @@ -134,8 +134,8 @@ define <2 x float> @fneg_vec(<2 x float> %x) { ret <2 x float> %r } -define <2 x float> @fneg_vec_undef(<2 x float> %x) { -; CHECK-LABEL: @fneg_vec_undef( +define <2 x float> @fneg_vec_poison(<2 x float> %x) { +; CHECK-LABEL: @fneg_vec_poison( ; CHECK-NEXT: [[BO:%.*]] = fdiv <2 x float> [[X:%.*]], <float 4.200000e+01, float -4.200000e+01> ; CHECK-NEXT: [[FNEGX:%.*]] = fneg <2 x float> [[X]] ; CHECK-NEXT: [[R:%.*]] = fmul <2 x float> [[BO]], [[FNEGX]] @@ -143,7 +143,7 @@ define <2 x float> @fneg_vec_undef(<2 x float> %x) { ; CHECK-NEXT: ret <2 x float> [[R]] ; %bo = fdiv <2 x float> %x, <float 42.0, float -42.0> - %fnegx = fsub <2 x float> <float -0.0, float undef>, %x + %fnegx = fsub <2 x float> <float -0.0, float poison>, %x %r = fmul <2 x float> %fnegx, %bo call void @use_vec(<2 x float> %fnegx) ret <2 x float> %r diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll index 1b1a6ff..6e2085a 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -262,26 +262,26 @@ define <2 x i1> @and_icmp_eq_0_vector(<2 x i32> %A, <2 x 
i32> %B) { ret <2 x i1> %D } -define <2 x i1> @and_icmp_eq_0_vector_undef1(<2 x i32> %A, <2 x i32> %B) { -; CHECK-LABEL: @and_icmp_eq_0_vector_undef1( +define <2 x i1> @and_icmp_eq_0_vector_poison1(<2 x i32> %A, <2 x i32> %B) { +; CHECK-LABEL: @and_icmp_eq_0_vector_poison1( ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[D:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[D]] ; - %C1 = icmp eq <2 x i32> %A, <i32 0, i32 undef> - %C2 = icmp eq <2 x i32> %B, <i32 0, i32 undef> + %C1 = icmp eq <2 x i32> %A, <i32 0, i32 poison> + %C2 = icmp eq <2 x i32> %B, <i32 0, i32 poison> %D = and <2 x i1> %C1, %C2 ret <2 x i1> %D } -define <2 x i1> @and_icmp_eq_0_vector_undef2(<2 x i32> %A, <2 x i32> %B) { -; CHECK-LABEL: @and_icmp_eq_0_vector_undef2( +define <2 x i1> @and_icmp_eq_0_vector_poison2(<2 x i32> %A, <2 x i32> %B) { +; CHECK-LABEL: @and_icmp_eq_0_vector_poison2( ; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[D:%.*]] = icmp eq <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[D]] ; - %C1 = icmp eq <2 x i32> %A, <i32 0, i32 undef> - %C2 = icmp eq <2 x i32> %B, <i32 undef, i32 0> + %C1 = icmp eq <2 x i32> %A, <i32 0, i32 poison> + %C2 = icmp eq <2 x i32> %B, <i32 poison, i32 0> %D = and <2 x i1> %C1, %C2 ret <2 x i1> %D } @@ -566,17 +566,17 @@ define <2 x i1> @test37_uniform(<2 x i32> %x) { ret <2 x i1> %ret1 } -define <2 x i1> @test37_undef(<2 x i32> %x) { -; CHECK-LABEL: @test37_undef( -; CHECK-NEXT: [[ADD1:%.*]] = add <2 x i32> [[X:%.*]], <i32 7, i32 undef> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[ADD1]], <i32 30, i32 undef> -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq <2 x i32> [[X]], <i32 23, i32 undef> +define <2 x i1> @test37_poison(<2 x i32> %x) { +; CHECK-LABEL: @test37_poison( +; CHECK-NEXT: [[ADD1:%.*]] = add <2 x i32> [[X:%.*]], <i32 7, i32 poison> +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i32> [[ADD1]], <i32 30, i32 poison> +; CHECK-NEXT: 
[[CMP2:%.*]] = icmp eq <2 x i32> [[X]], <i32 23, i32 poison> ; CHECK-NEXT: [[RET1:%.*]] = or <2 x i1> [[CMP1]], [[CMP2]] ; CHECK-NEXT: ret <2 x i1> [[RET1]] ; - %add1 = add <2 x i32> %x, <i32 7, i32 undef> - %cmp1 = icmp ult <2 x i32> %add1, <i32 30, i32 undef> - %cmp2 = icmp eq <2 x i32> %x, <i32 23, i32 undef> + %add1 = add <2 x i32> %x, <i32 7, i32 poison> + %cmp1 = icmp ult <2 x i32> %add1, <i32 30, i32 poison> + %cmp2 = icmp eq <2 x i32> %x, <i32 23, i32 poison> %ret1 = or <2 x i1> %cmp1, %cmp2 ret <2 x i1> %ret1 } @@ -874,19 +874,19 @@ define <2 x i1> @test46_uniform(<2 x i8> %c) { ret <2 x i1> %or } -define <2 x i1> @test46_undef(<2 x i8> %c) { -; CHECK-LABEL: @test46_undef( -; CHECK-NEXT: [[C_OFF:%.*]] = add <2 x i8> [[C:%.*]], <i8 -97, i8 undef> -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i8> [[C_OFF]], <i8 26, i8 undef> -; CHECK-NEXT: [[C_OFF17:%.*]] = add <2 x i8> [[C]], <i8 -65, i8 undef> -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult <2 x i8> [[C_OFF17]], <i8 26, i8 undef> +define <2 x i1> @test46_poison(<2 x i8> %c) { +; CHECK-LABEL: @test46_poison( +; CHECK-NEXT: [[C_OFF:%.*]] = add <2 x i8> [[C:%.*]], <i8 -97, i8 poison> +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult <2 x i8> [[C_OFF]], <i8 26, i8 poison> +; CHECK-NEXT: [[C_OFF17:%.*]] = add <2 x i8> [[C]], <i8 -65, i8 poison> +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult <2 x i8> [[C_OFF17]], <i8 26, i8 poison> ; CHECK-NEXT: [[OR:%.*]] = or <2 x i1> [[CMP1]], [[CMP2]] ; CHECK-NEXT: ret <2 x i1> [[OR]] ; - %c.off = add <2 x i8> %c, <i8 -97, i8 undef> - %cmp1 = icmp ult <2 x i8> %c.off, <i8 26, i8 undef> - %c.off17 = add <2 x i8> %c, <i8 -65, i8 undef> - %cmp2 = icmp ult <2 x i8> %c.off17, <i8 26, i8 undef> + %c.off = add <2 x i8> %c, <i8 -97, i8 poison> + %cmp1 = icmp ult <2 x i8> %c.off, <i8 26, i8 poison> + %c.off17 = add <2 x i8> %c, <i8 -65, i8 poison> + %cmp2 = icmp ult <2 x i8> %c.off17, <i8 26, i8 poison> %or = or <2 x i1> %cmp1, %cmp2 ret <2 x i1> %or } diff --git 
a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll index f0c2f12..5ed7d64 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-b.ll @@ -89,13 +89,13 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ret <8 x i32> %t7 } -define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { -; CHECK-LABEL: @t2_vec_splat_undef( -; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1> +define <8 x i32> @t2_vec_splat_poison(<8 x i64> %x, <8 x i32> %nbits) { +; CHECK-LABEL: @t2_vec_splat_poison( +; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1> ; CHECK-NEXT: [[T1:%.*]] = zext <8 x i32> [[T0]] to <8 x i64> -; CHECK-NEXT: [[T2:%.*]] = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, [[T1]] -; CHECK-NEXT: [[T3:%.*]] = xor <8 x i64> [[T2]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1> -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 undef, i32 32>, [[NBITS]] +; CHECK-NEXT: [[T2:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, [[T1]] +; CHECK-NEXT: [[T3:%.*]] = xor <8 x i64> [[T2]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1> +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 poison, i32 32>, [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]]) @@ -106,11 
+106,11 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: [[T7:%.*]] = and <8 x i32> [[TMP2]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647> ; CHECK-NEXT: ret <8 x i32> [[T7]] ; - %t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1> + %t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1> %t1 = zext <8 x i32> %t0 to <8 x i64> - %t2 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, %t1 ; shifting by nbits-1 - %t3 = xor <8 x i64> %t2, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1> - %t4 = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 undef, i32 32>, %nbits + %t2 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, %t1 ; shifting by nbits-1 + %t3 = xor <8 x i64> %t2, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1> + %t4 = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 poison, i32 32>, %nbits call void @use8xi32(<8 x i32> %t0) call void @use8xi64(<8 x i64> %t1) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll index 46d1de5..1a711e5 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-c.ll @@ -73,11 +73,11 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ret <8 x i32> %t5 } -define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { -; CHECK-LABEL: @t2_vec_splat_undef( +define <8 x i32> @t2_vec_splat_poison(<8 x i64> %x, <8 x i32> %nbits) { +; 
CHECK-LABEL: @t2_vec_splat_poison( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 undef, i32 -33> +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 poison, i32 -33> ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) @@ -87,8 +87,8 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> - %t1 = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, %t0 - %t2 = add <8 x i32> %nbits, <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 undef, i32 -33> + %t1 = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, %t0 + %t2 = add <8 x i32> %nbits, <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 poison, i32 -33> call void @use8xi64(<8 x i64> %t0) call void @use8xi64(<8 x i64> %t1) @@ -103,8 +103,8 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -64, i32 -63, i32 -33, i32 -32, i32 63, i32 64, i32 undef, i32 65> +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 
poison, i64 -1>, [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -64, i32 -63, i32 -33, i32 -32, i32 63, i32 64, i32 poison, i32 65> ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) @@ -114,8 +114,8 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> - %t1 = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, %t0 - %t2 = add <8 x i32> %nbits, <i32 -64, i32 -63, i32 -33, i32 -32, i32 63, i32 64, i32 undef, i32 65> + %t1 = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, %t0 + %t2 = add <8 x i32> %nbits, <i32 -64, i32 -63, i32 -33, i32 -32, i32 63, i32 64, i32 poison, i32 65> call void @use8xi64(<8 x i64> %t0) call void @use8xi64(<8 x i64> %t1) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll index 4887385..cd0098e 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-after-truncation-variant-d.ll @@ -81,12 +81,12 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ret <8 x i32> %t6 } -define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { -; CHECK-LABEL: @t2_vec_splat_undef( +define <8 x i32> @t2_vec_splat_poison(<8 x i64> %x, <8 x i32> %nbits) { +; CHECK-LABEL: @t2_vec_splat_poison( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, [[T0]] +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, 
i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, [[T0]] ; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]] -; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 undef, i32 -33> +; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 poison, i32 -33> ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]]) @@ -97,9 +97,9 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: ret <8 x i32> [[T6]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> - %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, %t0 + %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, %t0 %t2 = lshr <8 x i64> %t1, %t0 - %t3 = add <8 x i32> %nbits, <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 undef, i32 -33> + %t3 = add <8 x i32> %nbits, <i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 -33, i32 poison, i32 -33> call void @use8xi64(<8 x i64> %t0) call void @use8xi64(<8 x i64> %t1) @@ -115,9 +115,9 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, [[T0]] +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, [[T0]] ; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]] -; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -64, i32 -63, i32 -33, i32 -32, i32 63, i32 64, i32 undef, i32 65> 
+; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -64, i32 -63, i32 -33, i32 -32, i32 63, i32 64, i32 poison, i32 65> ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T2]]) @@ -128,9 +128,9 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: ret <8 x i32> [[T6]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> - %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, %t0 + %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, %t0 %t2 = lshr <8 x i64> %t1, %t0 - %t3 = add <8 x i32> %nbits, <i32 -64, i32 -63, i32 -33, i32 -32, i32 63, i32 64, i32 undef, i32 65> + %t3 = add <8 x i32> %nbits, <i32 -64, i32 -63, i32 -33, i32 -32, i32 63, i32 64, i32 poison, i32 65> call void @use8xi64(<8 x i64> %t0) call void @use8xi64(<8 x i64> %t1) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll index 8b3f01b..1debf11 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-b.ll @@ -71,12 +71,12 @@ define <8 x i32> @t1_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { ret <8 x i32> %t5 } -define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { -; CHECK-LABEL: @t1_vec_splat_undef( -; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1> -; CHECK-NEXT: [[T1:%.*]] = shl <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>, [[T0]] -; CHECK-NEXT: [[T2:%.*]] = xor <8 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1> -; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 
32, i32 32, i32 32, i32 32, i32 undef, i32 32>, [[NBITS]] +define <8 x i32> @t1_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) { +; CHECK-LABEL: @t1_vec_splat_poison( +; CHECK-NEXT: [[T0:%.*]] = add <8 x i32> [[NBITS:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1> +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1>, [[T0]] +; CHECK-NEXT: [[T2:%.*]] = xor <8 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1> +; CHECK-NEXT: [[T4:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 poison, i32 32>, [[NBITS]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) @@ -85,11 +85,11 @@ define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: [[T5:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647> ; CHECK-NEXT: ret <8 x i32> [[T5]] ; - %t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1> - %t1 = shl <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>, %t0 - %t2 = xor <8 x i32> %t1, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1> + %t0 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1> + %t1 = shl <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1>, %t0 + %t2 = xor <8 x i32> %t1, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1> %t3 = and <8 x i32> %t2, %x - %t4 = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 undef, i32 32>, %nbits + %t4 = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 poison, i32 32>, %nbits call void @use8xi32(<8 x i32> %t0) call void @use8xi32(<8 x i32> %t1) call 
void @use8xi32(<8 x i32> %t2) diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll index 58a9050..55d0b3f 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-c.ll @@ -55,19 +55,19 @@ define <8 x i32> @t1_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { ret <8 x i32> %t3 } -define <8 x i32> @t1_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { -; CHECK-LABEL: @t1_vec_splat_undef( -; CHECK-NEXT: [[T0:%.*]] = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>, [[NBITS:%.*]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1> +define <8 x i32> @t1_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) { +; CHECK-LABEL: @t1_vec_splat_poison( +; CHECK-NEXT: [[T0:%.*]] = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1>, [[NBITS:%.*]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1> ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T2]]) ; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> [[X:%.*]], [[T2]] ; CHECK-NEXT: [[T3:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647> ; CHECK-NEXT: ret <8 x i32> [[T3]] ; - %t0 = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>, %nbits + %t0 = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1>, %nbits %t1 = and <8 x i32> %t0, %x - %t2 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1> + %t2 = add <8 x i32> 
%nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1> call void @use8xi32(<8 x i32> %t0) call void @use8xi32(<8 x i32> %t2) %t3 = shl <8 x i32> %t1, %t2 ; shift is smaller than mask diff --git a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll index 9c096d1..7ad99a6 100644 --- a/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/partally-redundant-left-shift-input-masking-variant-d.ll @@ -63,11 +63,11 @@ define <8 x i32> @t2_vec_splat(<8 x i32> %x, <8 x i32> %nbits) { ret <8 x i32> %t4 } -define <8 x i32> @t2_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { -; CHECK-LABEL: @t2_vec_splat_undef( -; CHECK-NEXT: [[T0:%.*]] = shl <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>, [[NBITS:%.*]] +define <8 x i32> @t2_vec_splat_poison(<8 x i32> %x, <8 x i32> %nbits) { +; CHECK-LABEL: @t2_vec_splat_poison( +; CHECK-NEXT: [[T0:%.*]] = shl nsw <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1>, [[NBITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, [[NBITS]] -; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1> +; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1> ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T0]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T1]]) ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[T3]]) @@ -75,10 +75,10 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i32> %x, <8 x i32> %nbits) { ; CHECK-NEXT: [[T4:%.*]] = and <8 x i32> [[TMP1]], <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 poison, i32 2147483647> ; CHECK-NEXT: 
ret <8 x i32> [[T4]] ; - %t0 = shl <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1>, %nbits + %t0 = shl <8 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1>, %nbits %t1 = lshr <8 x i32> %t0, %nbits %t2 = and <8 x i32> %t1, %x - %t3 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 undef, i32 -1> + %t3 = add <8 x i32> %nbits, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 poison, i32 -1> call void @use8xi32(<8 x i32> %t0) call void @use8xi32(<8 x i32> %t1) call void @use8xi32(<8 x i32> %t3) diff --git a/llvm/test/Transforms/InstCombine/pr27236.ll b/llvm/test/Transforms/InstCombine/pr27236.ll index 61ea344..67c320d 100644 --- a/llvm/test/Transforms/InstCombine/pr27236.ll +++ b/llvm/test/Transforms/InstCombine/pr27236.ll @@ -4,7 +4,7 @@ define float @test1(i32 %scale) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.smax.i32(i32 [[SCALE:%.*]], i32 1) -; CHECK-NEXT: [[TMP2:%.*]] = sitofp i32 [[TMP1]] to float +; CHECK-NEXT: [[TMP2:%.*]] = uitofp nneg i32 [[TMP1]] to float ; CHECK-NEXT: ret float [[TMP2]] ; %1 = icmp sgt i32 1, %scale diff --git a/llvm/test/Transforms/InstCombine/pr53357.ll b/llvm/test/Transforms/InstCombine/pr53357.ll index 0a6d299..0ae6908 100644 --- a/llvm/test/Transforms/InstCombine/pr53357.ll +++ b/llvm/test/Transforms/InstCombine/pr53357.ll @@ -30,16 +30,16 @@ define <2 x i32> @src_vec(<2 x i32> noundef %0, <2 x i32> noundef %1) { ret <2 x i32> %6 } -; vector version of src with undef values -define <2 x i32> @src_vec_undef(<2 x i32> noundef %0, <2 x i32> noundef %1) { -; CHECK-LABEL: @src_vec_undef( +; vector version of src with poison values +define <2 x i32> @src_vec_poison(<2 x i32> noundef %0, <2 x i32> noundef %1) { +; CHECK-LABEL: @src_vec_poison( ; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP1:%.*]], [[TMP0:%.*]] ; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i32> [[TMP3]], <i32 -1, i32 -1> ; CHECK-NEXT: ret <2 x i32> [[TMP4]] ; %3 = and <2 
x i32> %1, %0 %4 = or <2 x i32> %1, %0 - %5 = xor <2 x i32> %4, <i32 -1, i32 undef> + %5 = xor <2 x i32> %4, <i32 -1, i32 poison> %6 = add <2 x i32> %3, %5 ret <2 x i32> %6 } diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll index d49cfe9..cb6775e 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-b.ll @@ -89,12 +89,12 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ret <8 x i32> %t6 } -define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { -; CHECK-LABEL: @t2_vec_splat_undef( +define <8 x i32> @t2_vec_splat_poison(<8 x i64> %x, <8 x i32> %nbits) { +; CHECK-LABEL: @t2_vec_splat_poison( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, [[T0]] -; CHECK-NEXT: [[T2:%.*]] = xor <8 x i64> [[T1]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1> -; CHECK-NEXT: [[T3:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 undef, i32 32>, [[NBITS]] +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, [[T0]] +; CHECK-NEXT: [[T2:%.*]] = xor <8 x i64> [[T1]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1> +; CHECK-NEXT: [[T3:%.*]] = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 poison, i32 32>, [[NBITS]] ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi32(<8 x i32> [[NBITS]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) @@ -107,9 +107,9 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; 
CHECK-NEXT: ret <8 x i32> [[T6]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> - %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, %t0 - %t2 = xor <8 x i64> %t1, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1> - %t3 = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 undef, i32 32>, %nbits + %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, %t0 + %t2 = xor <8 x i64> %t1, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1> + %t3 = sub <8 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 poison, i32 32>, %nbits %t4 = and <8 x i64> %t2, %x call void @use8xi32(<8 x i32> %nbits) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll index fbbeffb..a782467 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-c.ll @@ -77,11 +77,11 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ret <8 x i32> %t5 } -define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { -; CHECK-LABEL: @t2_vec_splat_undef( +define <8 x i32> @t2_vec_splat_poison(<8 x i64> %x, <8 x i32> %nbits) { +; CHECK-LABEL: @t2_vec_splat_poison( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 undef, i32 -32> +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 
-32, i32 -32, i32 -32, i32 -32, i32 -32, i32 poison, i32 -32> ; CHECK-NEXT: [[T3:%.*]] = and <8 x i64> [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) @@ -92,8 +92,8 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> - %t1 = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, %t0 - %t2 = add <8 x i32> %nbits, <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 undef, i32 -32> + %t1 = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, %t0 + %t2 = add <8 x i32> %nbits, <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 poison, i32 -32> %t3 = and <8 x i64> %t1, %x call void @use8xi64(<8 x i64> %t0) @@ -109,8 +109,8 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, [[T0]] -; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -1, i32 0, i32 1, i32 31, i32 32, i32 undef, i32 64> +; CHECK-NEXT: [[T1:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, [[T0]] +; CHECK-NEXT: [[T2:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -1, i32 0, i32 1, i32 31, i32 32, i32 poison, i32 64> ; CHECK-NEXT: [[T3:%.*]] = and <8 x i64> [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) @@ -121,8 +121,8 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: ret <8 x i32> [[T5]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> - %t1 = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 
-1, i64 undef, i64 -1>, %t0 - %t2 = add <8 x i32> %nbits, <i32 -32, i32 -1, i32 0, i32 1, i32 31, i32 32, i32 undef, i32 64> + %t1 = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, %t0 + %t2 = add <8 x i32> %nbits, <i32 -32, i32 -1, i32 0, i32 1, i32 31, i32 32, i32 poison, i32 64> %t3 = and <8 x i64> %t1, %x call void @use8xi64(<8 x i64> %t0) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll index 1a977f6..b79ab79 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-after-truncation-variant-d.ll @@ -85,12 +85,12 @@ define <8 x i32> @t1_vec_splat(<8 x i64> %x, <8 x i32> %nbits) { ret <8 x i32> %t6 } -define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { -; CHECK-LABEL: @t2_vec_splat_undef( +define <8 x i32> @t2_vec_splat_poison(<8 x i64> %x, <8 x i32> %nbits) { +; CHECK-LABEL: @t2_vec_splat_poison( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, [[T0]] +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, [[T0]] ; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]] -; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 undef, i32 -32> +; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 poison, i32 -32> ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> 
[[T1]]) @@ -102,9 +102,9 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: ret <8 x i32> [[T6]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> - %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, %t0 + %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, %t0 %t2 = lshr <8 x i64> %t1, %t0 - %t3 = add <8 x i32> %nbits, <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 undef, i32 -32> + %t3 = add <8 x i32> %nbits, <i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 -32, i32 poison, i32 -32> %t4 = and <8 x i64> %t2, %x call void @use8xi64(<8 x i64> %t0) @@ -121,9 +121,9 @@ define <8 x i32> @t2_vec_splat_undef(<8 x i64> %x, <8 x i32> %nbits) { define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-LABEL: @t3_vec_nonsplat( ; CHECK-NEXT: [[T0:%.*]] = zext <8 x i32> [[NBITS:%.*]] to <8 x i64> -; CHECK-NEXT: [[T1:%.*]] = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, [[T0]] +; CHECK-NEXT: [[T1:%.*]] = shl nsw <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, [[T0]] ; CHECK-NEXT: [[T2:%.*]] = lshr <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, [[T0]] -; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -1, i32 0, i32 1, i32 31, i32 32, i32 undef, i32 64> +; CHECK-NEXT: [[T3:%.*]] = add <8 x i32> [[NBITS]], <i32 -32, i32 -1, i32 0, i32 1, i32 31, i32 32, i32 poison, i32 64> ; CHECK-NEXT: [[T4:%.*]] = and <8 x i64> [[T2]], [[X:%.*]] ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T0]]) ; CHECK-NEXT: call void @use8xi64(<8 x i64> [[T1]]) @@ -135,9 +135,9 @@ define <8 x i32> @t3_vec_nonsplat(<8 x i64> %x, <8 x i32> %nbits) { ; CHECK-NEXT: ret <8 x i32> [[T6]] ; %t0 = zext <8 x i32> %nbits to <8 x i64> - %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 undef, i64 -1>, %t0 + %t1 = shl <8 x i64> <i64 -1, i64 -1, i64 
-1, i64 -1, i64 -1, i64 -1, i64 poison, i64 -1>, %t0 %t2 = lshr <8 x i64> %t1, %t0 - %t3 = add <8 x i32> %nbits, <i32 -32, i32 -1, i32 0, i32 1, i32 31, i32 32, i32 undef, i32 64> + %t3 = add <8 x i32> %nbits, <i32 -32, i32 -1, i32 0, i32 1, i32 31, i32 32, i32 poison, i32 64> %t4 = and <8 x i64> %t2, %x call void @use8xi64(<8 x i64> %t0) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll index ddaef5f..4b955a8 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-b.ll @@ -155,12 +155,12 @@ define <3 x i32> @t4_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) { ret <3 x i32> %t5 } -define <3 x i32> @t5_vec_undef(<3 x i32> %x, <3 x i32> %nbits) { -; CHECK-LABEL: @t5_vec_undef( -; CHECK-NEXT: [[T1:%.*]] = shl <3 x i32> <i32 -1, i32 undef, i32 -1>, [[NBITS:%.*]] -; CHECK-NEXT: [[T2:%.*]] = xor <3 x i32> [[T1]], <i32 -1, i32 undef, i32 -1> +define <3 x i32> @t5_vec_poison(<3 x i32> %x, <3 x i32> %nbits) { +; CHECK-LABEL: @t5_vec_poison( +; CHECK-NEXT: [[T1:%.*]] = shl nsw <3 x i32> <i32 -1, i32 poison, i32 -1>, [[NBITS:%.*]] +; CHECK-NEXT: [[T2:%.*]] = xor <3 x i32> [[T1]], <i32 -1, i32 poison, i32 -1> ; CHECK-NEXT: [[T3:%.*]] = and <3 x i32> [[T2]], [[X:%.*]] -; CHECK-NEXT: [[T4:%.*]] = sub <3 x i32> <i32 32, i32 undef, i32 32>, [[NBITS]] +; CHECK-NEXT: [[T4:%.*]] = sub <3 x i32> <i32 32, i32 poison, i32 32>, [[NBITS]] ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[NBITS]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) @@ -169,11 +169,11 @@ define <3 x i32> @t5_vec_undef(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-NEXT: [[T5:%.*]] = shl <3 x i32> [[X]], [[T4]] ; CHECK-NEXT: ret <3 x i32> [[T5]] ; - %t0 = add <3 x i32> %nbits, <i32 0, i32 undef, i32 0> - %t1 = shl <3 x i32> 
<i32 -1, i32 undef, i32 -1>, %t0 - %t2 = xor <3 x i32> %t1, <i32 -1, i32 undef, i32 -1> + %t0 = add <3 x i32> %nbits, <i32 0, i32 poison, i32 0> + %t1 = shl <3 x i32> <i32 -1, i32 poison, i32 -1>, %t0 + %t2 = xor <3 x i32> %t1, <i32 -1, i32 poison, i32 -1> %t3 = and <3 x i32> %t2, %x - %t4 = sub <3 x i32> <i32 32, i32 undef, i32 32>, %nbits + %t4 = sub <3 x i32> <i32 32, i32 poison, i32 32>, %nbits call void @use3xi32(<3 x i32> %t0) call void @use3xi32(<3 x i32> %t1) call void @use3xi32(<3 x i32> %t2) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll index c7747cf..8428ef6 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-c.ll @@ -99,20 +99,20 @@ define <3 x i32> @t3_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) { ret <3 x i32> %t3 } -define <3 x i32> @t4_vec_undef(<3 x i32> %x, <3 x i32> %nbits) { -; CHECK-LABEL: @t4_vec_undef( -; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i32> <i32 -1, i32 undef, i32 -1>, [[NBITS:%.*]] +define <3 x i32> @t4_vec_poison(<3 x i32> %x, <3 x i32> %nbits) { +; CHECK-LABEL: @t4_vec_poison( +; CHECK-NEXT: [[T0:%.*]] = lshr <3 x i32> <i32 -1, i32 poison, i32 -1>, [[NBITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = and <3 x i32> [[T0]], [[X:%.*]] -; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], <i32 1, i32 undef, i32 1> +; CHECK-NEXT: [[T2:%.*]] = add <3 x i32> [[NBITS]], <i32 1, i32 poison, i32 1> ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T1]]) ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T2]]) ; CHECK-NEXT: [[T3:%.*]] = shl <3 x i32> [[X]], [[T2]] ; CHECK-NEXT: ret <3 x i32> [[T3]] ; - %t0 = lshr <3 x i32> <i32 -1, i32 undef, i32 -1>, %nbits + %t0 = lshr <3 x i32> <i32 -1, i32 poison, i32 -1>, %nbits %t1 = and <3 x i32> %t0, %x - %t2 = add <3 x i32> 
%nbits, <i32 1, i32 undef, i32 1> + %t2 = add <3 x i32> %nbits, <i32 1, i32 poison, i32 1> call void @use3xi32(<3 x i32> %t0) call void @use3xi32(<3 x i32> %t1) call void @use3xi32(<3 x i32> %t2) diff --git a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll index 549729f..5d8ff9e 100644 --- a/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll +++ b/llvm/test/Transforms/InstCombine/redundant-left-shift-input-masking-variant-d.ll @@ -115,9 +115,9 @@ define <3 x i32> @t3_vec_nonsplat(<3 x i32> %x, <3 x i32> %nbits) { ret <3 x i32> %t4 } -define <3 x i32> @t4_vec_undef(<3 x i32> %x, <3 x i32> %nbits) { -; CHECK-LABEL: @t4_vec_undef( -; CHECK-NEXT: [[T0:%.*]] = shl <3 x i32> <i32 -1, i32 undef, i32 -1>, [[NBITS:%.*]] +define <3 x i32> @t4_vec_poison(<3 x i32> %x, <3 x i32> %nbits) { +; CHECK-LABEL: @t4_vec_poison( +; CHECK-NEXT: [[T0:%.*]] = shl nsw <3 x i32> <i32 -1, i32 poison, i32 -1>, [[NBITS:%.*]] ; CHECK-NEXT: [[T1:%.*]] = lshr <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS]] ; CHECK-NEXT: [[T2:%.*]] = and <3 x i32> [[T1]], [[X:%.*]] ; CHECK-NEXT: call void @use3xi32(<3 x i32> [[T0]]) @@ -127,10 +127,10 @@ define <3 x i32> @t4_vec_undef(<3 x i32> %x, <3 x i32> %nbits) { ; CHECK-NEXT: [[T4:%.*]] = shl <3 x i32> [[X]], [[NBITS]] ; CHECK-NEXT: ret <3 x i32> [[T4]] ; - %t0 = shl <3 x i32> <i32 -1, i32 undef, i32 -1>, %nbits + %t0 = shl <3 x i32> <i32 -1, i32 poison, i32 -1>, %nbits %t1 = lshr <3 x i32> %t0, %nbits %t2 = and <3 x i32> %t1, %x - %t3 = add <3 x i32> %nbits, <i32 0, i32 undef, i32 0> + %t3 = add <3 x i32> %nbits, <i32 0, i32 poison, i32 0> call void @use3xi32(<3 x i32> %t0) call void @use3xi32(<3 x i32> %t1) call void @use3xi32(<3 x i32> %t2) diff --git a/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll b/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll index 
fd0d942..301ead7 100644 --- a/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll +++ b/llvm/test/Transforms/InstCombine/reuse-constant-from-select-in-icmp.ll @@ -102,36 +102,36 @@ define <2 x i32> @p7_vec_splat_sgt(<2 x i32> %x, <2 x i32> %y) { ret <2 x i32> %r } -; Vectors with undef +; Vectors with poison -define <2 x i32> @p8_vec_nonsplat_undef0(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @p8_vec_nonsplat_undef0( +define <2 x i32> @p8_vec_nonsplat_poison0(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @p8_vec_nonsplat_poison0( ; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 65535, i32 65535> ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> <i32 65535, i32 65535>, <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; - %t = icmp ult <2 x i32> %x, <i32 65536, i32 undef> + %t = icmp ult <2 x i32> %x, <i32 65536, i32 poison> %r = select <2 x i1> %t, <2 x i32> %y, <2 x i32> <i32 65535, i32 65535> ret <2 x i32> %r } -define <2 x i32> @p9_vec_nonsplat_undef1(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @p9_vec_nonsplat_undef1( +define <2 x i32> @p9_vec_nonsplat_poison1(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @p9_vec_nonsplat_poison1( ; CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 65535, i32 65535> -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> <i32 65535, i32 undef>, <2 x i32> [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> <i32 65535, i32 poison>, <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; %t = icmp ult <2 x i32> %x, <i32 65536, i32 65536> - %r = select <2 x i1> %t, <2 x i32> %y, <2 x i32> <i32 65535, i32 undef> + %r = select <2 x i1> %t, <2 x i32> %y, <2 x i32> <i32 65535, i32 poison> ret <2 x i32> %r } -define <2 x i32> @p10_vec_nonsplat_undef2(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @p10_vec_nonsplat_undef2( +define <2 x i32> @p10_vec_nonsplat_poison2(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @p10_vec_nonsplat_poison2( ; 
CHECK-NEXT: [[T_INV:%.*]] = icmp ugt <2 x i32> [[X:%.*]], <i32 65535, i32 65535> -; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> <i32 65535, i32 undef>, <2 x i32> [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[T_INV]], <2 x i32> <i32 65535, i32 poison>, <2 x i32> [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; - %t = icmp ult <2 x i32> %x, <i32 65536, i32 undef> - %r = select <2 x i1> %t, <2 x i32> %y, <2 x i32> <i32 65535, i32 undef> + %t = icmp ult <2 x i32> %x, <i32 65536, i32 poison> + %r = select <2 x i1> %t, <2 x i32> %y, <2 x i32> <i32 65535, i32 poison> ret <2 x i32> %r } diff --git a/llvm/test/Transforms/InstCombine/rotate.ll b/llvm/test/Transforms/InstCombine/rotate.ll index 6c70c79..eec623e 100644 --- a/llvm/test/Transforms/InstCombine/rotate.ll +++ b/llvm/test/Transforms/InstCombine/rotate.ll @@ -65,24 +65,24 @@ define <2 x i16> @rotl_v2i16_constant_splat(<2 x i16> %x) { ret <2 x i16> %r } -define <2 x i16> @rotl_v2i16_constant_splat_undef0(<2 x i16> %x) { -; CHECK-LABEL: @rotl_v2i16_constant_splat_undef0( +define <2 x i16> @rotl_v2i16_constant_splat_poison0(<2 x i16> %x) { +; CHECK-LABEL: @rotl_v2i16_constant_splat_poison0( ; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> <i16 1, i16 1>) ; CHECK-NEXT: ret <2 x i16> [[R]] ; - %shl = shl <2 x i16> %x, <i16 undef, i16 1> + %shl = shl <2 x i16> %x, <i16 poison, i16 1> %shr = lshr <2 x i16> %x, <i16 15, i16 15> %r = or <2 x i16> %shl, %shr ret <2 x i16> %r } -define <2 x i16> @rotl_v2i16_constant_splat_undef1(<2 x i16> %x) { -; CHECK-LABEL: @rotl_v2i16_constant_splat_undef1( +define <2 x i16> @rotl_v2i16_constant_splat_poison1(<2 x i16> %x) { +; CHECK-LABEL: @rotl_v2i16_constant_splat_poison1( ; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[X]], <2 x i16> <i16 1, i16 1>) ; CHECK-NEXT: ret <2 x i16> [[R]] ; %shl = shl <2 x i16> %x, <i16 1, i16 1> - %shr = lshr <2 x i16> %x, <i16 15, i16 
undef> + %shr = lshr <2 x i16> %x, <i16 15, i16 poison> %r = or <2 x i16> %shl, %shr ret <2 x i16> %r } @@ -100,30 +100,30 @@ define <2 x i17> @rotr_v2i17_constant_splat(<2 x i17> %x) { ret <2 x i17> %r } -define <2 x i17> @rotr_v2i17_constant_splat_undef0(<2 x i17> %x) { -; CHECK-LABEL: @rotr_v2i17_constant_splat_undef0( +define <2 x i17> @rotr_v2i17_constant_splat_poison0(<2 x i17> %x) { +; CHECK-LABEL: @rotr_v2i17_constant_splat_poison0( ; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> <i17 12, i17 12>) ; CHECK-NEXT: ret <2 x i17> [[R]] ; - %shl = shl <2 x i17> %x, <i17 12, i17 undef> - %shr = lshr <2 x i17> %x, <i17 undef, i17 5> + %shl = shl <2 x i17> %x, <i17 12, i17 poison> + %shr = lshr <2 x i17> %x, <i17 poison, i17 5> %r = or <2 x i17> %shr, %shl ret <2 x i17> %r } -define <2 x i17> @rotr_v2i17_constant_splat_undef1(<2 x i17> %x) { -; CHECK-LABEL: @rotr_v2i17_constant_splat_undef1( +define <2 x i17> @rotr_v2i17_constant_splat_poison1(<2 x i17> %x) { +; CHECK-LABEL: @rotr_v2i17_constant_splat_poison1( ; CHECK-NEXT: [[R:%.*]] = call <2 x i17> @llvm.fshl.v2i17(<2 x i17> [[X:%.*]], <2 x i17> [[X]], <2 x i17> <i17 12, i17 12>) ; CHECK-NEXT: ret <2 x i17> [[R]] ; - %shl = shl <2 x i17> %x, <i17 12, i17 undef> - %shr = lshr <2 x i17> %x, <i17 5, i17 undef> + %shl = shl <2 x i17> %x, <i17 12, i17 poison> + %shr = lshr <2 x i17> %x, <i17 5, i17 poison> %r = or <2 x i17> %shr, %shl ret <2 x i17> %r } ; Allow arbitrary shift constants. -; Support undef elements. +; Support poison elements. 
define <2 x i32> @rotr_v2i32_constant_nonsplat(<2 x i32> %x) { ; CHECK-LABEL: @rotr_v2i32_constant_nonsplat( @@ -136,17 +136,6 @@ define <2 x i32> @rotr_v2i32_constant_nonsplat(<2 x i32> %x) { ret <2 x i32> %r } -define <2 x i32> @rotr_v2i32_constant_nonsplat_undef0(<2 x i32> %x) { -; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_undef0( -; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 0, i32 19>) -; CHECK-NEXT: ret <2 x i32> [[R]] -; - %shl = shl <2 x i32> %x, <i32 undef, i32 19> - %shr = lshr <2 x i32> %x, <i32 15, i32 13> - %r = or <2 x i32> %shl, %shr - ret <2 x i32> %r -} - define <2 x i32> @rotr_v2i32_constant_nonsplat_poison0(<2 x i32> %x) { ; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_poison0( ; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 poison, i32 19>) @@ -158,13 +147,13 @@ define <2 x i32> @rotr_v2i32_constant_nonsplat_poison0(<2 x i32> %x) { ret <2 x i32> %r } -define <2 x i32> @rotr_v2i32_constant_nonsplat_undef1(<2 x i32> %x) { -; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_undef1( +define <2 x i32> @rotr_v2i32_constant_nonsplat_poison1(<2 x i32> %x) { +; CHECK-LABEL: @rotr_v2i32_constant_nonsplat_poison1( ; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[X]], <2 x i32> <i32 17, i32 0>) ; CHECK-NEXT: ret <2 x i32> [[R]] ; %shl = shl <2 x i32> %x, <i32 17, i32 19> - %shr = lshr <2 x i32> %x, <i32 15, i32 undef> + %shr = lshr <2 x i32> %x, <i32 15, i32 poison> %r = or <2 x i32> %shl, %shr ret <2 x i32> %r } @@ -180,13 +169,13 @@ define <2 x i36> @rotl_v2i36_constant_nonsplat(<2 x i36> %x) { ret <2 x i36> %r } -define <3 x i36> @rotl_v3i36_constant_nonsplat_undef0(<3 x i36> %x) { -; CHECK-LABEL: @rotl_v3i36_constant_nonsplat_undef0( -; CHECK-NEXT: [[R:%.*]] = call <3 x i36> @llvm.fshl.v3i36(<3 x i36> [[X:%.*]], <3 x i36> [[X]], <3 x i36> <i36 21, i36 11, i36 0>) +define <3 x i36> 
@rotl_v3i36_constant_nonsplat_poison0(<3 x i36> %x) { +; CHECK-LABEL: @rotl_v3i36_constant_nonsplat_poison0( +; CHECK-NEXT: [[R:%.*]] = call <3 x i36> @llvm.fshl.v3i36(<3 x i36> [[X:%.*]], <3 x i36> [[X]], <3 x i36> <i36 21, i36 11, i36 poison>) ; CHECK-NEXT: ret <3 x i36> [[R]] ; - %shl = shl <3 x i36> %x, <i36 21, i36 11, i36 undef> - %shr = lshr <3 x i36> %x, <i36 15, i36 25, i36 undef> + %shl = shl <3 x i36> %x, <i36 21, i36 11, i36 poison> + %shr = lshr <3 x i36> %x, <i36 15, i36 25, i36 poison> %r = or <3 x i36> %shl, %shr ret <3 x i36> %r } diff --git a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll index c1bb694..57977a7 100644 --- a/llvm/test/Transforms/InstCombine/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/saturating-add-sub.ll @@ -559,14 +559,14 @@ define <2 x i8> @test_simplify_decrement_vec(<2 x i8> %a) { ret <2 x i8> %i2 } -define <2 x i8> @test_simplify_decrement_vec_undef(<2 x i8> %a) { -; CHECK-LABEL: @test_simplify_decrement_vec_undef( +define <2 x i8> @test_simplify_decrement_vec_poison(<2 x i8> %a) { +; CHECK-LABEL: @test_simplify_decrement_vec_poison( ; CHECK-NEXT: [[I2:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[A:%.*]], <2 x i8> <i8 1, i8 1>) ; CHECK-NEXT: ret <2 x i8> [[I2]] ; %i = icmp eq <2 x i8> %a, <i8 0, i8 0> %i1 = sub <2 x i8> %a, <i8 1, i8 1> - %i2 = select <2 x i1> %i, <2 x i8> <i8 0, i8 undef>, <2 x i8> %i1 + %i2 = select <2 x i1> %i, <2 x i8> <i8 0, i8 poison>, <2 x i8> %i1 ret <2 x i8> %i2 } @@ -1818,14 +1818,14 @@ define <4 x i32> @uadd_sat_constant_vec_commute(<4 x i32> %x) { define <4 x i32> @uadd_sat_constant_vec_commute_undefs(<4 x i32> %x) { ; CHECK-LABEL: @uadd_sat_constant_vec_commute_undefs( -; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 undef> -; CHECK-NEXT: [[C:%.*]] = icmp ult <4 x i32> [[X]], <i32 -43, i32 -43, i32 undef, i32 -43> -; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[C]], <4 x i32> 
[[A]], <4 x i32> <i32 -1, i32 undef, i32 -1, i32 -1> +; CHECK-NEXT: [[A:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 poison> +; CHECK-NEXT: [[C:%.*]] = icmp ult <4 x i32> [[X]], <i32 -43, i32 -43, i32 poison, i32 -43> +; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[C]], <4 x i32> [[A]], <4 x i32> <i32 -1, i32 poison, i32 -1, i32 -1> ; CHECK-NEXT: ret <4 x i32> [[R]] ; - %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 undef> - %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 undef, i32 -43> - %r = select <4 x i1> %c, <4 x i32> %a, <4 x i32> <i32 -1, i32 undef, i32 -1, i32 -1> + %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 poison> + %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 poison, i32 -43> + %r = select <4 x i1> %c, <4 x i32> %a, <4 x i32> <i32 -1, i32 poison, i32 -1, i32 -1> ret <4 x i32> %r } diff --git a/llvm/test/Transforms/InstCombine/select-of-bittest.ll b/llvm/test/Transforms/InstCombine/select-of-bittest.ll index a6f14cb..e3eb76d 100644 --- a/llvm/test/Transforms/InstCombine/select-of-bittest.ll +++ b/llvm/test/Transforms/InstCombine/select-of-bittest.ll @@ -80,19 +80,18 @@ define <2 x i32> @and_lshr_and_vec_v2(<2 x i32> %arg) { ret <2 x i32> %t4 } -define <3 x i32> @and_lshr_and_vec_undef(<3 x i32> %arg) { -; CHECK-LABEL: @and_lshr_and_vec_undef( +define <3 x i32> @and_lshr_and_vec_poison(<3 x i32> %arg) { +; CHECK-LABEL: @and_lshr_and_vec_poison( ; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i32> [[ARG:%.*]], <i32 3, i32 poison, i32 3> ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <3 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[T4:%.*]] = zext <3 x i1> [[TMP2]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[T4]] ; - %t = and <3 x i32> %arg, <i32 1, i32 undef, i32 1> - %t1 = icmp eq <3 x i32> %t, <i32 0, i32 undef, i32 0> - %t2 = lshr <3 x i32> %arg, <i32 1, i32 undef, i32 1> - %t3 = and <3 x i32> %t2, <i32 1, i32 undef, i32 1> - ; The second element of %t4 is poison because it is (undef ? poison : undef). 
- %t4 = select <3 x i1> %t1, <3 x i32> %t3, <3 x i32> <i32 1, i32 undef, i32 1> + %t = and <3 x i32> %arg, <i32 1, i32 poison, i32 1> + %t1 = icmp eq <3 x i32> %t, <i32 0, i32 poison, i32 0> + %t2 = lshr <3 x i32> %arg, <i32 1, i32 poison, i32 1> + %t3 = and <3 x i32> %t2, <i32 1, i32 poison, i32 1> + %t4 = select <3 x i1> %t1, <3 x i32> %t3, <3 x i32> <i32 1, i32 poison, i32 1> ret <3 x i32> %t4 } @@ -138,17 +137,17 @@ define <2 x i32> @and_and_vec(<2 x i32> %arg) { ret <2 x i32> %t3 } -define <3 x i32> @and_and_vec_undef(<3 x i32> %arg) { -; CHECK-LABEL: @and_and_vec_undef( -; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i32> [[ARG:%.*]], <i32 3, i32 -1, i32 3> +define <3 x i32> @and_and_vec_poison(<3 x i32> %arg) { +; CHECK-LABEL: @and_and_vec_poison( +; CHECK-NEXT: [[TMP1:%.*]] = and <3 x i32> [[ARG:%.*]], <i32 3, i32 poison, i32 3> ; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <3 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: [[T3:%.*]] = zext <3 x i1> [[TMP2]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[T3]] ; - %t = and <3 x i32> %arg, <i32 2, i32 undef, i32 2> - %t1 = icmp eq <3 x i32> %t, <i32 0, i32 undef, i32 0> - %t2 = and <3 x i32> %arg, <i32 1, i32 undef, i32 1> - %t3 = select <3 x i1> %t1, <3 x i32> %t2, <3 x i32> <i32 1, i32 undef, i32 1> + %t = and <3 x i32> %arg, <i32 2, i32 poison, i32 2> + %t1 = icmp eq <3 x i32> %t, <i32 0, i32 poison, i32 0> + %t2 = and <3 x i32> %arg, <i32 1, i32 poison, i32 1> + %t3 = select <3 x i1> %t1, <3 x i32> %t2, <3 x i32> <i32 1, i32 poison, i32 1> ret <3 x i32> %t3 } @@ -221,8 +220,8 @@ define <2 x i32> @f_var0_vec(<2 x i32> %arg, <2 x i32> %arg1) { ret <2 x i32> %t5 } -define <3 x i32> @f_var0_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) { -; CHECK-LABEL: @f_var0_vec_undef( +define <3 x i32> @f_var0_vec_poison(<3 x i32> %arg, <3 x i32> %arg1) { +; CHECK-LABEL: @f_var0_vec_poison( ; CHECK-NEXT: [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], <i32 2, i32 poison, i32 2> ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[ARG:%.*]] ; 
CHECK-NEXT: [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer @@ -230,11 +229,11 @@ define <3 x i32> @f_var0_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) { ; CHECK-NEXT: ret <3 x i32> [[T5]] ; %t = and <3 x i32> %arg, %arg1 - %t2 = icmp eq <3 x i32> %t, <i32 0, i32 undef, i32 0> - %t3 = lshr <3 x i32> %arg, <i32 1, i32 undef, i32 1> - %t4 = and <3 x i32> %t3, <i32 1, i32 undef, i32 1> - ; The second element of %t5 is poison because it is (undef ? poison : undef). - %t5 = select <3 x i1> %t2, <3 x i32> %t4, <3 x i32> <i32 1, i32 undef, i32 1> + %t2 = icmp eq <3 x i32> %t, <i32 0, i32 poison, i32 0> + %t3 = lshr <3 x i32> %arg, <i32 1, i32 poison, i32 1> + %t4 = and <3 x i32> %t3, <i32 1, i32 poison, i32 1> + ; The second element of %t5 is poison because it is (poison ? poison : poison). + %t5 = select <3 x i1> %t2, <3 x i32> %t4, <3 x i32> <i32 1, i32 poison, i32 1> ret <3 x i32> %t5 } @@ -284,8 +283,8 @@ define <2 x i32> @f_var1_vec(<2 x i32> %arg, <2 x i32> %arg1) { ret <2 x i32> %t4 } -define <3 x i32> @f_var1_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) { -; CHECK-LABEL: @f_var1_vec_undef( +define <3 x i32> @f_var1_vec_poison(<3 x i32> %arg, <3 x i32> %arg1) { +; CHECK-LABEL: @f_var1_vec_poison( ; CHECK-NEXT: [[TMP1:%.*]] = or <3 x i32> [[ARG1:%.*]], <i32 1, i32 1, i32 1> ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[ARG:%.*]] ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer @@ -293,9 +292,9 @@ define <3 x i32> @f_var1_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) { ; CHECK-NEXT: ret <3 x i32> [[T4]] ; %t = and <3 x i32> %arg, %arg1 - %t2 = icmp eq <3 x i32> %t, <i32 0, i32 undef, i32 0> - %t3 = and <3 x i32> %arg, <i32 1, i32 undef, i32 1> - %t4 = select <3 x i1> %t2, <3 x i32> %t3, <3 x i32> <i32 1, i32 undef, i32 1> + %t2 = icmp eq <3 x i32> %t, <i32 0, i32 poison, i32 0> + %t3 = and <3 x i32> %arg, <i32 1, i32 poison, i32 1> + %t4 = select <3 x i1> %t2, <3 x i32> %t3, <3 x i32> <i32 1, i32 poison, i32 1> ret <3 x i32> %t4 } 
@@ -354,20 +353,20 @@ define <2 x i32> @f_var2_vec(<2 x i32> %arg, <2 x i32> %arg1) { ret <2 x i32> %t5 } -define <3 x i32> @f_var2_vec_undef(<3 x i32> %arg, <3 x i32> %arg1) { -; CHECK-LABEL: @f_var2_vec_undef( -; CHECK-NEXT: [[T:%.*]] = and <3 x i32> [[ARG:%.*]], <i32 1, i32 undef, i32 1> -; CHECK-NEXT: [[T2:%.*]] = icmp eq <3 x i32> [[T]], <i32 0, i32 undef, i32 0> +define <3 x i32> @f_var2_vec_poison(<3 x i32> %arg, <3 x i32> %arg1) { +; CHECK-LABEL: @f_var2_vec_poison( +; CHECK-NEXT: [[T:%.*]] = and <3 x i32> [[ARG:%.*]], <i32 1, i32 poison, i32 1> +; CHECK-NEXT: [[T2:%.*]] = icmp eq <3 x i32> [[T]], <i32 0, i32 poison, i32 0> ; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[ARG]], [[ARG1:%.*]] -; CHECK-NEXT: [[T4:%.*]] = and <3 x i32> [[T3]], <i32 1, i32 undef, i32 1> -; CHECK-NEXT: [[T5:%.*]] = select <3 x i1> [[T2]], <3 x i32> [[T4]], <3 x i32> <i32 1, i32 undef, i32 1> +; CHECK-NEXT: [[T4:%.*]] = and <3 x i32> [[T3]], <i32 1, i32 poison, i32 1> +; CHECK-NEXT: [[T5:%.*]] = select <3 x i1> [[T2]], <3 x i32> [[T4]], <3 x i32> <i32 1, i32 poison, i32 1> ; CHECK-NEXT: ret <3 x i32> [[T5]] ; - %t = and <3 x i32> %arg, <i32 1, i32 undef, i32 1> - %t2 = icmp eq <3 x i32> %t, <i32 0, i32 undef, i32 0> + %t = and <3 x i32> %arg, <i32 1, i32 poison, i32 1> + %t2 = icmp eq <3 x i32> %t, <i32 0, i32 poison, i32 0> %t3 = lshr <3 x i32> %arg, %arg1 - %t4 = and <3 x i32> %t3, <i32 1, i32 undef, i32 1> - %t5 = select <3 x i1> %t2, <3 x i32> %t4, <3 x i32> <i32 1, i32 undef, i32 1> + %t4 = and <3 x i32> %t3, <i32 1, i32 poison, i32 1> + %t5 = select <3 x i1> %t2, <3 x i32> %t4, <3 x i32> <i32 1, i32 poison, i32 1> ret <3 x i32> %t5 } @@ -427,20 +426,20 @@ define <2 x i32> @f_var3_splatvec(<2 x i32> %arg, <2 x i32> %arg1, <2 x i32> %ar ret <2 x i32> %t6 } -define <3 x i32> @f_var3_vec_undef(<3 x i32> %arg, <3 x i32> %arg1, <3 x i32> %arg2) { -; CHECK-LABEL: @f_var3_vec_undef( +define <3 x i32> @f_var3_vec_poison(<3 x i32> %arg, <3 x i32> %arg1, <3 x i32> %arg2) { +; CHECK-LABEL: 
@f_var3_vec_poison( ; CHECK-NEXT: [[T:%.*]] = and <3 x i32> [[ARG:%.*]], [[ARG1:%.*]] -; CHECK-NEXT: [[T3:%.*]] = icmp eq <3 x i32> [[T]], <i32 0, i32 undef, i32 0> +; CHECK-NEXT: [[T3:%.*]] = icmp eq <3 x i32> [[T]], <i32 0, i32 poison, i32 0> ; CHECK-NEXT: [[T4:%.*]] = lshr <3 x i32> [[ARG]], [[ARG2:%.*]] -; CHECK-NEXT: [[T5:%.*]] = and <3 x i32> [[T4]], <i32 1, i32 undef, i32 1> -; CHECK-NEXT: [[T6:%.*]] = select <3 x i1> [[T3]], <3 x i32> [[T5]], <3 x i32> <i32 1, i32 undef, i32 1> +; CHECK-NEXT: [[T5:%.*]] = and <3 x i32> [[T4]], <i32 1, i32 poison, i32 1> +; CHECK-NEXT: [[T6:%.*]] = select <3 x i1> [[T3]], <3 x i32> [[T5]], <3 x i32> <i32 1, i32 poison, i32 1> ; CHECK-NEXT: ret <3 x i32> [[T6]] ; %t = and <3 x i32> %arg, %arg1 - %t3 = icmp eq <3 x i32> %t, <i32 0, i32 undef, i32 0> + %t3 = icmp eq <3 x i32> %t, <i32 0, i32 poison, i32 0> %t4 = lshr <3 x i32> %arg, %arg2 - %t5 = and <3 x i32> %t4, <i32 1, i32 undef, i32 1> - %t6 = select <3 x i1> %t3, <3 x i32> %t5, <3 x i32> <i32 1, i32 undef, i32 1> + %t5 = and <3 x i32> %t4, <i32 1, i32 poison, i32 1> + %t6 = select <3 x i1> %t3, <3 x i32> %t5, <3 x i32> <i32 1, i32 poison, i32 1> ret <3 x i32> %t6 } diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index bd8145a..8654691 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -3109,45 +3109,46 @@ define <4 x i32> @mul_select_eq_zero_vector(<4 x i32> %x, <4 x i32> %y) { } ; Check that a select is folded into multiplication if condition's operand -; is a vector consisting of zeros and undefs. -; select (<k x elt> x == {0, undef, ...}), <k x elt> 0, <k x elt> x * y --> freeze(y) * x -define <2 x i32> @mul_select_eq_undef_vector(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @mul_select_eq_undef_vector( -; CHECK-NEXT: [[Y_FR:%.*]] = freeze <2 x i32> [[Y:%.*]] +; is a vector consisting of zeros and poisons. 
+; select (<k x elt> x == {0, poison, ...}), <k x elt> 0, <k x elt> x * y --> freeze(y) * x +define <2 x i32> @mul_select_eq_poison_vector(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @mul_select_eq_poison_vector( +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[Y_FR:%.*]], <i32 0, i32 poison> ; CHECK-NEXT: [[M:%.*]] = mul <2 x i32> [[Y_FR]], [[X:%.*]] -; CHECK-NEXT: ret <2 x i32> [[M]] +; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> <i32 0, i32 42>, <2 x i32> [[M]] +; CHECK-NEXT: ret <2 x i32> [[R]] ; - %c = icmp eq <2 x i32> %x, <i32 0, i32 undef> + %c = icmp eq <2 x i32> %x, <i32 0, i32 poison> %m = mul <2 x i32> %x, %y %r = select <2 x i1> %c, <2 x i32> <i32 0, i32 42>, <2 x i32> %m ret <2 x i32> %r } ; Check that a select is folded into multiplication if other select's operand -; is a vector consisting of zeros and undefs. -; select (<k x elt> x == 0), <k x elt> {0, undef, ...}, <k x elt> x * y --> freeze(y) * x -define <2 x i32> @mul_select_eq_zero_sel_undef_vector(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @mul_select_eq_zero_sel_undef_vector( +; is a vector consisting of zeros and poisons. +; select (<k x elt> x == 0), <k x elt> {0, poison, ...}, <k x elt> x * y --> freeze(y) * x +define <2 x i32> @mul_select_eq_zero_sel_poison_vector(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @mul_select_eq_zero_sel_poison_vector( ; CHECK-NEXT: [[Y_FR:%.*]] = freeze <2 x i32> [[Y:%.*]] ; CHECK-NEXT: [[M:%.*]] = mul <2 x i32> [[Y_FR]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i32> [[M]] ; %c = icmp eq <2 x i32> %x, zeroinitializer %m = mul <2 x i32> %x, %y - %r = select <2 x i1> %c, <2 x i32> <i32 0, i32 undef>, <2 x i32> %m + %r = select <2 x i1> %c, <2 x i32> <i32 0, i32 poison>, <2 x i32> %m ret <2 x i32> %r } ; Negative test: select should not be folded into mul because ; condition's operand and select's operand do not merge into zero vector. 
-define <2 x i32> @mul_select_eq_undef_vector_not_merging_to_zero(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @mul_select_eq_undef_vector_not_merging_to_zero( -; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 0, i32 undef> +define <2 x i32> @mul_select_eq_poison_vector_not_merging_to_zero(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @mul_select_eq_poison_vector_not_merging_to_zero( +; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i32> [[X:%.*]], <i32 0, i32 poison> ; CHECK-NEXT: [[M:%.*]] = mul <2 x i32> [[X]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C]], <2 x i32> <i32 1, i32 0>, <2 x i32> [[M]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; - %c = icmp eq <2 x i32> %x, <i32 0, i32 undef> + %c = icmp eq <2 x i32> %x, <i32 0, i32 poison> %m = mul <2 x i32> %x, %y %r = select <2 x i1> %c, <2 x i32> <i32 1, i32 0>, <2 x i32> %m ret <2 x i32> %r diff --git a/llvm/test/Transforms/InstCombine/select_meta.ll b/llvm/test/Transforms/InstCombine/select_meta.ll index aa794e8..3898fd9 100644 --- a/llvm/test/Transforms/InstCombine/select_meta.ll +++ b/llvm/test/Transforms/InstCombine/select_meta.ll @@ -301,15 +301,15 @@ define <2 x i32> @not_cond_vec(<2 x i1> %c, <2 x i32> %tv, <2 x i32> %fv) { ret <2 x i32> %r } -; Should match vector 'not' with undef element. +; Should match vector 'not' with poison element. ; The condition is inverted, and the select ops are swapped. The metadata should be swapped. 
-define <2 x i32> @not_cond_vec_undef(<2 x i1> %c, <2 x i32> %tv, <2 x i32> %fv) { -; CHECK-LABEL: @not_cond_vec_undef( +define <2 x i32> @not_cond_vec_poison(<2 x i1> %c, <2 x i32> %tv, <2 x i32> %fv) { +; CHECK-LABEL: @not_cond_vec_poison( ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[C:%.*]], <2 x i32> [[FV:%.*]], <2 x i32> [[TV:%.*]], !prof [[PROF1]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; - %notc = xor <2 x i1> %c, <i1 undef, i1 true> + %notc = xor <2 x i1> %c, <i1 poison, i1 true> %r = select <2 x i1> %notc, <2 x i32> %tv, <2 x i32> %fv, !prof !1 ret <2 x i32> %r } diff --git a/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll b/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll index 3ee0224..a3c8d33 100644 --- a/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll +++ b/llvm/test/Transforms/InstCombine/set-lowbits-mask-canonicalize.ll @@ -196,36 +196,36 @@ define <2 x i32> @shl_add_vec(<2 x i32> %NBits) { ret <2 x i32> %ret } -define <3 x i32> @shl_add_vec_undef0(<3 x i32> %NBits) { -; CHECK-LABEL: @shl_add_vec_undef0( +define <3 x i32> @shl_add_vec_poison0(<3 x i32> %NBits) { +; CHECK-LABEL: @shl_add_vec_poison0( ; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]] ; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], <i32 -1, i32 -1, i32 -1> ; CHECK-NEXT: ret <3 x i32> [[RET]] ; - %setbit = shl <3 x i32> <i32 1, i32 undef, i32 1>, %NBits + %setbit = shl <3 x i32> <i32 1, i32 poison, i32 1>, %NBits %ret = add <3 x i32> %setbit, <i32 -1, i32 -1, i32 -1> ret <3 x i32> %ret } -define <3 x i32> @shl_add_vec_undef1(<3 x i32> %NBits) { -; CHECK-LABEL: @shl_add_vec_undef1( +define <3 x i32> @shl_add_vec_poison1(<3 x i32> %NBits) { +; CHECK-LABEL: @shl_add_vec_poison1( ; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]] ; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], <i32 -1, i32 -1, i32 -1> ; CHECK-NEXT: ret <3 x i32> [[RET]] ; %setbit = shl <3 
x i32> <i32 1, i32 1, i32 1>, %NBits - %ret = add <3 x i32> %setbit, <i32 -1, i32 undef, i32 -1> + %ret = add <3 x i32> %setbit, <i32 -1, i32 poison, i32 -1> ret <3 x i32> %ret } -define <3 x i32> @shl_add_vec_undef2(<3 x i32> %NBits) { -; CHECK-LABEL: @shl_add_vec_undef2( +define <3 x i32> @shl_add_vec_poison2(<3 x i32> %NBits) { +; CHECK-LABEL: @shl_add_vec_poison2( ; CHECK-NEXT: [[NOTMASK:%.*]] = shl nsw <3 x i32> <i32 -1, i32 -1, i32 -1>, [[NBITS:%.*]] ; CHECK-NEXT: [[RET:%.*]] = xor <3 x i32> [[NOTMASK]], <i32 -1, i32 -1, i32 -1> ; CHECK-NEXT: ret <3 x i32> [[RET]] ; - %setbit = shl <3 x i32> <i32 1, i32 undef, i32 1>, %NBits - %ret = add <3 x i32> %setbit, <i32 -1, i32 undef, i32 -1> + %setbit = shl <3 x i32> <i32 1, i32 poison, i32 1>, %NBits + %ret = add <3 x i32> %setbit, <i32 -1, i32 poison, i32 -1> ret <3 x i32> %ret } diff --git a/llvm/test/Transforms/InstCombine/sext.ll b/llvm/test/Transforms/InstCombine/sext.ll index e3b6058..6d263cf 100644 --- a/llvm/test/Transforms/InstCombine/sext.ll +++ b/llvm/test/Transforms/InstCombine/sext.ll @@ -167,39 +167,39 @@ define <2 x i32> @test10_vec_nonuniform(<2 x i32> %i) { ret <2 x i32> %D } -define <2 x i32> @test10_vec_undef0(<2 x i32> %i) { -; CHECK-LABEL: @test10_vec_undef0( -; CHECK-NEXT: [[D1:%.*]] = shl <2 x i32> [[I:%.*]], <i32 30, i32 undef> -; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i32> [[D1]], <i32 30, i32 undef> +define <2 x i32> @test10_vec_poison0(<2 x i32> %i) { +; CHECK-LABEL: @test10_vec_poison0( +; CHECK-NEXT: [[D1:%.*]] = shl <2 x i32> [[I:%.*]], <i32 30, i32 poison> +; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i32> [[D1]], <i32 30, i32 poison> ; CHECK-NEXT: ret <2 x i32> [[D]] ; %A = trunc <2 x i32> %i to <2 x i8> %B = shl <2 x i8> %A, <i8 6, i8 0> - %C = ashr <2 x i8> %B, <i8 6, i8 undef> + %C = ashr <2 x i8> %B, <i8 6, i8 poison> %D = sext <2 x i8> %C to <2 x i32> ret <2 x i32> %D } -define <2 x i32> @test10_vec_undef1(<2 x i32> %i) { -; CHECK-LABEL: @test10_vec_undef1( +define <2 x i32> 
@test10_vec_poison1(<2 x i32> %i) { +; CHECK-LABEL: @test10_vec_poison1( ; CHECK-NEXT: [[D1:%.*]] = shl <2 x i32> [[I:%.*]], <i32 30, i32 undef> ; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i32> [[D1]], <i32 30, i32 undef> ; CHECK-NEXT: ret <2 x i32> [[D]] ; %A = trunc <2 x i32> %i to <2 x i8> - %B = shl <2 x i8> %A, <i8 6, i8 undef> + %B = shl <2 x i8> %A, <i8 6, i8 poison> %C = ashr <2 x i8> %B, <i8 6, i8 0> %D = sext <2 x i8> %C to <2 x i32> ret <2 x i32> %D } -define <2 x i32> @test10_vec_undef2(<2 x i32> %i) { -; CHECK-LABEL: @test10_vec_undef2( -; CHECK-NEXT: [[D1:%.*]] = shl <2 x i32> [[I:%.*]], <i32 30, i32 undef> -; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i32> [[D1]], <i32 30, i32 undef> +define <2 x i32> @test10_vec_poison2(<2 x i32> %i) { +; CHECK-LABEL: @test10_vec_poison2( +; CHECK-NEXT: [[D1:%.*]] = shl <2 x i32> [[I:%.*]], <i32 30, i32 poison> +; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i32> [[D1]], <i32 30, i32 poison> ; CHECK-NEXT: ret <2 x i32> [[D]] ; %A = trunc <2 x i32> %i to <2 x i8> - %B = shl <2 x i8> %A, <i8 6, i8 undef> - %C = ashr <2 x i8> %B, <i8 6, i8 undef> + %B = shl <2 x i8> %A, <i8 6, i8 poison> + %C = ashr <2 x i8> %B, <i8 6, i8 poison> %D = sext <2 x i8> %C to <2 x i32> ret <2 x i32> %D } diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll index 0262db1..96d429c 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-in-bittest.ll @@ -143,34 +143,34 @@ define <2 x i1> @t8_const_lshr_shl_ne_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) { %t3 = icmp ne <2 x i32> %t2, <i32 0, i32 0> ret <2 x i1> %t3 } -define <3 x i1> @t9_const_lshr_shl_ne_vec_undef0(<3 x i32> %x, <3 x i32> %y) { -; CHECK-LABEL: @t9_const_lshr_shl_ne_vec_undef0( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2> +define <3 x i1> 
@t9_const_lshr_shl_ne_vec_poison0(<3 x i32> %x, <3 x i32> %y) { +; CHECK-LABEL: @t9_const_lshr_shl_ne_vec_poison0( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 poison, i32 2> ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: [[T3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[T3]] ; - %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1> + %t0 = lshr <3 x i32> %x, <i32 1, i32 poison, i32 1> %t1 = shl <3 x i32> %y, <i32 1, i32 1, i32 1> %t2 = and <3 x i32> %t1, %t0 %t3 = icmp ne <3 x i32> %t2, <i32 0, i32 0, i32 0> ret <3 x i1> %t3 } -define <3 x i1> @t10_const_lshr_shl_ne_vec_undef1(<3 x i32> %x, <3 x i32> %y) { -; CHECK-LABEL: @t10_const_lshr_shl_ne_vec_undef1( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2> +define <3 x i1> @t10_const_lshr_shl_ne_vec_poison1(<3 x i32> %x, <3 x i32> %y) { +; CHECK-LABEL: @t10_const_lshr_shl_ne_vec_poison1( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 poison, i32 2> ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: [[T3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[T3]] ; %t0 = lshr <3 x i32> %x, <i32 1, i32 1, i32 1> - %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1> + %t1 = shl <3 x i32> %y, <i32 1, i32 poison, i32 1> %t2 = and <3 x i32> %t1, %t0 %t3 = icmp ne <3 x i32> %t2, <i32 0, i32 0, i32 0> ret <3 x i1> %t3 } -define <3 x i1> @t11_const_lshr_shl_ne_vec_undef2(<3 x i32> %x, <3 x i32> %y) { -; CHECK-LABEL: @t11_const_lshr_shl_ne_vec_undef2( +define <3 x i1> @t11_const_lshr_shl_ne_vec_poison2(<3 x i32> %x, <3 x i32> %y) { +; CHECK-LABEL: @t11_const_lshr_shl_ne_vec_poison2( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 2, i32 2> ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: [[T3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer @@ -179,59 +179,59 @@ define <3 x i1> 
@t11_const_lshr_shl_ne_vec_undef2(<3 x i32> %x, <3 x i32> %y) { %t0 = lshr <3 x i32> %x, <i32 1, i32 1, i32 1> %t1 = shl <3 x i32> %y, <i32 1, i32 1, i32 1> %t2 = and <3 x i32> %t1, %t0 - %t3 = icmp ne <3 x i32> %t2, <i32 0, i32 undef, i32 0> + %t3 = icmp ne <3 x i32> %t2, <i32 0, i32 poison, i32 0> ret <3 x i1> %t3 } -define <3 x i1> @t12_const_lshr_shl_ne_vec_undef3(<3 x i32> %x, <3 x i32> %y) { -; CHECK-LABEL: @t12_const_lshr_shl_ne_vec_undef3( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2> +define <3 x i1> @t12_const_lshr_shl_ne_vec_poison3(<3 x i32> %x, <3 x i32> %y) { +; CHECK-LABEL: @t12_const_lshr_shl_ne_vec_poison3( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 poison, i32 2> ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: [[T3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[T3]] ; - %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1> - %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1> + %t0 = lshr <3 x i32> %x, <i32 1, i32 poison, i32 1> + %t1 = shl <3 x i32> %y, <i32 1, i32 poison, i32 1> %t2 = and <3 x i32> %t1, %t0 %t3 = icmp ne <3 x i32> %t2, <i32 0, i32 0, i32 0> ret <3 x i1> %t3 } -define <3 x i1> @t13_const_lshr_shl_ne_vec_undef4(<3 x i32> %x, <3 x i32> %y) { -; CHECK-LABEL: @t13_const_lshr_shl_ne_vec_undef4( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2> +define <3 x i1> @t13_const_lshr_shl_ne_vec_poison4(<3 x i32> %x, <3 x i32> %y) { +; CHECK-LABEL: @t13_const_lshr_shl_ne_vec_poison4( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 poison, i32 2> ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: [[T3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[T3]] ; %t0 = lshr <3 x i32> %x, <i32 1, i32 1, i32 1> - %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1> + %t1 = shl <3 x i32> %y, <i32 1, i32 poison, i32 1> %t2 = and <3 x 
i32> %t1, %t0 - %t3 = icmp ne <3 x i32> %t2, <i32 0, i32 undef, i32 0> + %t3 = icmp ne <3 x i32> %t2, <i32 0, i32 poison, i32 0> ret <3 x i1> %t3 } -define <3 x i1> @t14_const_lshr_shl_ne_vec_undef5(<3 x i32> %x, <3 x i32> %y) { -; CHECK-LABEL: @t14_const_lshr_shl_ne_vec_undef5( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2> +define <3 x i1> @t14_const_lshr_shl_ne_vec_poison5(<3 x i32> %x, <3 x i32> %y) { +; CHECK-LABEL: @t14_const_lshr_shl_ne_vec_poison5( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 poison, i32 2> ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: [[T3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[T3]] ; - %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1> + %t0 = lshr <3 x i32> %x, <i32 1, i32 poison, i32 1> %t1 = shl <3 x i32> %y, <i32 1, i32 1, i32 1> %t2 = and <3 x i32> %t1, %t0 - %t3 = icmp ne <3 x i32> %t2, <i32 0, i32 undef, i32 0> + %t3 = icmp ne <3 x i32> %t2, <i32 0, i32 poison, i32 0> ret <3 x i1> %t3 } -define <3 x i1> @t15_const_lshr_shl_ne_vec_undef6(<3 x i32> %x, <3 x i32> %y) { -; CHECK-LABEL: @t15_const_lshr_shl_ne_vec_undef6( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 undef, i32 2> +define <3 x i1> @t15_const_lshr_shl_ne_vec_poison6(<3 x i32> %x, <3 x i32> %y) { +; CHECK-LABEL: @t15_const_lshr_shl_ne_vec_poison6( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 2, i32 poison, i32 2> ; CHECK-NEXT: [[TMP2:%.*]] = and <3 x i32> [[TMP1]], [[Y:%.*]] ; CHECK-NEXT: [[T3:%.*]] = icmp ne <3 x i32> [[TMP2]], zeroinitializer ; CHECK-NEXT: ret <3 x i1> [[T3]] ; - %t0 = lshr <3 x i32> %x, <i32 1, i32 undef, i32 1> - %t1 = shl <3 x i32> %y, <i32 1, i32 undef, i32 1> + %t0 = lshr <3 x i32> %x, <i32 1, i32 poison, i32 1> + %t1 = shl <3 x i32> %y, <i32 1, i32 poison, i32 1> %t2 = and <3 x i32> %t1, %t0 - %t3 = icmp ne <3 x i32> %t2, <i32 0, i32 undef, i32 0> + %t3 = icmp ne <3 x i32> %t2, <i32 
0, i32 poison, i32 0> ret <3 x i1> %t3 } diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll index 84dd4c57..9efc30c 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-ashr.ll @@ -42,13 +42,13 @@ define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { ret <2 x i16> %t5 } -define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) { -; CHECK-LABEL: @t3_vec_nonsplat_undef0( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 31, i32 0, i32 31> +define <3 x i16> @t3_vec_nonsplat_poison0(<3 x i32> %x, <3 x i16> %y) { +; CHECK-LABEL: @t3_vec_nonsplat_poison0( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 31, i32 poison, i32 31> ; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16> ; CHECK-NEXT: ret <3 x i16> [[T5]] ; - %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y + %t0 = sub <3 x i16> <i16 32, i16 poison, i16 32>, %y %t1 = zext <3 x i16> %t0 to <3 x i32> %t2 = ashr <3 x i32> %x, %t1 %t3 = trunc <3 x i32> %t2 to <3 x i16> @@ -57,9 +57,9 @@ define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) { ret <3 x i16> %t5 } -define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { -; CHECK-LABEL: @t4_vec_nonsplat_undef1( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 31, i32 0, i32 31> +define <3 x i16> @t4_vec_nonsplat_poison1(<3 x i32> %x, <3 x i16> %y) { +; CHECK-LABEL: @t4_vec_nonsplat_poison1( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 31, i32 poison, i32 31> ; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16> ; CHECK-NEXT: ret <3 x i16> [[T5]] ; @@ -67,22 +67,22 @@ define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { %t1 = zext <3 x i16> %t0 to <3 x i32> %t2 = ashr <3 x i32> 
%x, %t1 %t3 = trunc <3 x i32> %t2 to <3 x i16> - %t4 = add <3 x i16> %y, <i16 -1, i16 undef, i16 -1> + %t4 = add <3 x i16> %y, <i16 -1, i16 poison, i16 -1> %t5 = ashr <3 x i16> %t3, %t4 ret <3 x i16> %t5 } -define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { -; CHECK-LABEL: @t5_vec_nonsplat_undef1( -; CHECK-NEXT: [[TMP1:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 31, i32 0, i32 31> +define <3 x i16> @t5_vec_nonsplat_poison1(<3 x i32> %x, <3 x i16> %y) { +; CHECK-LABEL: @t5_vec_nonsplat_poison1( +; CHECK-NEXT: [[TMP1:%.*]] = ashr <3 x i32> [[X:%.*]], <i32 31, i32 poison, i32 31> ; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16> ; CHECK-NEXT: ret <3 x i16> [[T5]] ; - %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y + %t0 = sub <3 x i16> <i16 32, i16 poison, i16 32>, %y %t1 = zext <3 x i16> %t0 to <3 x i32> %t2 = ashr <3 x i32> %x, %t1 %t3 = trunc <3 x i32> %t2 to <3 x i16> - %t4 = add <3 x i16> %y, <i16 -1, i16 undef, i16 -1> + %t4 = add <3 x i16> %y, <i16 -1, i16 poison, i16 -1> %t5 = ashr <3 x i16> %t3, %t4 ret <3 x i16> %t5 } diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll index 214ec88..c31b6ed3 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-lshr.ll @@ -42,13 +42,13 @@ define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { ret <2 x i16> %t5 } -define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) { -; CHECK-LABEL: @t3_vec_nonsplat_undef0( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 31, i32 0, i32 31> -; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16> +define <3 x i16> @t3_vec_nonsplat_poison0(<3 x i32> %x, <3 x i16> %y) { +; CHECK-LABEL: @t3_vec_nonsplat_poison0( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], 
<i32 31, i32 poison, i32 31> +; CHECK-NEXT: [[T5:%.*]] = trunc nuw nsw <3 x i32> [[TMP1]] to <3 x i16> ; CHECK-NEXT: ret <3 x i16> [[T5]] ; - %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y + %t0 = sub <3 x i16> <i16 32, i16 poison, i16 32>, %y %t1 = zext <3 x i16> %t0 to <3 x i32> %t2 = lshr <3 x i32> %x, %t1 %t3 = trunc <3 x i32> %t2 to <3 x i16> @@ -57,32 +57,32 @@ define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) { ret <3 x i16> %t5 } -define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { -; CHECK-LABEL: @t4_vec_nonsplat_undef1( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 31, i32 0, i32 31> -; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16> +define <3 x i16> @t4_vec_nonsplat_poison1(<3 x i32> %x, <3 x i16> %y) { +; CHECK-LABEL: @t4_vec_nonsplat_poison1( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 31, i32 poison, i32 31> +; CHECK-NEXT: [[T5:%.*]] = trunc nuw nsw <3 x i32> [[TMP1]] to <3 x i16> ; CHECK-NEXT: ret <3 x i16> [[T5]] ; %t0 = sub <3 x i16> <i16 32, i16 32, i16 32>, %y %t1 = zext <3 x i16> %t0 to <3 x i32> %t2 = lshr <3 x i32> %x, %t1 %t3 = trunc <3 x i32> %t2 to <3 x i16> - %t4 = add <3 x i16> %y, <i16 -1, i16 undef, i16 -1> + %t4 = add <3 x i16> %y, <i16 -1, i16 poison, i16 -1> %t5 = lshr <3 x i16> %t3, %t4 ret <3 x i16> %t5 } -define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { -; CHECK-LABEL: @t5_vec_nonsplat_undef1( -; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 31, i32 0, i32 31> -; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16> +define <3 x i16> @t5_vec_nonsplat_poison1(<3 x i32> %x, <3 x i16> %y) { +; CHECK-LABEL: @t5_vec_nonsplat_poison1( +; CHECK-NEXT: [[TMP1:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 31, i32 poison, i32 31> +; CHECK-NEXT: [[T5:%.*]] = trunc nuw nsw <3 x i32> [[TMP1]] to <3 x i16> ; CHECK-NEXT: ret <3 x i16> [[T5]] ; - %t0 = sub <3 x i16> <i16 32, i16 undef, i16 32>, %y + %t0 = sub <3 x i16> 
<i16 32, i16 poison, i16 32>, %y %t1 = zext <3 x i16> %t0 to <3 x i32> %t2 = lshr <3 x i32> %x, %t1 %t3 = trunc <3 x i32> %t2 to <3 x i16> - %t4 = add <3 x i16> %y, <i16 -1, i16 undef, i16 -1> + %t4 = add <3 x i16> %y, <i16 -1, i16 poison, i16 -1> %t5 = lshr <3 x i16> %t3, %t4 ret <3 x i16> %t5 } diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll index b96bcd6..6bbe4c5 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation.ll @@ -48,38 +48,38 @@ define <2 x i32> @t2_vec_nonsplat(<2 x i32> %x, <2 x i32> %y) { ; Basic vector tests -define <3 x i32> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i32> %y) { -; CHECK-LABEL: @t3_vec_nonsplat_undef0( -; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 30, i32 undef, i32 30> +define <3 x i32> @t3_vec_nonsplat_poison0(<3 x i32> %x, <3 x i32> %y) { +; CHECK-LABEL: @t3_vec_nonsplat_poison0( +; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 30, i32 poison, i32 30> ; CHECK-NEXT: ret <3 x i32> [[T3]] ; - %t0 = sub <3 x i32> <i32 32, i32 undef, i32 32>, %y + %t0 = sub <3 x i32> <i32 32, i32 poison, i32 32>, %y %t1 = lshr <3 x i32> %x, %t0 %t2 = add <3 x i32> %y, <i32 -2, i32 -2, i32 -2> %t3 = lshr <3 x i32> %t1, %t2 ret <3 x i32> %t3 } -define <3 x i32> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i32> %y) { -; CHECK-LABEL: @t4_vec_nonsplat_undef1( -; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 30, i32 undef, i32 30> +define <3 x i32> @t4_vec_nonsplat_poison1(<3 x i32> %x, <3 x i32> %y) { +; CHECK-LABEL: @t4_vec_nonsplat_poison1( +; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 30, i32 poison, i32 30> ; CHECK-NEXT: ret <3 x i32> [[T3]] ; %t0 = sub <3 x i32> <i32 32, i32 32, i32 32>, %y %t1 = lshr <3 x i32> %x, %t0 - %t2 = add <3 x i32> %y, <i32 -2, i32 undef, i32 -2> + %t2 = add <3 x i32> %y, <i32 -2, i32 poison, i32 -2> %t3 
= lshr <3 x i32> %t1, %t2 ret <3 x i32> %t3 } -define <3 x i32> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i32> %y) { -; CHECK-LABEL: @t5_vec_nonsplat_undef1( -; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 30, i32 undef, i32 30> +define <3 x i32> @t5_vec_nonsplat_poison1(<3 x i32> %x, <3 x i32> %y) { +; CHECK-LABEL: @t5_vec_nonsplat_poison1( +; CHECK-NEXT: [[T3:%.*]] = lshr <3 x i32> [[X:%.*]], <i32 30, i32 poison, i32 30> ; CHECK-NEXT: ret <3 x i32> [[T3]] ; - %t0 = sub <3 x i32> <i32 32, i32 undef, i32 32>, %y + %t0 = sub <3 x i32> <i32 32, i32 poison, i32 32>, %y %t1 = lshr <3 x i32> %x, %t0 - %t2 = add <3 x i32> %y, <i32 -2, i32 undef, i32 -2> + %t2 = add <3 x i32> %y, <i32 -2, i32 poison, i32 -2> %t3 = lshr <3 x i32> %t1, %t2 ret <3 x i32> %t3 } diff --git a/llvm/test/Transforms/InstCombine/shift-logic.ll b/llvm/test/Transforms/InstCombine/shift-logic.ll index c982b45..b591400 100644 --- a/llvm/test/Transforms/InstCombine/shift-logic.ll +++ b/llvm/test/Transforms/InstCombine/shift-logic.ll @@ -44,18 +44,18 @@ define i16 @shl_or(i16 %x, i16 %py) { ret i16 %sh1 } -define <2 x i16> @shl_or_undef(<2 x i16> %x, <2 x i16> %py) { -; CHECK-LABEL: @shl_or_undef( +define <2 x i16> @shl_or_poison(<2 x i16> %x, <2 x i16> %py) { +; CHECK-LABEL: @shl_or_poison( ; CHECK-NEXT: [[Y:%.*]] = srem <2 x i16> [[PY:%.*]], <i16 42, i16 42> -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i16> [[X:%.*]], <i16 12, i16 undef> -; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i16> [[Y]], <i16 7, i16 undef> +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i16> [[X:%.*]], <i16 12, i16 poison> +; CHECK-NEXT: [[TMP2:%.*]] = shl nsw <2 x i16> [[Y]], <i16 7, i16 poison> ; CHECK-NEXT: [[SH1:%.*]] = or <2 x i16> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i16> [[SH1]] ; %y = srem <2 x i16> %py, <i16 42, i16 42> ; thwart complexity-based canonicalization - %sh0 = shl <2 x i16> %x, <i16 5, i16 undef> + %sh0 = shl <2 x i16> %x, <i16 5, i16 poison> %r = or <2 x i16> %y, %sh0 - %sh1 = shl <2 x i16> %r, <i16 7, i16 undef> 
+ %sh1 = shl <2 x i16> %r, <i16 7, i16 poison> ret <2 x i16> %sh1 } @@ -100,18 +100,18 @@ define i64 @lshr_and(i64 %x, i64 %py) { ret i64 %sh1 } -define <2 x i64> @lshr_and_undef(<2 x i64> %x, <2 x i64> %py) { -; CHECK-LABEL: @lshr_and_undef( +define <2 x i64> @lshr_and_poison(<2 x i64> %x, <2 x i64> %py) { +; CHECK-LABEL: @lshr_and_poison( ; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42> -; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[X:%.*]], <i64 12, i64 undef> -; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[Y]], <i64 7, i64 undef> +; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> [[X:%.*]], <i64 12, i64 poison> +; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[Y]], <i64 7, i64 poison> ; CHECK-NEXT: [[SH1:%.*]] = and <2 x i64> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i64> [[SH1]] ; %y = srem <2 x i64> %py, <i64 42, i64 42> ; thwart complexity-based canonicalization - %sh0 = lshr <2 x i64> %x, <i64 5, i64 undef> + %sh0 = lshr <2 x i64> %x, <i64 5, i64 poison> %r = and <2 x i64> %y, %sh0 - %sh1 = lshr <2 x i64> %r, <i64 7, i64 undef> + %sh1 = lshr <2 x i64> %r, <i64 7, i64 poison> ret <2 x i64> %sh1 } @@ -212,16 +212,16 @@ define i32 @ashr_overshift_xor(i32 %x, i32 %y) { ret i32 %sh1 } -define <2 x i32> @ashr_undef_undef_xor(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @ashr_undef_undef_xor( -; CHECK-NEXT: [[SH0:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 15, i32 undef> +define <2 x i32> @ashr_poison_poison_xor(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @ashr_poison_poison_xor( +; CHECK-NEXT: [[SH0:%.*]] = ashr <2 x i32> [[X:%.*]], <i32 15, i32 poison> ; CHECK-NEXT: [[R:%.*]] = xor <2 x i32> [[SH0]], [[Y:%.*]] -; CHECK-NEXT: [[SH1:%.*]] = ashr <2 x i32> [[R]], <i32 undef, i32 17> +; CHECK-NEXT: [[SH1:%.*]] = ashr <2 x i32> [[R]], <i32 poison, i32 17> ; CHECK-NEXT: ret <2 x i32> [[SH1]] ; - %sh0 = ashr <2 x i32> %x, <i32 15, i32 undef> + %sh0 = ashr <2 x i32> %x, <i32 15, i32 poison> %r = xor <2 x i32> %y, %sh0 - %sh1 = ashr <2 x i32> %r, <i32 undef, i32 
17> + %sh1 = ashr <2 x i32> %r, <i32 poison, i32 17> ret <2 x i32> %sh1 } @@ -390,18 +390,18 @@ define <2 x i8> @shl_add_nonuniform(<2 x i8> %x, <2 x i8> %y) { } -define <2 x i64> @shl_add_undef(<2 x i64> %x, <2 x i64> %py) { -; CHECK-LABEL: @shl_add_undef( +define <2 x i64> @shl_add_poison(<2 x i64> %x, <2 x i64> %py) { +; CHECK-LABEL: @shl_add_poison( ; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42> -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[X:%.*]], <i64 12, i64 undef> -; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[Y]], <i64 7, i64 undef> +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[X:%.*]], <i64 12, i64 poison> +; CHECK-NEXT: [[TMP2:%.*]] = shl nsw <2 x i64> [[Y]], <i64 7, i64 poison> ; CHECK-NEXT: [[SH1:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret <2 x i64> [[SH1]] ; %y = srem <2 x i64> %py, <i64 42, i64 42> ; thwart complexity-based canonicalization - %sh0 = shl <2 x i64> %x, <i64 5, i64 undef> + %sh0 = shl <2 x i64> %x, <i64 5, i64 poison> %r = add <2 x i64> %y, %sh0 - %sh1 = shl <2 x i64> %r, <i64 7, i64 undef> + %sh1 = shl <2 x i64> %r, <i64 7, i64 poison> ret <2 x i64> %sh1 } @@ -432,18 +432,18 @@ define <2 x i8> @lshr_add_nonuniform(<2 x i8> %x, <2 x i8> %y) { ret <2 x i8> %sh1 } -define <2 x i64> @lshr_add_undef(<2 x i64> %x, <2 x i64> %py) { -; CHECK-LABEL: @lshr_add_undef( +define <2 x i64> @lshr_add_poison(<2 x i64> %x, <2 x i64> %py) { +; CHECK-LABEL: @lshr_add_poison( ; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42> -; CHECK-NEXT: [[SH0:%.*]] = lshr <2 x i64> [[X:%.*]], <i64 5, i64 undef> -; CHECK-NEXT: [[R:%.*]] = add <2 x i64> [[Y]], [[SH0]] -; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i64> [[R]], <i64 7, i64 undef> +; CHECK-NEXT: [[SH0:%.*]] = lshr <2 x i64> [[X:%.*]], <i64 5, i64 poison> +; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i64> [[Y]], [[SH0]] +; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i64> [[R]], <i64 7, i64 poison> ; CHECK-NEXT: ret <2 x i64> [[SH1]] ; %y = srem <2 x i64> %py, <i64 42, i64 
42> ; thwart complexity-based canonicalization - %sh0 = lshr <2 x i64> %x, <i64 5, i64 undef> + %sh0 = lshr <2 x i64> %x, <i64 5, i64 poison> %r = add <2 x i64> %y, %sh0 - %sh1 = lshr <2 x i64> %r, <i64 7, i64 undef> + %sh1 = lshr <2 x i64> %r, <i64 7, i64 poison> ret <2 x i64> %sh1 } @@ -488,18 +488,18 @@ define <2 x i8> @shl_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) { } -define <2 x i64> @shl_sub_undef(<2 x i64> %x, <2 x i64> %py) { -; CHECK-LABEL: @shl_sub_undef( +define <2 x i64> @shl_sub_poison(<2 x i64> %x, <2 x i64> %py) { +; CHECK-LABEL: @shl_sub_poison( ; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42> -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[X:%.*]], <i64 12, i64 undef> -; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[Y]], <i64 7, i64 undef> +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[X:%.*]], <i64 12, i64 poison> +; CHECK-NEXT: [[TMP2:%.*]] = shl nsw <2 x i64> [[Y]], <i64 7, i64 poison> ; CHECK-NEXT: [[SH1:%.*]] = sub <2 x i64> [[TMP2]], [[TMP1]] ; CHECK-NEXT: ret <2 x i64> [[SH1]] ; %y = srem <2 x i64> %py, <i64 42, i64 42> ; thwart complexity-based canonicalization - %sh0 = shl <2 x i64> %x, <i64 5, i64 undef> + %sh0 = shl <2 x i64> %x, <i64 5, i64 poison> %r = sub <2 x i64> %y, %sh0 - %sh1 = shl <2 x i64> %r, <i64 7, i64 undef> + %sh1 = shl <2 x i64> %r, <i64 7, i64 poison> ret <2 x i64> %sh1 } @@ -530,17 +530,17 @@ define <2 x i8> @lshr_sub_nonuniform(<2 x i8> %x, <2 x i8> %y) { ret <2 x i8> %sh1 } -define <2 x i64> @lshr_sub_undef(<2 x i64> %x, <2 x i64> %py) { -; CHECK-LABEL: @lshr_sub_undef( +define <2 x i64> @lshr_sub_poison(<2 x i64> %x, <2 x i64> %py) { +; CHECK-LABEL: @lshr_sub_poison( ; CHECK-NEXT: [[Y:%.*]] = srem <2 x i64> [[PY:%.*]], <i64 42, i64 42> -; CHECK-NEXT: [[SH0:%.*]] = lshr <2 x i64> [[X:%.*]], <i64 5, i64 undef> -; CHECK-NEXT: [[R:%.*]] = sub <2 x i64> [[Y]], [[SH0]] -; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i64> [[R]], <i64 7, i64 undef> +; CHECK-NEXT: [[SH0:%.*]] = lshr <2 x i64> [[X:%.*]], <i64 5, i64 
poison> +; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i64> [[Y]], [[SH0]] +; CHECK-NEXT: [[SH1:%.*]] = lshr <2 x i64> [[R]], <i64 7, i64 poison> ; CHECK-NEXT: ret <2 x i64> [[SH1]] ; %y = srem <2 x i64> %py, <i64 42, i64 42> ; thwart complexity-based canonicalization - %sh0 = lshr <2 x i64> %x, <i64 5, i64 undef> + %sh0 = lshr <2 x i64> %x, <i64 5, i64 poison> %r = sub <2 x i64> %y, %sh0 - %sh1 = lshr <2 x i64> %r, <i64 7, i64 undef> + %sh1 = lshr <2 x i64> %r, <i64 7, i64 poison> ret <2 x i64> %sh1 } diff --git a/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll index 406dc72..daa4955 100644 --- a/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/shl-and-negC-icmpeq-zero.ll @@ -81,39 +81,39 @@ define <4 x i1> @vec_4xi32_shl_and_negC_eq(<4 x i32> %x, <4 x i32> %y) { ret <4 x i1> %r } -define <4 x i1> @vec_shl_and_negC_eq_undef1(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @vec_shl_and_negC_eq_undef1( +define <4 x i1> @vec_shl_and_negC_eq_poison1(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @vec_shl_and_negC_eq_poison1( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], <i32 8, i32 8, i32 8, i32 8> ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y - %and = and <4 x i32> %shl, <i32 4294967288, i32 undef, i32 4294967288, i32 4294967288> ; ~7 + %and = and <4 x i32> %shl, <i32 4294967288, i32 poison, i32 4294967288, i32 4294967288> ; ~7 %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 0> ret <4 x i1> %r } -define <4 x i1> @vec_shl_and_negC_eq_undef2(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @vec_shl_and_negC_eq_undef2( +define <4 x i1> @vec_shl_and_negC_eq_poison2(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @vec_shl_and_negC_eq_poison2( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], <i32 8, i32 
8, i32 8, i32 8> ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y %and = and <4 x i32> %shl, <i32 4294967288, i32 4294967288, i32 4294967288, i32 4294967288> ; ~7 - %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 undef> + %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 poison> ret <4 x i1> %r } -define <4 x i1> @vec_shl_and_negC_eq_undef3(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @vec_shl_and_negC_eq_undef3( +define <4 x i1> @vec_shl_and_negC_eq_poison3(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @vec_shl_and_negC_eq_poison3( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp ult <4 x i32> [[SHL]], <i32 8, i32 8, i32 8, i32 8> ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y - %and = and <4 x i32> %shl, <i32 4294967288, i32 4294967288, i32 undef, i32 4294967288> ; ~7 - %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 undef> + %and = and <4 x i32> %shl, <i32 4294967288, i32 4294967288, i32 poison, i32 4294967288> ; ~7 + %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 poison> ret <4 x i1> %r } diff --git a/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll b/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll index 4c2c876..dcc1819 100644 --- a/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll +++ b/llvm/test/Transforms/InstCombine/shl-and-signbit-icmpeq-zero.ll @@ -81,39 +81,39 @@ define <4 x i1> @vec_4xi32_shl_and_signbit_eq(<4 x i32> %x, <4 x i32> %y) { ret <4 x i1> %r } -define <4 x i1> @vec_4xi32_shl_and_signbit_eq_undef1(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_undef1( +define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison1(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_poison1( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], <i32 -1, i32 -1, i32 -1, i32 -1> ; CHECK-NEXT: ret <4 x i1> [[R]] 
; %shl = shl <4 x i32> %x, %y - %and = and <4 x i32> %shl, <i32 2147483648, i32 undef, i32 2147483648, i32 2147483648> + %and = and <4 x i32> %shl, <i32 2147483648, i32 poison, i32 2147483648, i32 2147483648> %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 0> ret <4 x i1> %r } -define <4 x i1> @vec_4xi32_shl_and_signbit_eq_undef2(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_undef2( +define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison2(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_poison2( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], <i32 -1, i32 -1, i32 -1, i32 -1> ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y %and = and <4 x i32> %shl, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648> - %r = icmp eq <4 x i32> %and, <i32 undef, i32 0, i32 0, i32 0> + %r = icmp eq <4 x i32> %and, <i32 poison, i32 0, i32 0, i32 0> ret <4 x i1> %r } -define <4 x i1> @vec_4xi32_shl_and_signbit_eq_undef3(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_undef3( +define <4 x i1> @vec_4xi32_shl_and_signbit_eq_poison3(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @vec_4xi32_shl_and_signbit_eq_poison3( ; CHECK-NEXT: [[SHL:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[R:%.*]] = icmp sgt <4 x i32> [[SHL]], <i32 -1, i32 -1, i32 -1, i32 -1> ; CHECK-NEXT: ret <4 x i1> [[R]] ; %shl = shl <4 x i32> %x, %y - %and = and <4 x i32> %shl, <i32 2147483648, i32 undef, i32 2147483648, i32 2147483648> - %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 undef> + %and = and <4 x i32> %shl, <i32 2147483648, i32 poison, i32 2147483648, i32 2147483648> + %r = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 poison> ret <4 x i1> %r } diff --git a/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll b/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll index 
aeb4c8b..e750572 100644 --- a/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll +++ b/llvm/test/Transforms/InstCombine/signmask-of-sext-vs-of-shl-of-zext.ll @@ -129,40 +129,56 @@ define <2 x i32> @t8(<2 x i16> %x) { %r = and <2 x i32> %i1, <i32 -2147483648, i32 -2147483648> ret <2 x i32> %r } + define <2 x i32> @t9(<2 x i16> %x) { ; CHECK-LABEL: @t9( -; CHECK-NEXT: [[X_SIGNEXT:%.*]] = sext <2 x i16> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[X_SIGNEXT]], <i32 -2147483648, i32 undef> +; CHECK-NEXT: [[I1:%.*]] = sext <2 x i16> [[X:%.*]] to <2 x i32> +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[I1]], <i32 -2147483648, i32 -2147483648> ; CHECK-NEXT: ret <2 x i32> [[R]] ; %i0 = zext <2 x i16> %x to <2 x i32> - %i1 = shl <2 x i32> %i0, <i32 16, i32 undef> + %i1 = shl <2 x i32> %i0, <i32 16, i32 poison> %r = and <2 x i32> %i1, <i32 -2147483648, i32 -2147483648> - ; Here undef can be propagated into the mask. ret <2 x i32> %r } -define <2 x i32> @t10(<2 x i16> %x) { -; CHECK-LABEL: @t10( -; CHECK-NEXT: [[X_SIGNEXT:%.*]] = sext <2 x i16> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[X_SIGNEXT]], <i32 -2147483648, i32 0> + +; If we folded this, we wouldn't be able to keep the undef mask. +define <2 x i32> @t10_undef(<2 x i16> %x) { +; CHECK-LABEL: @t10_undef( +; CHECK-NEXT: [[I0:%.*]] = zext <2 x i16> [[X:%.*]] to <2 x i32> +; CHECK-NEXT: [[I1:%.*]] = shl nuw <2 x i32> [[I0]], <i32 16, i32 16> +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[I1]], <i32 -2147483648, i32 undef> ; CHECK-NEXT: ret <2 x i32> [[R]] ; %i0 = zext <2 x i16> %x to <2 x i32> %i1 = shl <2 x i32> %i0, <i32 16, i32 16> %r = and <2 x i32> %i1, <i32 -2147483648, i32 undef> - ; CAREFUL! 
We can't keep undef mask here, since high bits are no longer zero, + ret <2 x i32> %r +} + +define <2 x i32> @t10_poison(<2 x i16> %x) { +; CHECK-LABEL: @t10_poison( +; CHECK-NEXT: [[I1:%.*]] = sext <2 x i16> [[X:%.*]] to <2 x i32> +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[I1]], <i32 -2147483648, i32 poison> +; CHECK-NEXT: ret <2 x i32> [[R]] +; + %i0 = zext <2 x i16> %x to <2 x i32> + %i1 = shl <2 x i32> %i0, <i32 16, i32 16> + %r = and <2 x i32> %i1, <i32 -2147483648, i32 poison> + ; CAREFUL! We can't keep poison mask here, since high bits are no longer zero, ; we must sanitize it to 0. ret <2 x i32> %r } + define <2 x i32> @t11(<2 x i16> %x) { ; CHECK-LABEL: @t11( ; CHECK-NEXT: [[X_SIGNEXT:%.*]] = sext <2 x i16> [[X:%.*]] to <2 x i32> -; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[X_SIGNEXT]], <i32 -2147483648, i32 undef> +; CHECK-NEXT: [[R:%.*]] = and <2 x i32> [[X_SIGNEXT]], <i32 -2147483648, i32 poison> ; CHECK-NEXT: ret <2 x i32> [[R]] ; %i0 = zext <2 x i16> %x to <2 x i32> - %i1 = shl <2 x i32> %i0, <i32 16, i32 undef> - %r = and <2 x i32> %i1, <i32 -2147483648, i32 undef> - ; Here undef mask is fine. + %i1 = shl <2 x i32> %i0, <i32 16, i32 poison> + %r = and <2 x i32> %i1, <i32 -2147483648, i32 poison> + ; Here poison mask is fine. 
ret <2 x i32> %r } diff --git a/llvm/test/Transforms/InstCombine/sitofp.ll b/llvm/test/Transforms/InstCombine/sitofp.ll index cc6b642..51eff39 100644 --- a/llvm/test/Transforms/InstCombine/sitofp.ll +++ b/llvm/test/Transforms/InstCombine/sitofp.ll @@ -256,7 +256,7 @@ define i25 @consider_lowbits_masked_input(i25 %A) { define i32 @overflow_masked_input(i32 %A) { ; CHECK-LABEL: @overflow_masked_input( ; CHECK-NEXT: [[M:%.*]] = and i32 [[A:%.*]], 16777217 -; CHECK-NEXT: [[B:%.*]] = uitofp i32 [[M]] to float +; CHECK-NEXT: [[B:%.*]] = uitofp nneg i32 [[M]] to float ; CHECK-NEXT: [[C:%.*]] = fptoui float [[B]] to i32 ; CHECK-NEXT: ret i32 [[C]] ; diff --git a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll index 996d2c0..d079c03 100644 --- a/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll +++ b/llvm/test/Transforms/InstCombine/struct-assign-tbaa.ll @@ -75,7 +75,7 @@ entry: !1 = !{!"omnipotent char", !0} !2 = !{!5, !5, i64 0} !3 = !{i64 0, i64 4, !2} -!4 = !{i64 0, i64 8, null} +!4 = !{i64 0, i64 8, !2} !5 = !{!"float", !0} !6 = !{i64 0, i64 4, !2, i64 4, i64 4, !2} !7 = !{i64 0, i64 2, !2, i64 4, i64 6, !2} diff --git a/llvm/test/Transforms/InstCombine/sub-not.ll b/llvm/test/Transforms/InstCombine/sub-not.ll index ec36754..89ccf5a 100644 --- a/llvm/test/Transforms/InstCombine/sub-not.ll +++ b/llvm/test/Transforms/InstCombine/sub-not.ll @@ -34,7 +34,7 @@ define <2 x i8> @sub_not_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-NEXT: ret <2 x i8> [[R]] ; %s = sub <2 x i8> %x, %y - %r = xor <2 x i8> %s, <i8 -1, i8 undef> + %r = xor <2 x i8> %s, <i8 -1, i8 poison> ret <2 x i8> %r } @@ -69,7 +69,7 @@ define <2 x i8> @dec_sub_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-NEXT: ret <2 x i8> [[R]] ; %s = sub <2 x i8> %x, %y - %r = add <2 x i8> %s, <i8 -1, i8 undef> + %r = add <2 x i8> %s, <i8 -1, i8 poison> ret <2 x i8> %r } @@ -103,7 +103,7 @@ define <2 x i8> @sub_inc_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-NEXT: 
[[R:%.*]] = add <2 x i8> [[S_NEG]], [[Y:%.*]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; - %s = add <2 x i8> %x, <i8 undef, i8 1> + %s = add <2 x i8> %x, <i8 poison, i8 1> %r = sub <2 x i8> %y, %s ret <2 x i8> %r } @@ -138,7 +138,7 @@ define <2 x i8> @sub_dec_vec(<2 x i8> %x, <2 x i8> %y) { ; CHECK-NEXT: [[R:%.*]] = add <2 x i8> [[TMP1]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; - %s = add <2 x i8> %x, <i8 undef, i8 -1> + %s = add <2 x i8> %x, <i8 poison, i8 -1> %r = sub <2 x i8> %s, %y ret <2 x i8> %r } diff --git a/llvm/test/Transforms/InstCombine/sub.ll b/llvm/test/Transforms/InstCombine/sub.ll index 249b567..a84e389 100644 --- a/llvm/test/Transforms/InstCombine/sub.ll +++ b/llvm/test/Transforms/InstCombine/sub.ll @@ -130,44 +130,44 @@ define <2 x i32> @neg_nsw_sub_nsw_vec(<2 x i32> %x, <2 x i32> %y) { ret <2 x i32> %r } -define <2 x i32> @neg_sub_vec_undef(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @neg_sub_vec_undef( +define <2 x i32> @neg_sub_vec_poison(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @neg_sub_vec_poison( ; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; - %neg = sub <2 x i32> <i32 0, i32 undef>, %x + %neg = sub <2 x i32> <i32 0, i32 poison>, %x %r = sub <2 x i32> %y, %neg ret <2 x i32> %r } -define <2 x i32> @neg_nsw_sub_vec_undef(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @neg_nsw_sub_vec_undef( +define <2 x i32> @neg_nsw_sub_vec_poison(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @neg_nsw_sub_vec_poison( ; CHECK-NEXT: [[R:%.*]] = add <2 x i32> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; - %neg = sub nsw <2 x i32> <i32 undef, i32 0>, %x + %neg = sub nsw <2 x i32> <i32 poison, i32 0>, %x %r = sub <2 x i32> %y, %neg ret <2 x i32> %r } -define <2 x i32> @neg_sub_nsw_vec_undef(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @neg_sub_nsw_vec_undef( +define <2 x i32> @neg_sub_nsw_vec_poison(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @neg_sub_nsw_vec_poison( ; CHECK-NEXT: [[R:%.*]] = add 
<2 x i32> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; - %neg = sub <2 x i32> <i32 undef, i32 0>, %x + %neg = sub <2 x i32> <i32 poison, i32 0>, %x %r = sub nsw <2 x i32> %y, %neg ret <2 x i32> %r } ; This should not drop 'nsw'. -define <2 x i32> @neg_nsw_sub_nsw_vec_undef(<2 x i32> %x, <2 x i32> %y) { -; CHECK-LABEL: @neg_nsw_sub_nsw_vec_undef( +define <2 x i32> @neg_nsw_sub_nsw_vec_poison(<2 x i32> %x, <2 x i32> %y) { +; CHECK-LABEL: @neg_nsw_sub_nsw_vec_poison( ; CHECK-NEXT: [[R:%.*]] = add nsw <2 x i32> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i32> [[R]] ; - %neg = sub nsw <2 x i32> <i32 0, i32 undef>, %x + %neg = sub nsw <2 x i32> <i32 0, i32 poison>, %x %r = sub nsw <2 x i32> %y, %neg ret <2 x i32> %r } @@ -205,13 +205,13 @@ define <2 x i8> @notnotsub_vec(<2 x i8> %x, <2 x i8> %y) { ret <2 x i8> %sub } -define <2 x i8> @notnotsub_vec_undef_elts(<2 x i8> %x, <2 x i8> %y) { -; CHECK-LABEL: @notnotsub_vec_undef_elts( +define <2 x i8> @notnotsub_vec_poison_elts(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @notnotsub_vec_poison_elts( ; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i8> [[Y:%.*]], [[X:%.*]] ; CHECK-NEXT: ret <2 x i8> [[SUB]] ; - %nx = xor <2 x i8> %x, <i8 undef, i8 -1> - %ny = xor <2 x i8> %y, <i8 -1, i8 undef> + %nx = xor <2 x i8> %x, <i8 poison, i8 -1> + %ny = xor <2 x i8> %y, <i8 -1, i8 poison> %sub = sub <2 x i8> %nx, %ny ret <2 x i8> %sub } @@ -2351,12 +2351,12 @@ define <2 x i8> @sub_to_and_vector1(<2 x i8> %x) { define <2 x i8> @sub_to_and_vector2(<2 x i8> %x) { ; CHECK-LABEL: @sub_to_and_vector2( -; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> <i8 71, i8 undef>, [[X:%.*]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> <i8 71, i8 poison>, [[X:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], <i8 120, i8 120> ; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> <i8 77, i8 77>, [[AND]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; - %sub = sub nuw <2 x i8> <i8 71, i8 undef>, %x + %sub = sub nuw <2 x i8> <i8 71, i8 poison>, %x %and = and <2 x i8> %sub, <i8 
120, i8 120> %r = sub <2 x i8> <i8 77, i8 77>, %and ret <2 x i8> %r @@ -2366,12 +2366,12 @@ define <2 x i8> @sub_to_and_vector2(<2 x i8> %x) { define <2 x i8> @sub_to_and_vector3(<2 x i8> %x) { ; CHECK-LABEL: @sub_to_and_vector3( ; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> <i8 71, i8 71>, [[X:%.*]] -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], <i8 120, i8 undef> +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], <i8 120, i8 poison> ; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> <i8 44, i8 44>, [[AND]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %sub = sub nuw <2 x i8> <i8 71, i8 71>, %x - %and = and <2 x i8> %sub, <i8 120, i8 undef> + %and = and <2 x i8> %sub, <i8 120, i8 poison> %r = sub <2 x i8> <i8 44, i8 44>, %and ret <2 x i8> %r } @@ -2381,12 +2381,12 @@ define <2 x i8> @sub_to_and_vector4(<2 x i8> %x) { ; CHECK-LABEL: @sub_to_and_vector4( ; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> <i8 71, i8 71>, [[X:%.*]] ; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[SUB]], <i8 120, i8 120> -; CHECK-NEXT: [[R:%.*]] = sub <2 x i8> <i8 88, i8 undef>, [[AND]] +; CHECK-NEXT: [[R:%.*]] = sub nsw <2 x i8> <i8 88, i8 poison>, [[AND]] ; CHECK-NEXT: ret <2 x i8> [[R]] ; %sub = sub nuw <2 x i8> <i8 71, i8 71>, %x %and = and <2 x i8> %sub, <i8 120, i8 120> - %r = sub <2 x i8> <i8 88, i8 undef>, %and + %r = sub <2 x i8> <i8 88, i8 poison>, %and ret <2 x i8> %r } diff --git a/llvm/test/Transforms/InstCombine/switch-select.ll b/llvm/test/Transforms/InstCombine/switch-select.ll new file mode 100644 index 0000000..60757c5 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/switch-select.ll @@ -0,0 +1,159 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +define void @test_ult_rhsc(i8 %x) { +; CHECK-LABEL: define void @test_ult_rhsc( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: switch i8 [[X]], label [[BB1:%.*]] [ +; CHECK-NEXT: i8 2, label [[BB2:%.*]] +; CHECK-NEXT: i8 12, label [[BB3:%.*]] 
+; CHECK-NEXT: ] +; CHECK: bb1: +; CHECK-NEXT: call void @func1() +; CHECK-NEXT: unreachable +; CHECK: bb2: +; CHECK-NEXT: call void @func2() +; CHECK-NEXT: unreachable +; CHECK: bb3: +; CHECK-NEXT: call void @func3() +; CHECK-NEXT: unreachable +; + %val = add nsw i8 %x, -2 + %cmp = icmp ult i8 %val, 11 + %cond = select i1 %cmp, i8 %val, i8 6 + switch i8 %cond, label %bb1 [ + i8 0, label %bb2 + i8 10, label %bb3 + ] + +bb1: + call void @func1() + unreachable +bb2: + call void @func2() + unreachable +bb3: + call void @func3() + unreachable +} + +define void @test_eq_lhsc(i8 %x) { +; CHECK-LABEL: define void @test_eq_lhsc( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: switch i8 [[X]], label [[BB1:%.*]] [ +; CHECK-NEXT: i8 0, label [[BB2:%.*]] +; CHECK-NEXT: i8 10, label [[BB3:%.*]] +; CHECK-NEXT: ] +; CHECK: bb1: +; CHECK-NEXT: call void @func1() +; CHECK-NEXT: unreachable +; CHECK: bb2: +; CHECK-NEXT: call void @func2() +; CHECK-NEXT: unreachable +; CHECK: bb3: +; CHECK-NEXT: call void @func3() +; CHECK-NEXT: unreachable +; + %cmp = icmp eq i8 %x, 4 + %cond = select i1 %cmp, i8 6, i8 %x + switch i8 %cond, label %bb1 [ + i8 0, label %bb2 + i8 10, label %bb3 + ] + +bb1: + call void @func1() + unreachable +bb2: + call void @func2() + unreachable +bb3: + call void @func3() + unreachable +} + +define void @test_ult_rhsc_invalid_cond(i8 %x, i8 %y) { +; CHECK-LABEL: define void @test_ult_rhsc_invalid_cond( +; CHECK-SAME: i8 [[X:%.*]], i8 [[Y:%.*]]) { +; CHECK-NEXT: [[VAL:%.*]] = add nsw i8 [[X]], -2 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[Y]], 11 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i8 [[VAL]], i8 6 +; CHECK-NEXT: switch i8 [[COND]], label [[BB1:%.*]] [ +; CHECK-NEXT: i8 0, label [[BB2:%.*]] +; CHECK-NEXT: i8 10, label [[BB3:%.*]] +; CHECK-NEXT: i8 13, label [[BB3]] +; CHECK-NEXT: ] +; CHECK: bb1: +; CHECK-NEXT: call void @func1() +; CHECK-NEXT: unreachable +; CHECK: bb2: +; CHECK-NEXT: call void @func2() +; CHECK-NEXT: unreachable +; CHECK: bb3: +; 
CHECK-NEXT: call void @func3() +; CHECK-NEXT: unreachable +; + %val = add nsw i8 %x, -2 + %cmp = icmp ult i8 %y, 11 + %cond = select i1 %cmp, i8 %val, i8 6 + switch i8 %cond, label %bb1 [ + i8 0, label %bb2 + i8 10, label %bb3 + i8 13, label %bb3 + ] + +bb1: + call void @func1() + unreachable +bb2: + call void @func2() + unreachable +bb3: + call void @func3() + unreachable +} + +define void @test_ult_rhsc_fail(i8 %x) { +; CHECK-LABEL: define void @test_ult_rhsc_fail( +; CHECK-SAME: i8 [[X:%.*]]) { +; CHECK-NEXT: [[VAL:%.*]] = add nsw i8 [[X]], -2 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[VAL]], 11 +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i8 [[VAL]], i8 6 +; CHECK-NEXT: switch i8 [[COND]], label [[BB1:%.*]] [ +; CHECK-NEXT: i8 0, label [[BB2:%.*]] +; CHECK-NEXT: i8 10, label [[BB3:%.*]] +; CHECK-NEXT: i8 13, label [[BB3]] +; CHECK-NEXT: ] +; CHECK: bb1: +; CHECK-NEXT: call void @func1() +; CHECK-NEXT: unreachable +; CHECK: bb2: +; CHECK-NEXT: call void @func2() +; CHECK-NEXT: unreachable +; CHECK: bb3: +; CHECK-NEXT: call void @func3() +; CHECK-NEXT: unreachable +; + %val = add nsw i8 %x, -2 + %cmp = icmp ult i8 %val, 11 + %cond = select i1 %cmp, i8 %val, i8 6 + switch i8 %cond, label %bb1 [ + i8 0, label %bb2 + i8 10, label %bb3 + i8 13, label %bb3 + ] + +bb1: + call void @func1() + unreachable +bb2: + call void @func2() + unreachable +bb3: + call void @func3() + unreachable +} + +declare void @func1() +declare void @func2() +declare void @func3() diff --git a/llvm/test/Transforms/InstCombine/threadlocal_address.ll b/llvm/test/Transforms/InstCombine/threadlocal_address.ll new file mode 100644 index 0000000..0c220d9 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/threadlocal_address.ll @@ -0,0 +1,41 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -o - -S %s -passes=instcombine | FileCheck %s + +@tlsvar_a4 = thread_local global i32 4, align 4 + +define void @func_increase_alignment() { +; 
CHECK-LABEL: define void @func_increase_alignment() { +; CHECK-NEXT: [[P:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr @tlsvar_a4) +; CHECK-NEXT: store i32 42, ptr [[P]], align 2 +; CHECK-NEXT: ret void +; + %p = call align 2 ptr @llvm.threadlocal.address(ptr @tlsvar_a4) + store i32 42, ptr %p, align 2 + ret void +} + +@tlsvar_a32 = thread_local global i32 5, align 32 + +define i1 @func_add_alignment() { +; CHECK-LABEL: define i1 @func_add_alignment() { +; CHECK-NEXT: ret i1 true +; + %p = call ptr @llvm.threadlocal.address(ptr @tlsvar_a32) + %p_int = ptrtoint ptr %p to i32 + %lowbits = and i32 %p_int, 31 + %zero = icmp eq i32 %lowbits, 0 + ret i1 %zero +} + +@tlsvar_a1 = thread_local global i8 6, align 1 + +define i1 @func_dont_reduce_alignment() { +; CHECK-LABEL: define i1 @func_dont_reduce_alignment() { +; CHECK-NEXT: ret i1 true +; + %p = call align 4 ptr @llvm.threadlocal.address(ptr @tlsvar_a1) + %p_int = ptrtoint ptr %p to i32 + %lowbits = and i32 %p_int, 3 + %zero = icmp eq i32 %lowbits, 0 + ret i1 %zero +} diff --git a/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll b/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll index 4c85712..063006b 100644 --- a/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/trunc-inseltpoison.ll @@ -49,15 +49,15 @@ define <2 x i64> @test1_vec_nonuniform(<2 x i64> %a) { ret <2 x i64> %d } -define <2 x i64> @test1_vec_undef(<2 x i64> %a) { -; CHECK-LABEL: @test1_vec_undef( +define <2 x i64> @test1_vec_poison(<2 x i64> %a) { +; CHECK-LABEL: @test1_vec_poison( ; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[D:%.*]] = and <2 x i64> [[A]], <i64 15, i64 0> +; CHECK-NEXT: [[D:%.*]] = and <2 x i64> [[A]], <i64 15, i64 poison> ; CHECK-NEXT: call void @use_vec(<2 x i32> [[B]]) ; CHECK-NEXT: ret <2 x i64> [[D]] ; %b = trunc <2 x i64> %a to <2 x i32> - %c = and <2 x i32> %b, <i32 15, i32 undef> + %c = and <2 x i32> %b, <i32 15, i32 poison> 
%d = zext <2 x i32> %c to <2 x i64> call void @use_vec(<2 x i32> %b) ret <2 x i64> %d @@ -111,17 +111,17 @@ define <2 x i64> @test2_vec_nonuniform(<2 x i64> %a) { ret <2 x i64> %d } -define <2 x i64> @test2_vec_undef(<2 x i64> %a) { -; CHECK-LABEL: @test2_vec_undef( +define <2 x i64> @test2_vec_poison(<2 x i64> %a) { +; CHECK-LABEL: @test2_vec_poison( ; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[D1:%.*]] = shl <2 x i64> [[A]], <i64 36, i64 undef> -; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i64> [[D1]], <i64 36, i64 undef> +; CHECK-NEXT: [[D1:%.*]] = shl <2 x i64> [[A]], <i64 36, i64 poison> +; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i64> [[D1]], <i64 36, i64 poison> ; CHECK-NEXT: call void @use_vec(<2 x i32> [[B]]) ; CHECK-NEXT: ret <2 x i64> [[D]] ; %b = trunc <2 x i64> %a to <2 x i32> - %c = shl <2 x i32> %b, <i32 4, i32 undef> - %q = ashr <2 x i32> %c, <i32 4, i32 undef> + %c = shl <2 x i32> %b, <i32 4, i32 poison> + %q = ashr <2 x i32> %c, <i32 4, i32 poison> %d = sext <2 x i32> %q to <2 x i64> call void @use_vec(<2 x i32> %b) ret <2 x i64> %d @@ -300,18 +300,17 @@ define <2 x i64> @test8_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) { ret <2 x i64> %G } -define <2 x i64> @test8_vec_undef(<2 x i32> %A, <2 x i32> %B) { -; CHECK-LABEL: @test8_vec_undef( -; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128> -; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i128> -; CHECK-NEXT: [[E:%.*]] = shl <2 x i128> [[D]], <i128 32, i128 undef> -; CHECK-NEXT: [[F:%.*]] = or <2 x i128> [[E]], [[C]] -; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64> +define <2 x i64> @test8_vec_poison(<2 x i32> %A, <2 x i32> %B) { +; CHECK-LABEL: @test8_vec_poison( +; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64> +; CHECK-NEXT: [[E:%.*]] = shl nuw <2 x i64> [[D]], <i64 32, i64 poison> +; CHECK-NEXT: [[G:%.*]] = or disjoint <2 x i64> [[E]], [[C]] ; 
CHECK-NEXT: ret <2 x i64> [[G]] ; %C = zext <2 x i32> %A to <2 x i128> %D = zext <2 x i32> %B to <2 x i128> - %E = shl <2 x i128> %D, <i128 32, i128 undef> + %E = shl <2 x i128> %D, <i128 32, i128 poison> %F = or <2 x i128> %E, %C %G = trunc <2 x i128> %F to <2 x i64> ret <2 x i64> %G @@ -388,18 +387,17 @@ define <2 x i64> @test11_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) { ret <2 x i64> %G } -define <2 x i64> @test11_vec_undef(<2 x i32> %A, <2 x i32> %B) { -; CHECK-LABEL: @test11_vec_undef( -; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128> -; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i128> -; CHECK-NEXT: [[E:%.*]] = and <2 x i128> [[D]], <i128 31, i128 undef> -; CHECK-NEXT: [[F:%.*]] = shl <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64> +define <2 x i64> @test11_vec_poison(<2 x i32> %A, <2 x i32> %B) { +; CHECK-LABEL: @test11_vec_poison( +; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 poison> +; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[G:%.*]] = shl nuw nsw <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[G]] ; %C = zext <2 x i32> %A to <2 x i128> %D = zext <2 x i32> %B to <2 x i128> - %E = and <2 x i128> %D, <i128 31, i128 undef> + %E = and <2 x i128> %D, <i128 31, i128 poison> %F = shl <2 x i128> %C, %E %G = trunc <2 x i128> %F to <2 x i64> ret <2 x i64> %G @@ -453,18 +451,17 @@ define <2 x i64> @test12_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) { ret <2 x i64> %G } -define <2 x i64> @test12_vec_undef(<2 x i32> %A, <2 x i32> %B) { -; CHECK-LABEL: @test12_vec_undef( -; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128> -; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i128> -; CHECK-NEXT: [[E:%.*]] = and <2 x i128> [[D]], <i128 31, i128 undef> -; CHECK-NEXT: [[F:%.*]] = lshr <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc nuw nsw <2 x 
i128> [[F]] to <2 x i64> +define <2 x i64> @test12_vec_poison(<2 x i32> %A, <2 x i32> %B) { +; CHECK-LABEL: @test12_vec_poison( +; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 poison> +; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[G:%.*]] = lshr <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[G]] ; %C = zext <2 x i32> %A to <2 x i128> %D = zext <2 x i32> %B to <2 x i128> - %E = and <2 x i128> %D, <i128 31, i128 undef> + %E = and <2 x i128> %D, <i128 31, i128 poison> %F = lshr <2 x i128> %C, %E %G = trunc <2 x i128> %F to <2 x i64> ret <2 x i64> %G @@ -518,18 +515,17 @@ define <2 x i64> @test13_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) { ret <2 x i64> %G } -define <2 x i64> @test13_vec_undef(<2 x i32> %A, <2 x i32> %B) { -; CHECK-LABEL: @test13_vec_undef( -; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i128> -; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i128> -; CHECK-NEXT: [[E:%.*]] = and <2 x i128> [[D]], <i128 31, i128 undef> -; CHECK-NEXT: [[F:%.*]] = ashr <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc nsw <2 x i128> [[F]] to <2 x i64> +define <2 x i64> @test13_vec_poison(<2 x i32> %A, <2 x i32> %B) { +; CHECK-LABEL: @test13_vec_poison( +; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 poison> +; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[G:%.*]] = ashr <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[G]] ; %C = sext <2 x i32> %A to <2 x i128> %D = zext <2 x i32> %B to <2 x i128> - %E = and <2 x i128> %D, <i128 31, i128 undef> + %E = and <2 x i128> %D, <i128 31, i128 poison> %F = ashr <2 x i128> %C, %E %G = trunc <2 x i128> %F to <2 x i64> ret <2 x i64> %G @@ -766,13 +762,13 @@ define <2 x i32> @trunc_shl_v2i32_v2i64_uniform(<2 x i64> %val) { ret <2 x i32> %trunc } -define 
<2 x i32> @trunc_shl_v2i32_v2i64_undef(<2 x i64> %val) { -; CHECK-LABEL: @trunc_shl_v2i32_v2i64_undef( +define <2 x i32> @trunc_shl_v2i32_v2i64_poison(<2 x i64> %val) { +; CHECK-LABEL: @trunc_shl_v2i32_v2i64_poison( ; CHECK-NEXT: [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32> -; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], <i32 31, i32 undef> +; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], <i32 31, i32 poison> ; CHECK-NEXT: ret <2 x i32> [[TRUNC]] ; - %shl = shl <2 x i64> %val, <i64 31, i64 undef> + %shl = shl <2 x i64> %val, <i64 31, i64 poison> %trunc = trunc <2 x i64> %shl to <2 x i32> ret <2 x i32> %trunc } @@ -917,7 +913,7 @@ define <4 x i8> @wide_shuf(<4 x i32> %x) { ret <4 x i8> %trunc } -; trunc (shuffle X, undef, SplatMask) --> shuffle (trunc X), undef, SplatMask +; trunc (shuffle X, poison, SplatMask) --> shuffle (trunc X), poison, SplatMask define <4 x i8> @wide_splat1(<4 x i32> %x) { ; CHECK-LABEL: @wide_splat1( @@ -931,7 +927,7 @@ define <4 x i8> @wide_splat1(<4 x i32> %x) { } ; Test weird types. -; trunc (shuffle X, undef, SplatMask) --> shuffle (trunc X), undef, SplatMask +; trunc (shuffle X, poison, SplatMask) --> shuffle (trunc X), poison, SplatMask define <3 x i31> @wide_splat2(<3 x i33> %x) { ; CHECK-LABEL: @wide_splat2( @@ -945,8 +941,8 @@ define <3 x i31> @wide_splat2(<3 x i33> %x) { } ; FIXME: -; trunc (shuffle X, undef, SplatMask) --> shuffle (trunc X), undef, SplatMask -; A mask with undef elements should still be considered a splat mask. +; trunc (shuffle X, poison, SplatMask) --> shuffle (trunc X), poison, SplatMask +; A mask with poison elements should still be considered a splat mask. 
define <3 x i31> @wide_splat3(<3 x i33> %x) { ; CHECK-LABEL: @wide_splat3( @@ -954,7 +950,7 @@ define <3 x i31> @wide_splat3(<3 x i33> %x) { ; CHECK-NEXT: [[TRUNC:%.*]] = trunc <3 x i33> [[SHUF]] to <3 x i31> ; CHECK-NEXT: ret <3 x i31> [[TRUNC]] ; - %shuf = shufflevector <3 x i33> %x, <3 x i33> poison, <3 x i32> <i32 undef, i32 1, i32 1> + %shuf = shufflevector <3 x i33> %x, <3 x i33> poison, <3 x i32> <i32 poison, i32 1, i32 1> %trunc = trunc <3 x i33> %shuf to <3 x i31> ret <3 x i31> %trunc } diff --git a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll index 2c5f428..c50a3d0 100644 --- a/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll +++ b/llvm/test/Transforms/InstCombine/trunc-shift-trunc.ll @@ -56,14 +56,14 @@ define <2 x i8> @trunc_lshr_trunc_nonuniform(<2 x i64> %a) { ret <2 x i8> %d } -define <2 x i8> @trunc_lshr_trunc_uniform_undef(<2 x i64> %a) { -; CHECK-LABEL: @trunc_lshr_trunc_uniform_undef( -; CHECK-NEXT: [[C1:%.*]] = lshr <2 x i64> [[A:%.*]], <i64 24, i64 undef> +define <2 x i8> @trunc_lshr_trunc_uniform_poison(<2 x i64> %a) { +; CHECK-LABEL: @trunc_lshr_trunc_uniform_poison( +; CHECK-NEXT: [[C1:%.*]] = lshr <2 x i64> [[A:%.*]], <i64 24, i64 poison> ; CHECK-NEXT: [[D:%.*]] = trunc <2 x i64> [[C1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; %b = trunc <2 x i64> %a to <2 x i32> - %c = lshr <2 x i32> %b, <i32 24, i32 undef> + %c = lshr <2 x i32> %b, <i32 24, i32 poison> %d = trunc <2 x i32> %c to <2 x i8> ret <2 x i8> %d } @@ -142,14 +142,14 @@ define <2 x i8> @trunc_ashr_trunc_nonuniform(<2 x i64> %a) { ret <2 x i8> %d } -define <2 x i8> @trunc_ashr_trunc_uniform_undef(<2 x i64> %a) { -; CHECK-LABEL: @trunc_ashr_trunc_uniform_undef( -; CHECK-NEXT: [[C1:%.*]] = ashr <2 x i64> [[A:%.*]], <i64 8, i64 undef> +define <2 x i8> @trunc_ashr_trunc_uniform_poison(<2 x i64> %a) { +; CHECK-LABEL: @trunc_ashr_trunc_uniform_poison( +; CHECK-NEXT: [[C1:%.*]] = ashr <2 x i64> [[A:%.*]], <i64 8, 
i64 poison> ; CHECK-NEXT: [[D:%.*]] = trunc <2 x i64> [[C1]] to <2 x i8> ; CHECK-NEXT: ret <2 x i8> [[D]] ; %b = trunc <2 x i64> %a to <2 x i32> - %c = ashr <2 x i32> %b, <i32 8, i32 undef> + %c = ashr <2 x i32> %b, <i32 8, i32 poison> %d = trunc <2 x i32> %c to <2 x i8> ret <2 x i8> %d } diff --git a/llvm/test/Transforms/InstCombine/trunc.ll b/llvm/test/Transforms/InstCombine/trunc.ll index c77d726..e59b2be 100644 --- a/llvm/test/Transforms/InstCombine/trunc.ll +++ b/llvm/test/Transforms/InstCombine/trunc.ll @@ -49,15 +49,15 @@ define <2 x i64> @test1_vec_nonuniform(<2 x i64> %a) { ret <2 x i64> %d } -define <2 x i64> @test1_vec_undef(<2 x i64> %a) { -; CHECK-LABEL: @test1_vec_undef( +define <2 x i64> @test1_vec_poison(<2 x i64> %a) { +; CHECK-LABEL: @test1_vec_poison( ; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[D:%.*]] = and <2 x i64> [[A]], <i64 15, i64 0> +; CHECK-NEXT: [[D:%.*]] = and <2 x i64> [[A]], <i64 15, i64 poison> ; CHECK-NEXT: call void @use_vec(<2 x i32> [[B]]) ; CHECK-NEXT: ret <2 x i64> [[D]] ; %b = trunc <2 x i64> %a to <2 x i32> - %c = and <2 x i32> %b, <i32 15, i32 undef> + %c = and <2 x i32> %b, <i32 15, i32 poison> %d = zext <2 x i32> %c to <2 x i64> call void @use_vec(<2 x i32> %b) ret <2 x i64> %d @@ -111,17 +111,17 @@ define <2 x i64> @test2_vec_nonuniform(<2 x i64> %a) { ret <2 x i64> %d } -define <2 x i64> @test2_vec_undef(<2 x i64> %a) { -; CHECK-LABEL: @test2_vec_undef( +define <2 x i64> @test2_vec_poison(<2 x i64> %a) { +; CHECK-LABEL: @test2_vec_poison( ; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[D1:%.*]] = shl <2 x i64> [[A]], <i64 36, i64 undef> -; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i64> [[D1]], <i64 36, i64 undef> +; CHECK-NEXT: [[D1:%.*]] = shl <2 x i64> [[A]], <i64 36, i64 poison> +; CHECK-NEXT: [[D:%.*]] = ashr exact <2 x i64> [[D1]], <i64 36, i64 poison> ; CHECK-NEXT: call void @use_vec(<2 x i32> [[B]]) ; CHECK-NEXT: ret <2 x i64> [[D]] ; %b = 
trunc <2 x i64> %a to <2 x i32> - %c = shl <2 x i32> %b, <i32 4, i32 undef> - %q = ashr <2 x i32> %c, <i32 4, i32 undef> + %c = shl <2 x i32> %b, <i32 4, i32 poison> + %q = ashr <2 x i32> %c, <i32 4, i32 poison> %d = sext <2 x i32> %q to <2 x i64> call void @use_vec(<2 x i32> %b) ret <2 x i64> %d @@ -300,18 +300,17 @@ define <2 x i64> @test8_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) { ret <2 x i64> %G } -define <2 x i64> @test8_vec_undef(<2 x i32> %A, <2 x i32> %B) { -; CHECK-LABEL: @test8_vec_undef( -; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128> -; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i128> -; CHECK-NEXT: [[E:%.*]] = shl <2 x i128> [[D]], <i128 32, i128 undef> -; CHECK-NEXT: [[F:%.*]] = or <2 x i128> [[E]], [[C]] -; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> [[F]] to <2 x i64> +define <2 x i64> @test8_vec_poison(<2 x i32> %A, <2 x i32> %B) { +; CHECK-LABEL: @test8_vec_poison( +; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i64> +; CHECK-NEXT: [[E:%.*]] = shl nuw <2 x i64> [[D]], <i64 32, i64 poison> +; CHECK-NEXT: [[G:%.*]] = or disjoint <2 x i64> [[E]], [[C]] ; CHECK-NEXT: ret <2 x i64> [[G]] ; %C = zext <2 x i32> %A to <2 x i128> %D = zext <2 x i32> %B to <2 x i128> - %E = shl <2 x i128> %D, <i128 32, i128 undef> + %E = shl <2 x i128> %D, <i128 32, i128 poison> %F = or <2 x i128> %E, %C %G = trunc <2 x i128> %F to <2 x i64> ret <2 x i64> %G @@ -388,18 +387,17 @@ define <2 x i64> @test11_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) { ret <2 x i64> %G } -define <2 x i64> @test11_vec_undef(<2 x i32> %A, <2 x i32> %B) { -; CHECK-LABEL: @test11_vec_undef( -; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128> -; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i128> -; CHECK-NEXT: [[E:%.*]] = and <2 x i128> [[D]], <i128 31, i128 undef> -; CHECK-NEXT: [[F:%.*]] = shl <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc <2 x i128> 
[[F]] to <2 x i64> +define <2 x i64> @test11_vec_poison(<2 x i32> %A, <2 x i32> %B) { +; CHECK-LABEL: @test11_vec_poison( +; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 poison> +; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[G:%.*]] = shl nuw nsw <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[G]] ; %C = zext <2 x i32> %A to <2 x i128> %D = zext <2 x i32> %B to <2 x i128> - %E = and <2 x i128> %D, <i128 31, i128 undef> + %E = and <2 x i128> %D, <i128 31, i128 poison> %F = shl <2 x i128> %C, %E %G = trunc <2 x i128> %F to <2 x i64> ret <2 x i64> %G @@ -453,18 +451,17 @@ define <2 x i64> @test12_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) { ret <2 x i64> %G } -define <2 x i64> @test12_vec_undef(<2 x i32> %A, <2 x i32> %B) { -; CHECK-LABEL: @test12_vec_undef( -; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i128> -; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i128> -; CHECK-NEXT: [[E:%.*]] = and <2 x i128> [[D]], <i128 31, i128 undef> -; CHECK-NEXT: [[F:%.*]] = lshr <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc nuw nsw <2 x i128> [[F]] to <2 x i64> +define <2 x i64> @test12_vec_poison(<2 x i32> %A, <2 x i32> %B) { +; CHECK-LABEL: @test12_vec_poison( +; CHECK-NEXT: [[C:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 poison> +; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[G:%.*]] = lshr <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[G]] ; %C = zext <2 x i32> %A to <2 x i128> %D = zext <2 x i32> %B to <2 x i128> - %E = and <2 x i128> %D, <i128 31, i128 undef> + %E = and <2 x i128> %D, <i128 31, i128 poison> %F = lshr <2 x i128> %C, %E %G = trunc <2 x i128> %F to <2 x i64> ret <2 x i64> %G @@ -518,18 +515,17 @@ define <2 x i64> @test13_vec_nonuniform(<2 x i32> %A, <2 x i32> %B) { ret <2 x i64> %G } 
-define <2 x i64> @test13_vec_undef(<2 x i32> %A, <2 x i32> %B) { -; CHECK-LABEL: @test13_vec_undef( -; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i128> -; CHECK-NEXT: [[D:%.*]] = zext <2 x i32> [[B:%.*]] to <2 x i128> -; CHECK-NEXT: [[E:%.*]] = and <2 x i128> [[D]], <i128 31, i128 undef> -; CHECK-NEXT: [[F:%.*]] = ashr <2 x i128> [[C]], [[E]] -; CHECK-NEXT: [[G:%.*]] = trunc nsw <2 x i128> [[F]] to <2 x i64> +define <2 x i64> @test13_vec_poison(<2 x i32> %A, <2 x i32> %B) { +; CHECK-LABEL: @test13_vec_poison( +; CHECK-NEXT: [[C:%.*]] = sext <2 x i32> [[A:%.*]] to <2 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[B:%.*]], <i32 31, i32 poison> +; CHECK-NEXT: [[E:%.*]] = zext nneg <2 x i32> [[TMP1]] to <2 x i64> +; CHECK-NEXT: [[G:%.*]] = ashr <2 x i64> [[C]], [[E]] ; CHECK-NEXT: ret <2 x i64> [[G]] ; %C = sext <2 x i32> %A to <2 x i128> %D = zext <2 x i32> %B to <2 x i128> - %E = and <2 x i128> %D, <i128 31, i128 undef> + %E = and <2 x i128> %D, <i128 31, i128 poison> %F = ashr <2 x i128> %C, %E %G = trunc <2 x i128> %F to <2 x i64> ret <2 x i64> %G @@ -766,13 +762,13 @@ define <2 x i32> @trunc_shl_v2i32_v2i64_uniform(<2 x i64> %val) { ret <2 x i32> %trunc } -define <2 x i32> @trunc_shl_v2i32_v2i64_undef(<2 x i64> %val) { -; CHECK-LABEL: @trunc_shl_v2i32_v2i64_undef( +define <2 x i32> @trunc_shl_v2i32_v2i64_poison(<2 x i64> %val) { +; CHECK-LABEL: @trunc_shl_v2i32_v2i64_poison( ; CHECK-NEXT: [[VAL_TR:%.*]] = trunc <2 x i64> [[VAL:%.*]] to <2 x i32> -; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], <i32 31, i32 undef> +; CHECK-NEXT: [[TRUNC:%.*]] = shl <2 x i32> [[VAL_TR]], <i32 31, i32 poison> ; CHECK-NEXT: ret <2 x i32> [[TRUNC]] ; - %shl = shl <2 x i64> %val, <i64 31, i64 undef> + %shl = shl <2 x i64> %val, <i64 31, i64 poison> %trunc = trunc <2 x i64> %shl to <2 x i32> ret <2 x i32> %trunc } @@ -917,7 +913,7 @@ define <4 x i8> @wide_shuf(<4 x i32> %x) { ret <4 x i8> %trunc } -; trunc (shuffle X, undef, SplatMask) --> shuffle (trunc X), 
undef, SplatMask +; trunc (shuffle X, poison, SplatMask) --> shuffle (trunc X), poison, SplatMask define <4 x i8> @wide_splat1(<4 x i32> %x) { ; CHECK-LABEL: @wide_splat1( @@ -925,13 +921,13 @@ define <4 x i8> @wide_splat1(<4 x i32> %x) { ; CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> ; CHECK-NEXT: ret <4 x i8> [[TRUNC]] ; - %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> + %shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> %trunc = trunc <4 x i32> %shuf to <4 x i8> ret <4 x i8> %trunc } ; Test weird types. -; trunc (shuffle X, undef, SplatMask) --> shuffle (trunc X), undef, SplatMask +; trunc (shuffle X, poison, SplatMask) --> shuffle (trunc X), poison, SplatMask define <3 x i31> @wide_splat2(<3 x i33> %x) { ; CHECK-LABEL: @wide_splat2( @@ -939,14 +935,14 @@ define <3 x i31> @wide_splat2(<3 x i33> %x) { ; CHECK-NEXT: [[TRUNC:%.*]] = shufflevector <3 x i31> [[TMP1]], <3 x i31> poison, <3 x i32> <i32 1, i32 1, i32 1> ; CHECK-NEXT: ret <3 x i31> [[TRUNC]] ; - %shuf = shufflevector <3 x i33> %x, <3 x i33> undef, <3 x i32> <i32 1, i32 1, i32 1> + %shuf = shufflevector <3 x i33> %x, <3 x i33> poison, <3 x i32> <i32 1, i32 1, i32 1> %trunc = trunc <3 x i33> %shuf to <3 x i31> ret <3 x i31> %trunc } ; FIXME: -; trunc (shuffle X, undef, SplatMask) --> shuffle (trunc X), undef, SplatMask -; A mask with undef elements should still be considered a splat mask. +; trunc (shuffle X, poison, SplatMask) --> shuffle (trunc X), poison, SplatMask +; A mask with poison elements should still be considered a splat mask. 
define <3 x i31> @wide_splat3(<3 x i33> %x) { ; CHECK-LABEL: @wide_splat3( @@ -954,7 +950,7 @@ define <3 x i31> @wide_splat3(<3 x i33> %x) { ; CHECK-NEXT: [[TRUNC:%.*]] = trunc <3 x i33> [[SHUF]] to <3 x i31> ; CHECK-NEXT: ret <3 x i31> [[TRUNC]] ; - %shuf = shufflevector <3 x i33> %x, <3 x i33> undef, <3 x i32> <i32 undef, i32 1, i32 1> + %shuf = shufflevector <3 x i33> %x, <3 x i33> poison, <3 x i32> <i32 poison, i32 1, i32 1> %trunc = trunc <3 x i33> %shuf to <3 x i31> ret <3 x i31> %trunc } diff --git a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll index 1ffcfb4..241d9cb 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll +++ b/llvm/test/Transforms/InstCombine/unsigned-mul-lack-of-overflow-check-via-udiv-of-allones.ll @@ -30,14 +30,14 @@ define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ret <2 x i1> %r } -define <3 x i1> @t2_vec_undef(<3 x i8> %x, <3 x i8> %y) { -; CHECK-LABEL: @t2_vec_undef( +define <3 x i1> @t2_vec_poison(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @t2_vec_poison( ; CHECK-NEXT: [[MUL:%.*]] = call { <3 x i8>, <3 x i1> } @llvm.umul.with.overflow.v3i8(<3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]]) ; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { <3 x i8>, <3 x i1> } [[MUL]], 1 ; CHECK-NEXT: [[MUL_NOT_OV:%.*]] = xor <3 x i1> [[MUL_OV]], <i1 true, i1 true, i1 true> ; CHECK-NEXT: ret <3 x i1> [[MUL_NOT_OV]] ; - %t0 = udiv <3 x i8> <i8 -1, i8 undef, i8 -1>, %x + %t0 = udiv <3 x i8> <i8 -1, i8 poison, i8 -1>, %x %r = icmp uge <3 x i8> %t0, %y ret <3 x i1> %r } diff --git a/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-udiv-of-allones.ll b/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-udiv-of-allones.ll index 710a09f..7eb08bd 100644 --- a/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-udiv-of-allones.ll +++ 
b/llvm/test/Transforms/InstCombine/unsigned-mul-overflow-check-via-udiv-of-allones.ll @@ -28,13 +28,13 @@ define <2 x i1> @t1_vec(<2 x i8> %x, <2 x i8> %y) { ret <2 x i1> %r } -define <3 x i1> @t2_vec_undef(<3 x i8> %x, <3 x i8> %y) { -; CHECK-LABEL: @t2_vec_undef( +define <3 x i1> @t2_vec_poison(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @t2_vec_poison( ; CHECK-NEXT: [[MUL:%.*]] = call { <3 x i8>, <3 x i1> } @llvm.umul.with.overflow.v3i8(<3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]]) ; CHECK-NEXT: [[MUL_OV:%.*]] = extractvalue { <3 x i8>, <3 x i1> } [[MUL]], 1 ; CHECK-NEXT: ret <3 x i1> [[MUL_OV]] ; - %t0 = udiv <3 x i8> <i8 -1, i8 undef, i8 -1>, %x + %t0 = udiv <3 x i8> <i8 -1, i8 poison, i8 -1>, %x %r = icmp ult <3 x i8> %t0, %y ret <3 x i1> %r } diff --git a/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll b/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll index adacf3c..262942a 100644 --- a/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll +++ b/llvm/test/Transforms/InstCombine/variable-signext-of-variable-high-bit-extraction.ll @@ -203,20 +203,20 @@ define <2 x i32> @t4_vec(<2 x i64> %data, <2 x i32> %nbits) { ret <2 x i32> %signextended } -define <3 x i32> @t5_vec_undef(<3 x i64> %data, <3 x i32> %nbits) { -; CHECK-LABEL: @t5_vec_undef( -; CHECK-NEXT: [[SKIP_HIGH:%.*]] = sub <3 x i32> <i32 64, i32 64, i32 undef>, [[NBITS:%.*]] +define <3 x i32> @t5_vec_poison(<3 x i64> %data, <3 x i32> %nbits) { +; CHECK-LABEL: @t5_vec_poison( +; CHECK-NEXT: [[SKIP_HIGH:%.*]] = sub <3 x i32> <i32 64, i32 64, i32 poison>, [[NBITS:%.*]] ; CHECK-NEXT: [[SKIP_HIGH_WIDE:%.*]] = zext nneg <3 x i32> [[SKIP_HIGH]] to <3 x i64> ; CHECK-NEXT: [[TMP1:%.*]] = ashr <3 x i64> [[DATA:%.*]], [[SKIP_HIGH_WIDE]] ; CHECK-NEXT: [[SIGNEXTENDED:%.*]] = trunc <3 x i64> [[TMP1]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[SIGNEXTENDED]] ; - %skip_high = sub <3 x i32> <i32 64, i32 64, i32 undef>, 
%nbits + %skip_high = sub <3 x i32> <i32 64, i32 64, i32 poison>, %nbits %skip_high_wide = zext <3 x i32> %skip_high to <3 x i64> %extracted = lshr <3 x i64> %data, %skip_high_wide %extracted_narrow = trunc <3 x i64> %extracted to <3 x i32> - %num_high_bits_to_smear_narrow0 = sub <3 x i32> <i32 32, i32 32, i32 undef>, %nbits - %num_high_bits_to_smear_narrow1 = sub <3 x i32> <i32 undef, i32 32, i32 32>, %nbits + %num_high_bits_to_smear_narrow0 = sub <3 x i32> <i32 32, i32 32, i32 poison>, %nbits + %num_high_bits_to_smear_narrow1 = sub <3 x i32> <i32 poison, i32 32, i32 32>, %nbits %signbit_positioned = shl <3 x i32> %extracted_narrow, %num_high_bits_to_smear_narrow0 %signextended = ashr <3 x i32> %signbit_positioned, %num_high_bits_to_smear_narrow1 ret <3 x i32> %signextended diff --git a/llvm/test/Transforms/InstCombine/vec_sext.ll b/llvm/test/Transforms/InstCombine/vec_sext.ll index a880d5e..9f5f957 100644 --- a/llvm/test/Transforms/InstCombine/vec_sext.ll +++ b/llvm/test/Transforms/InstCombine/vec_sext.ll @@ -42,24 +42,24 @@ define <4 x i32> @vec_select_alternate_sign_bit_test(<4 x i32> %a, <4 x i32> %b) ret <4 x i32> %cond } -define <2 x i32> @is_negative_undef_elt(<2 x i32> %a) { -; CHECK-LABEL: @is_negative_undef_elt( +define <2 x i32> @is_negative_poison_elt(<2 x i32> %a) { +; CHECK-LABEL: @is_negative_poison_elt( ; CHECK-NEXT: [[A_LOBIT:%.*]] = ashr <2 x i32> [[A:%.*]], <i32 31, i32 31> ; CHECK-NEXT: ret <2 x i32> [[A_LOBIT]] ; - %cmp = icmp slt <2 x i32> %a, <i32 0, i32 undef> + %cmp = icmp slt <2 x i32> %a, <i32 0, i32 poison> %sext = sext <2 x i1> %cmp to <2 x i32> ret <2 x i32> %sext } -define <2 x i32> @is_positive_undef_elt(<2 x i32> %a) { -; CHECK-LABEL: @is_positive_undef_elt( -; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], <i32 undef, i32 -1> +define <2 x i32> @is_positive_poison_elt(<2 x i32> %a) { +; CHECK-LABEL: @is_positive_poison_elt( +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> [[A:%.*]], <i32 poison, i32 -1> ; CHECK-NEXT: 
[[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[SEXT]] ; - %cmp = icmp sgt <2 x i32> %a, <i32 undef, i32 -1> + %cmp = icmp sgt <2 x i32> %a, <i32 poison, i32 -1> %sext = sext <2 x i1> %cmp to <2 x i32> ret <2 x i32> %sext } diff --git a/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll index cf1b72f..a873646 100644 --- a/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/vector-casts-inseltpoison.ll @@ -26,26 +26,26 @@ define <2 x i1> @and_cmp_is_trunc(<2 x i64> %a) { ; This is trunc. -define <2 x i1> @and_cmp_is_trunc_even_with_undef_elt(<2 x i64> %a) { -; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elt( +define <2 x i1> @and_cmp_is_trunc_even_with_poison_elt(<2 x i64> %a) { +; CHECK-LABEL: @and_cmp_is_trunc_even_with_poison_elt( ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1> ; CHECK-NEXT: ret <2 x i1> [[R]] ; - %t = and <2 x i64> %a, <i64 undef, i64 1> + %t = and <2 x i64> %a, <i64 poison, i64 1> %r = icmp ne <2 x i64> %t, zeroinitializer ret <2 x i1> %r } -; TODO: This could be just 1 instruction (trunc), but our undef matching is incomplete. +; TODO: This could be just 1 instruction (trunc), but our poison matching is incomplete. 
-define <2 x i1> @and_cmp_is_trunc_even_with_undef_elts(<2 x i64> %a) { -; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elts( -; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[A:%.*]], <i64 undef, i64 1> -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i64> [[T]], <i64 undef, i64 0> +define <2 x i1> @and_cmp_is_trunc_even_with_poison_elts(<2 x i64> %a) { +; CHECK-LABEL: @and_cmp_is_trunc_even_with_poison_elts( +; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[A:%.*]], <i64 poison, i64 1> +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i64> [[T]], <i64 poison, i64 0> ; CHECK-NEXT: ret <2 x i1> [[R]] ; - %t = and <2 x i64> %a, <i64 undef, i64 1> - %r = icmp ne <2 x i64> %t, <i64 undef, i64 0> + %t = and <2 x i64> %a, <i64 poison, i64 1> + %r = icmp ne <2 x i64> %t, <i64 poison, i64 0> ret <2 x i1> %r } diff --git a/llvm/test/Transforms/InstCombine/vector-casts.ll b/llvm/test/Transforms/InstCombine/vector-casts.ll index 281fc5f..fd2a4ff 100644 --- a/llvm/test/Transforms/InstCombine/vector-casts.ll +++ b/llvm/test/Transforms/InstCombine/vector-casts.ll @@ -26,26 +26,26 @@ define <2 x i1> @and_cmp_is_trunc(<2 x i64> %a) { ; This is trunc. -define <2 x i1> @and_cmp_is_trunc_even_with_undef_elt(<2 x i64> %a) { -; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elt( +define <2 x i1> @and_cmp_is_trunc_even_with_poison_elt(<2 x i64> %a) { +; CHECK-LABEL: @and_cmp_is_trunc_even_with_poison_elt( ; CHECK-NEXT: [[R:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i1> ; CHECK-NEXT: ret <2 x i1> [[R]] ; - %t = and <2 x i64> %a, <i64 undef, i64 1> + %t = and <2 x i64> %a, <i64 poison, i64 1> %r = icmp ne <2 x i64> %t, zeroinitializer ret <2 x i1> %r } -; TODO: This could be just 1 instruction (trunc), but our undef matching is incomplete. +; TODO: This could be just 1 instruction (trunc), but our poison matching is incomplete. 
-define <2 x i1> @and_cmp_is_trunc_even_with_undef_elts(<2 x i64> %a) { -; CHECK-LABEL: @and_cmp_is_trunc_even_with_undef_elts( -; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[A:%.*]], <i64 undef, i64 1> -; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i64> [[T]], <i64 undef, i64 0> +define <2 x i1> @and_cmp_is_trunc_even_with_poison_elts(<2 x i64> %a) { +; CHECK-LABEL: @and_cmp_is_trunc_even_with_poison_elts( +; CHECK-NEXT: [[T:%.*]] = and <2 x i64> [[A:%.*]], <i64 poison, i64 1> +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i64> [[T]], <i64 poison, i64 0> ; CHECK-NEXT: ret <2 x i1> [[R]] ; - %t = and <2 x i64> %a, <i64 undef, i64 1> - %r = icmp ne <2 x i64> %t, <i64 undef, i64 0> + %t = and <2 x i64> %a, <i64 poison, i64 1> + %r = icmp ne <2 x i64> %t, <i64 poison, i64 0> ret <2 x i1> %r } diff --git a/llvm/test/Transforms/InstCombine/vector-urem.ll b/llvm/test/Transforms/InstCombine/vector-urem.ll index d5c7747..627789a 100644 --- a/llvm/test/Transforms/InstCombine/vector-urem.ll +++ b/llvm/test/Transforms/InstCombine/vector-urem.ll @@ -19,11 +19,11 @@ define <4 x i32> @test_v4i32_const_pow2(<4 x i32> %a0) { ret <4 x i32> %1 } -define <4 x i32> @test_v4i32_const_pow2_undef(<4 x i32> %a0) { -; CHECK-LABEL: @test_v4i32_const_pow2_undef( +define <4 x i32> @test_v4i32_const_pow2_poison(<4 x i32> %a0) { +; CHECK-LABEL: @test_v4i32_const_pow2_poison( ; CHECK-NEXT: ret <4 x i32> poison ; - %1 = urem <4 x i32> %a0, <i32 1, i32 2, i32 4, i32 undef> + %1 = urem <4 x i32> %a0, <i32 1, i32 2, i32 4, i32 poison> ret <4 x i32> %1 } @@ -37,13 +37,13 @@ define <4 x i32> @test_v4i32_one(<4 x i32> %a0) { ret <4 x i32> %1 } -define <4 x i32> @test_v4i32_one_undef(<4 x i32> %a0) { -; CHECK-LABEL: @test_v4i32_one_undef( +define <4 x i32> @test_v4i32_one_poison(<4 x i32> %a0) { +; CHECK-LABEL: @test_v4i32_one_poison( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> [[A0:%.*]], <i32 1, i32 1, i32 1, i32 1> ; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[TMP1]] to <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[TMP2]] 
; - %1 = urem <4 x i32> <i32 1, i32 1, i32 1, i32 undef>, %a0 + %1 = urem <4 x i32> <i32 1, i32 1, i32 1, i32 poison>, %a0 ret <4 x i32> %1 } @@ -71,10 +71,10 @@ define <4 x i32> @test_v4i32_negconst(<4 x i32> %a0) { ret <4 x i32> %1 } -define <4 x i32> @test_v4i32_negconst_undef(<4 x i32> %a0) { -; CHECK-LABEL: @test_v4i32_negconst_undef( +define <4 x i32> @test_v4i32_negconst_poison(<4 x i32> %a0) { +; CHECK-LABEL: @test_v4i32_negconst_poison( ; CHECK-NEXT: ret <4 x i32> poison ; - %1 = urem <4 x i32> %a0, <i32 -3, i32 -5, i32 -7, i32 undef> + %1 = urem <4 x i32> %a0, <i32 -3, i32 -5, i32 -7, i32 poison> ret <4 x i32> %1 } diff --git a/llvm/test/Transforms/InstCombine/vector-xor.ll b/llvm/test/Transforms/InstCombine/vector-xor.ll index 171dd6e..ee593b5 100644 --- a/llvm/test/Transforms/InstCombine/vector-xor.ll +++ b/llvm/test/Transforms/InstCombine/vector-xor.ll @@ -53,14 +53,14 @@ define <4 x i32> @test_v4i32_xor_bswap_const(<4 x i32> %a0) { ret <4 x i32> %2 } -define <4 x i32> @test_v4i32_xor_bswap_const_undef(<4 x i32> %a0) { -; CHECK-LABEL: @test_v4i32_xor_bswap_const_undef( +define <4 x i32> @test_v4i32_xor_bswap_const_poison(<4 x i32> %a0) { +; CHECK-LABEL: @test_v4i32_xor_bswap_const_poison( ; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[A0:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 undef, i32 0, i32 2, i32 3> +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 poison, i32 0, i32 2, i32 3> ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %1 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a0) - %2 = xor <4 x i32> %1, <i32 undef, i32 0, i32 2, i32 3> + %2 = xor <4 x i32> %1, <i32 poison, i32 0, i32 2, i32 3> ret <4 x i32> %2 } @@ -105,14 +105,14 @@ define <4 x i32> @test_v4i32_not_ashr_not(<4 x i32> %x, <4 x i32> %y) { ret <4 x i32> %3 } -define <4 x i32> @test_v4i32_not_ashr_not_undef(<4 x i32> %x, <4 x i32> %y) { -; CHECK-LABEL: @test_v4i32_not_ashr_not_undef( +define <4 x i32> 
@test_v4i32_not_ashr_not_poison(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: @test_v4i32_not_ashr_not_poison( ; CHECK-NEXT: [[DOTNOT:%.*]] = ashr <4 x i32> [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: ret <4 x i32> [[DOTNOT]] ; - %1 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 undef>, %x + %1 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 poison>, %x %2 = ashr <4 x i32> %1, %y - %3 = xor <4 x i32> <i32 -1, i32 -1, i32 undef, i32 -1>, %2 + %3 = xor <4 x i32> <i32 -1, i32 -1, i32 poison, i32 -1>, %2 ret <4 x i32> %3 } @@ -138,13 +138,13 @@ define <4 x i32> @test_v4i32_not_ashr_negative_const(<4 x i32> %a0) { ret <4 x i32> %2 } -define <4 x i32> @test_v4i32_not_ashr_negative_const_undef(<4 x i32> %a0) { -; CHECK-LABEL: @test_v4i32_not_ashr_negative_const_undef( +define <4 x i32> @test_v4i32_not_ashr_negative_const_poison(<4 x i32> %a0) { +; CHECK-LABEL: @test_v4i32_not_ashr_negative_const_poison( ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> <i32 2, i32 4, i32 0, i32 8>, [[A0:%.*]] ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; - %1 = ashr <4 x i32> <i32 -3, i32 -5, i32 undef, i32 -9>, %a0 - %2 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 undef>, %1 + %1 = ashr <4 x i32> <i32 -3, i32 -5, i32 poison, i32 -9>, %a0 + %2 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 poison>, %1 ret <4 x i32> %2 } @@ -170,13 +170,13 @@ define <4 x i32> @test_v4i32_not_lshr_nonnegative_const(<4 x i32> %a0) { ret <4 x i32> %2 } -define <4 x i32> @test_v4i32_not_lshr_nonnegative_const_undef(<4 x i32> %a0) { -; CHECK-LABEL: @test_v4i32_not_lshr_nonnegative_const_undef( +define <4 x i32> @test_v4i32_not_lshr_nonnegative_const_poison(<4 x i32> %a0) { +; CHECK-LABEL: @test_v4i32_not_lshr_nonnegative_const_poison( ; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> <i32 -4, i32 -6, i32 -1, i32 -10>, [[A0:%.*]] ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; - %1 = lshr <4 x i32> <i32 3, i32 5, i32 undef, i32 9>, %a0 - %2 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 undef>, %1 + %1 = lshr <4 x i32> <i32 3, i32 5, i32 poison, i32 9>, %a0 
+ %2 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 poison>, %1 ret <4 x i32> %2 } @@ -202,13 +202,13 @@ define <4 x i32> @test_v4i32_not_sub_const(<4 x i32> %a0) { ret <4 x i32> %2 } -define <4 x i32> @test_v4i32_not_sub_const_undef(<4 x i32> %a0) { -; CHECK-LABEL: @test_v4i32_not_sub_const_undef( -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], <i32 -4, i32 undef, i32 0, i32 -16> +define <4 x i32> @test_v4i32_not_sub_const_poison(<4 x i32> %a0) { +; CHECK-LABEL: @test_v4i32_not_sub_const_poison( +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], <i32 -4, i32 poison, i32 0, i32 -16> ; CHECK-NEXT: ret <4 x i32> [[TMP1]] ; - %1 = sub <4 x i32> <i32 3, i32 undef, i32 -1, i32 15>, %a0 - %2 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 undef>, %1 + %1 = sub <4 x i32> <i32 3, i32 poison, i32 -1, i32 15>, %a0 + %2 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 poison>, %1 ret <4 x i32> %2 } @@ -235,14 +235,14 @@ define <4 x i32> @test_v4i32_xor_signmask_sub_const(<4 x i32> %a0) { ret <4 x i32> %2 } -define <4 x i32> @test_v4i32_xor_signmask_sub_const_undef(<4 x i32> %a0) { -; CHECK-LABEL: @test_v4i32_xor_signmask_sub_const_undef( -; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> <i32 3, i32 undef, i32 -1, i32 15>, [[A0:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 undef> +define <4 x i32> @test_v4i32_xor_signmask_sub_const_poison(<4 x i32> %a0) { +; CHECK-LABEL: @test_v4i32_xor_signmask_sub_const_poison( +; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> <i32 3, i32 poison, i32 -1, i32 15>, [[A0:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 poison> ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; - %1 = sub <4 x i32> <i32 3, i32 undef, i32 -1, i32 15>, %a0 - %2 = xor <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 undef>, %1 + %1 = sub <4 x i32> <i32 3, i32 poison, i32 -1, i32 15>, %a0 + %2 = xor <4 x i32> <i32 -2147483648, i32 
-2147483648, i32 -2147483648, i32 poison>, %1 ret <4 x i32> %2 } @@ -269,13 +269,13 @@ define <4 x i32> @test_v4i32_xor_signmask_add_const(<4 x i32> %a0) { ret <4 x i32> %2 } -define <4 x i32> @test_v4i32_xor_signmask_add_const_undef(<4 x i32> %a0) { -; CHECK-LABEL: @test_v4i32_xor_signmask_add_const_undef( -; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], <i32 3, i32 undef, i32 -1, i32 15> -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 undef> +define <4 x i32> @test_v4i32_xor_signmask_add_const_poison(<4 x i32> %a0) { +; CHECK-LABEL: @test_v4i32_xor_signmask_add_const_poison( +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[A0:%.*]], <i32 3, i32 poison, i32 -1, i32 15> +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 poison> ; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; - %1 = add <4 x i32> <i32 3, i32 undef, i32 -1, i32 15>, %a0 - %2 = xor <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 undef>, %1 + %1 = add <4 x i32> <i32 3, i32 poison, i32 -1, i32 15>, %a0 + %2 = xor <4 x i32> <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 poison>, %1 ret <4 x i32> %2 } diff --git a/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll b/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll index 7fed952..12739b5 100644 --- a/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll +++ b/llvm/test/Transforms/InstCombine/zext-bool-add-sub.ll @@ -126,13 +126,13 @@ define <2 x i64> @zext_negate_vec(<2 x i1> %A) { ret <2 x i64> %sub } -define <2 x i64> @zext_negate_vec_undef_elt(<2 x i1> %A) { -; CHECK-LABEL: @zext_negate_vec_undef_elt( +define <2 x i64> @zext_negate_vec_poison_elt(<2 x i1> %A) { +; CHECK-LABEL: @zext_negate_vec_poison_elt( ; CHECK-NEXT: [[EXT_NEG:%.*]] = sext <2 x i1> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[EXT_NEG]] ; %ext = zext <2 x i1> %A to <2 x i64> - %sub = sub <2 x i64> <i64 0, i64 undef>, %ext + %sub 
= sub <2 x i64> <i64 0, i64 poison>, %ext ret <2 x i64> %sub } @@ -169,13 +169,13 @@ define <2 x i64> @zext_sub_const_vec(<2 x i1> %A) { ret <2 x i64> %sub } -define <2 x i64> @zext_sub_const_vec_undef_elt(<2 x i1> %A) { -; CHECK-LABEL: @zext_sub_const_vec_undef_elt( -; CHECK-NEXT: [[SUB:%.*]] = select <2 x i1> [[A:%.*]], <2 x i64> <i64 41, i64 undef>, <2 x i64> <i64 42, i64 undef> +define <2 x i64> @zext_sub_const_vec_poison_elt(<2 x i1> %A) { +; CHECK-LABEL: @zext_sub_const_vec_poison_elt( +; CHECK-NEXT: [[SUB:%.*]] = select <2 x i1> [[A:%.*]], <2 x i64> <i64 41, i64 poison>, <2 x i64> <i64 42, i64 poison> ; CHECK-NEXT: ret <2 x i64> [[SUB]] ; %ext = zext <2 x i1> %A to <2 x i64> - %sub = sub <2 x i64> <i64 42, i64 undef>, %ext + %sub = sub <2 x i64> <i64 42, i64 poison>, %ext ret <2 x i64> %sub } @@ -212,13 +212,13 @@ define <2 x i64> @sext_negate_vec(<2 x i1> %A) { ret <2 x i64> %sub } -define <2 x i64> @sext_negate_vec_undef_elt(<2 x i1> %A) { -; CHECK-LABEL: @sext_negate_vec_undef_elt( +define <2 x i64> @sext_negate_vec_poison_elt(<2 x i1> %A) { +; CHECK-LABEL: @sext_negate_vec_poison_elt( ; CHECK-NEXT: [[EXT_NEG:%.*]] = zext <2 x i1> [[A:%.*]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[EXT_NEG]] ; %ext = sext <2 x i1> %A to <2 x i64> - %sub = sub <2 x i64> <i64 0, i64 undef>, %ext + %sub = sub <2 x i64> <i64 0, i64 poison>, %ext ret <2 x i64> %sub } @@ -255,13 +255,13 @@ define <2 x i64> @sext_sub_const_vec(<2 x i1> %A) { ret <2 x i64> %sub } -define <2 x i64> @sext_sub_const_vec_undef_elt(<2 x i1> %A) { -; CHECK-LABEL: @sext_sub_const_vec_undef_elt( -; CHECK-NEXT: [[SUB:%.*]] = select <2 x i1> [[A:%.*]], <2 x i64> <i64 undef, i64 43>, <2 x i64> <i64 undef, i64 42> +define <2 x i64> @sext_sub_const_vec_poison_elt(<2 x i1> %A) { +; CHECK-LABEL: @sext_sub_const_vec_poison_elt( +; CHECK-NEXT: [[SUB:%.*]] = select <2 x i1> [[A:%.*]], <2 x i64> <i64 poison, i64 43>, <2 x i64> <i64 poison, i64 42> ; CHECK-NEXT: ret <2 x i64> [[SUB]] ; %ext = sext <2 x i1> %A to <2 
x i64> - %sub = sub <2 x i64> <i64 undef, i64 42>, %ext + %sub = sub <2 x i64> <i64 poison, i64 42>, %ext ret <2 x i64> %sub } diff --git a/llvm/test/Transforms/InstSimplify/AndOrXor.ll b/llvm/test/Transforms/InstSimplify/AndOrXor.ll index 494b6bc..2e3a605 100644 --- a/llvm/test/Transforms/InstSimplify/AndOrXor.ll +++ b/llvm/test/Transforms/InstSimplify/AndOrXor.ll @@ -12,11 +12,11 @@ define i8 @and0(i8 %x) { ret i8 %r } -define <2 x i8> @and0_vec_undef_elt(<2 x i8> %x) { -; CHECK-LABEL: @and0_vec_undef_elt( +define <2 x i8> @and0_vec_poison_elt(<2 x i8> %x) { +; CHECK-LABEL: @and0_vec_poison_elt( ; CHECK-NEXT: ret <2 x i8> zeroinitializer ; - %r = and <2 x i8> %x, <i8 undef, i8 0> + %r = and <2 x i8> %x, <i8 poison, i8 0> ret <2 x i8> %r } @@ -31,14 +31,14 @@ define <2 x i32> @add_nsw_signbit(<2 x i32> %x) { ret <2 x i32> %z } -; Undef elements in either constant vector are ok. +; Poison elements in either constant vector are ok. -define <2 x i32> @add_nsw_signbit_undef(<2 x i32> %x) { -; CHECK-LABEL: @add_nsw_signbit_undef( +define <2 x i32> @add_nsw_signbit_poison(<2 x i32> %x) { +; CHECK-LABEL: @add_nsw_signbit_poison( ; CHECK-NEXT: ret <2 x i32> [[X:%.*]] ; - %y = xor <2 x i32> %x, <i32 undef, i32 -2147483648> - %z = add nsw <2 x i32> %y, <i32 -2147483648, i32 undef> + %y = xor <2 x i32> %x, <i32 poison, i32 -2147483648> + %z = add nsw <2 x i32> %y, <i32 -2147483648, i32 poison> ret <2 x i32> %z } @@ -53,14 +53,14 @@ define <2 x i5> @add_nuw_signbit(<2 x i5> %x) { ret <2 x i5> %z } -; Undef elements in either constant vector are ok. +; Poison elements in either constant vector are ok. 
-define <2 x i5> @add_nuw_signbit_undef(<2 x i5> %x) { -; CHECK-LABEL: @add_nuw_signbit_undef( +define <2 x i5> @add_nuw_signbit_poison(<2 x i5> %x) { +; CHECK-LABEL: @add_nuw_signbit_poison( ; CHECK-NEXT: ret <2 x i5> [[X:%.*]] ; - %y = xor <2 x i5> %x, <i5 -16, i5 undef> - %z = add nuw <2 x i5> %y, <i5 undef, i5 -16> + %y = xor <2 x i5> %x, <i5 -16, i5 poison> + %z = add nuw <2 x i5> %y, <i5 poison, i5 -16> ret <2 x i5> %z } @@ -584,7 +584,7 @@ define <2 x i32> @or_xor_andn_commute2(<2 x i32> %a, <2 x i32> %b) { ; CHECK-NEXT: ret <2 x i32> [[XOR]] ; %xor = xor <2 x i32> %a, %b - %neg = xor <2 x i32> %b, <i32 -1, i32 undef> + %neg = xor <2 x i32> %b, <i32 -1, i32 poison> %and = and <2 x i32> %a, %neg %or = or <2 x i32> %xor, %and ret <2 x i32> %or @@ -708,15 +708,13 @@ define <2 x i32> @or_xorn_and_commute2_undef(<2 x i32> %a, <2 x i32> %b) { ret <2 x i32> %or } -; TODO: Unlike the above test, this is safe to fold. +; Unlike the above test, this is safe to fold. define <2 x i32> @or_xorn_and_commute2_poison(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: @or_xorn_and_commute2_poison( ; CHECK-NEXT: [[NEGA:%.*]] = xor <2 x i32> [[A:%.*]], <i32 poison, i32 -1> -; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[B:%.*]], [[A]] -; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[B]], [[NEGA]] -; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[XOR]], [[AND]] -; CHECK-NEXT: ret <2 x i32> [[OR]] +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[B:%.*]], [[NEGA]] +; CHECK-NEXT: ret <2 x i32> [[XOR]] ; %nega = xor <2 x i32> %a, <i32 poison, i32 -1> %and = and <2 x i32> %b, %a diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll index 52c207a..c6f6b65 100644 --- a/llvm/test/Transforms/InstSimplify/call.ll +++ b/llvm/test/Transforms/InstSimplify/call.ll @@ -976,7 +976,7 @@ define <2 x i8> @fshr_zero_vec(<2 x i8> %shamt) { ; CHECK-LABEL: @fshr_zero_vec( ; CHECK-NEXT: ret <2 x i8> zeroinitializer ; - %r = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> zeroinitializer, 
<2 x i8> <i8 0, i8 undef>, <2 x i8> %shamt) + %r = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> zeroinitializer, <2 x i8> <i8 0, i8 poison>, <2 x i8> %shamt) ret <2 x i8> %r } @@ -984,7 +984,7 @@ define <2 x i7> @fshl_ones_vec(<2 x i7> %shamt) { ; CHECK-LABEL: @fshl_ones_vec( ; CHECK-NEXT: ret <2 x i7> <i7 -1, i7 -1> ; - %r = call <2 x i7> @llvm.fshl.v2i7(<2 x i7> <i7 undef, i7 -1>, <2 x i7> <i7 -1, i7 undef>, <2 x i7> %shamt) + %r = call <2 x i7> @llvm.fshl.v2i7(<2 x i7> <i7 poison, i7 -1>, <2 x i7> <i7 -1, i7 poison>, <2 x i7> %shamt) ret <2 x i7> %r } @@ -1466,7 +1466,7 @@ define <3 x i33> @cttz_shl1_vec(<3 x i33> %x) { ; CHECK-LABEL: @cttz_shl1_vec( ; CHECK-NEXT: ret <3 x i33> [[X:%.*]] ; - %s = shl <3 x i33> <i33 1, i33 1, i33 undef>, %x + %s = shl <3 x i33> <i33 1, i33 1, i33 poison>, %x %r = call <3 x i33> @llvm.cttz.v3i33(<3 x i33> %s, i1 false) ret <3 x i33> %r } @@ -1509,7 +1509,7 @@ define <3 x i33> @ctlz_lshr_sign_bit_vec(<3 x i33> %x) { ; CHECK-LABEL: @ctlz_lshr_sign_bit_vec( ; CHECK-NEXT: ret <3 x i33> [[X:%.*]] ; - %s = lshr <3 x i33> <i33 undef, i33 4294967296, i33 4294967296>, %x + %s = lshr <3 x i33> <i33 poison, i33 4294967296, i33 4294967296>, %x %r = call <3 x i33> @llvm.ctlz.v3i33(<3 x i33> %s, i1 false) ret <3 x i33> %r } @@ -1549,7 +1549,7 @@ define <3 x i33> @ctlz_ashr_sign_bit_vec(<3 x i33> %x) { ; CHECK-LABEL: @ctlz_ashr_sign_bit_vec( ; CHECK-NEXT: ret <3 x i33> zeroinitializer ; - %s = ashr <3 x i33> <i33 4294967296, i33 undef, i33 4294967296>, %x + %s = ashr <3 x i33> <i33 4294967296, i33 poison, i33 4294967296>, %x %r = call <3 x i33> @llvm.ctlz.v3i33(<3 x i33> %s, i1 true) ret <3 x i33> %r } diff --git a/llvm/test/Transforms/InstSimplify/compare.ll b/llvm/test/Transforms/InstSimplify/compare.ll index 1e90f0e..724912d 100644 --- a/llvm/test/Transforms/InstSimplify/compare.ll +++ b/llvm/test/Transforms/InstSimplify/compare.ll @@ -1659,21 +1659,21 @@ define <2 x i1> @icmp_shl_1_ugt_signmask(<2 x i8> %V) { ret <2 x i1> %cmp } -define <2 x i1> 
@icmp_shl_1_ugt_signmask_undef(<2 x i8> %V) { -; CHECK-LABEL: @icmp_shl_1_ugt_signmask_undef( +define <2 x i1> @icmp_shl_1_ugt_signmask_poison(<2 x i8> %V) { +; CHECK-LABEL: @icmp_shl_1_ugt_signmask_poison( ; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %shl = shl <2 x i8> <i8 1, i8 1>, %V - %cmp = icmp ugt <2 x i8> %shl, <i8 128, i8 undef> + %cmp = icmp ugt <2 x i8> %shl, <i8 128, i8 poison> ret <2 x i1> %cmp } -define <2 x i1> @icmp_shl_1_ugt_signmask_undef2(<2 x i8> %V) { -; CHECK-LABEL: @icmp_shl_1_ugt_signmask_undef2( +define <2 x i1> @icmp_shl_1_ugt_signmask_poison2(<2 x i8> %V) { +; CHECK-LABEL: @icmp_shl_1_ugt_signmask_poison2( ; CHECK-NEXT: ret <2 x i1> zeroinitializer ; - %shl = shl <2 x i8> <i8 1, i8 undef>, %V - %cmp = icmp ugt <2 x i8> %shl, <i8 undef, i8 128> + %shl = shl <2 x i8> <i8 1, i8 poison>, %V + %cmp = icmp ugt <2 x i8> %shl, <i8 poison, i8 128> ret <2 x i1> %cmp } @@ -1695,21 +1695,21 @@ define <2 x i1> @icmp_shl_1_ule_signmask(<2 x i8> %V) { ret <2 x i1> %cmp } -define <2 x i1> @icmp_shl_1_ule_signmask_undef(<2 x i8> %V) { -; CHECK-LABEL: @icmp_shl_1_ule_signmask_undef( +define <2 x i1> @icmp_shl_1_ule_signmask_poison(<2 x i8> %V) { +; CHECK-LABEL: @icmp_shl_1_ule_signmask_poison( ; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true> ; %shl = shl <2 x i8> <i8 1, i8 1>, %V - %cmp = icmp ule <2 x i8> %shl, <i8 128, i8 undef> + %cmp = icmp ule <2 x i8> %shl, <i8 128, i8 poison> ret <2 x i1> %cmp } -define <2 x i1> @icmp_shl_1_ule_signmask_undef2(<2 x i8> %V) { -; CHECK-LABEL: @icmp_shl_1_ule_signmask_undef2( +define <2 x i1> @icmp_shl_1_ule_signmask_poison2(<2 x i8> %V) { +; CHECK-LABEL: @icmp_shl_1_ule_signmask_poison2( ; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true> ; - %shl = shl <2 x i8> <i8 1, i8 undef>, %V - %cmp = icmp ule <2 x i8> %shl, <i8 undef, i8 128> + %shl = shl <2 x i8> <i8 1, i8 poison>, %V + %cmp = icmp ule <2 x i8> %shl, <i8 poison, i8 128> ret <2 x i1> %cmp } @@ -1731,12 +1731,12 @@ define <2 x i1> @shl_1_cmp_eq_nonpow2_splat(<2 x i32> 
%x) { ret <2 x i1> %c } -define <2 x i1> @shl_1_cmp_eq_nonpow2_splat_undef(<2 x i32> %x) { -; CHECK-LABEL: @shl_1_cmp_eq_nonpow2_splat_undef( +define <2 x i1> @shl_1_cmp_eq_nonpow2_splat_poison(<2 x i32> %x) { +; CHECK-LABEL: @shl_1_cmp_eq_nonpow2_splat_poison( ; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %s = shl <2 x i32> <i32 1, i32 1>, %x - %c = icmp eq <2 x i32> %s, <i32 31, i32 undef> + %c = icmp eq <2 x i32> %s, <i32 31, i32 poison> ret <2 x i1> %c } @@ -1758,12 +1758,12 @@ define <2 x i1> @shl_1_cmp_ne_nonpow2_splat(<2 x i32> %x) { ret <2 x i1> %c } -define <2 x i1> @shl_1_cmp_ne_nonpow2_splat_undef(<2 x i32> %x) { -; CHECK-LABEL: @shl_1_cmp_ne_nonpow2_splat_undef( +define <2 x i1> @shl_1_cmp_ne_nonpow2_splat_poison(<2 x i32> %x) { +; CHECK-LABEL: @shl_1_cmp_ne_nonpow2_splat_poison( ; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true> ; - %s = shl <2 x i32> <i32 undef, i32 1>, %x - %c = icmp ne <2 x i32> %s, <i32 42, i32 undef> + %s = shl <2 x i32> <i32 poison, i32 1>, %x + %c = icmp ne <2 x i32> %s, <i32 42, i32 poison> ret <2 x i1> %c } @@ -1776,12 +1776,12 @@ define i1 @shl_pow2_cmp_eq_nonpow2(i32 %x) { ret i1 %c } -define <2 x i1> @shl_pow21_cmp_ne_nonpow2_splat_undef(<2 x i32> %x) { -; CHECK-LABEL: @shl_pow21_cmp_ne_nonpow2_splat_undef( +define <2 x i1> @shl_pow21_cmp_ne_nonpow2_splat_poison(<2 x i32> %x) { +; CHECK-LABEL: @shl_pow21_cmp_ne_nonpow2_splat_poison( ; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true> ; - %s = shl <2 x i32> <i32 undef, i32 4>, %x - %c = icmp ne <2 x i32> %s, <i32 31, i32 undef> + %s = shl <2 x i32> <i32 poison, i32 4>, %x + %c = icmp ne <2 x i32> %s, <i32 31, i32 poison> ret <2 x i1> %c } @@ -1820,12 +1820,12 @@ define i1 @shl_pow2_cmp_eq_zero_nuw(i32 %x) { ret i1 %c } -define <2 x i1> @shl_pow2_cmp_ne_zero_nuw_splat_undef(<2 x i32> %x) { -; CHECK-LABEL: @shl_pow2_cmp_ne_zero_nuw_splat_undef( +define <2 x i1> @shl_pow2_cmp_ne_zero_nuw_splat_poison(<2 x i32> %x) { +; CHECK-LABEL: @shl_pow2_cmp_ne_zero_nuw_splat_poison( ; CHECK-NEXT: ret 
<2 x i1> <i1 true, i1 true> ; - %s = shl nuw <2 x i32> <i32 16, i32 undef>, %x - %c = icmp ne <2 x i32> %s, <i32 undef, i32 0> + %s = shl nuw <2 x i32> <i32 16, i32 poison>, %x + %c = icmp ne <2 x i32> %s, <i32 poison, i32 0> ret <2 x i1> %c } @@ -1838,12 +1838,12 @@ define i1 @shl_pow2_cmp_ne_zero_nsw(i32 %x) { ret i1 %c } -define <2 x i1> @shl_pow2_cmp_eq_zero_nsw_splat_undef(<2 x i32> %x) { -; CHECK-LABEL: @shl_pow2_cmp_eq_zero_nsw_splat_undef( +define <2 x i1> @shl_pow2_cmp_eq_zero_nsw_splat_poison(<2 x i32> %x) { +; CHECK-LABEL: @shl_pow2_cmp_eq_zero_nsw_splat_poison( ; CHECK-NEXT: ret <2 x i1> zeroinitializer ; - %s = shl nsw <2 x i32> <i32 undef, i32 16>, %x - %c = icmp eq <2 x i32> %s, <i32 0, i32 undef> + %s = shl nsw <2 x i32> <i32 poison, i32 16>, %x + %c = icmp eq <2 x i32> %s, <i32 0, i32 poison> ret <2 x i1> %c } diff --git a/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll b/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll index 7c9d9a9..92d6cc3 100644 --- a/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll +++ b/llvm/test/Transforms/InstSimplify/constantfold-add-nuw-allones-to-allones.ll @@ -63,11 +63,11 @@ define <2 x i8> @add_vec(<2 x i8> %x) { ret <2 x i8> %ret } -define <3 x i8> @add_vec_undef(<3 x i8> %x) { -; CHECK-LABEL: @add_vec_undef( -; CHECK-NEXT: ret <3 x i8> <i8 -1, i8 undef, i8 -1> +define <3 x i8> @add_vec_poison(<3 x i8> %x) { +; CHECK-LABEL: @add_vec_poison( +; CHECK-NEXT: ret <3 x i8> <i8 -1, i8 poison, i8 -1> ; - %ret = add nuw <3 x i8> %x, <i8 -1, i8 undef, i8 -1> + %ret = add nuw <3 x i8> %x, <i8 -1, i8 poison, i8 -1> ret <3 x i8> %ret } diff --git a/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll b/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll index b5b5773..3f4a088 100644 --- a/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll +++ 
b/llvm/test/Transforms/InstSimplify/constantfold-shl-nuw-C-to-C.ll @@ -78,11 +78,11 @@ define <2 x i8> @shl_vec(<2 x i8> %x) { ret <2 x i8> %ret } -define <3 x i8> @shl_vec_undef(<3 x i8> %x) { -; CHECK-LABEL: @shl_vec_undef( -; CHECK-NEXT: ret <3 x i8> <i8 -1, i8 undef, i8 -1> +define <3 x i8> @shl_vec_poison(<3 x i8> %x) { +; CHECK-LABEL: @shl_vec_poison( +; CHECK-NEXT: ret <3 x i8> <i8 -1, i8 poison, i8 -1> ; - %ret = shl nuw <3 x i8> <i8 -1, i8 undef, i8 -1>, %x + %ret = shl nuw <3 x i8> <i8 -1, i8 poison, i8 -1>, %x ret <3 x i8> %ret } diff --git a/llvm/test/Transforms/InstSimplify/div.ll b/llvm/test/Transforms/InstSimplify/div.ll index e13b6f1..5ca2e88 100644 --- a/llvm/test/Transforms/InstSimplify/div.ll +++ b/llvm/test/Transforms/InstSimplify/div.ll @@ -17,11 +17,11 @@ define <2 x i32> @zero_dividend_vector(<2 x i32> %A) { ret <2 x i32> %B } -define <2 x i32> @zero_dividend_vector_undef_elt(<2 x i32> %A) { -; CHECK-LABEL: @zero_dividend_vector_undef_elt( +define <2 x i32> @zero_dividend_vector_poison_elt(<2 x i32> %A) { +; CHECK-LABEL: @zero_dividend_vector_poison_elt( ; CHECK-NEXT: ret <2 x i32> zeroinitializer ; - %B = sdiv <2 x i32> <i32 0, i32 undef>, %A + %B = sdiv <2 x i32> <i32 0, i32 poison>, %A ret <2 x i32> %B } @@ -59,23 +59,23 @@ define <2 x i8> @udiv_zero_elt_vec(<2 x i8> %x) { ret <2 x i8> %div } -define <2 x i8> @sdiv_undef_elt_vec(<2 x i8> %x) { -; CHECK-LABEL: @sdiv_undef_elt_vec( +define <2 x i8> @sdiv_poison_elt_vec(<2 x i8> %x) { +; CHECK-LABEL: @sdiv_poison_elt_vec( ; CHECK-NEXT: ret <2 x i8> poison ; - %div = sdiv <2 x i8> %x, <i8 -42, i8 undef> + %div = sdiv <2 x i8> %x, <i8 -42, i8 poison> ret <2 x i8> %div } -define <2 x i8> @udiv_undef_elt_vec(<2 x i8> %x) { -; CHECK-LABEL: @udiv_undef_elt_vec( +define <2 x i8> @udiv_poison_elt_vec(<2 x i8> %x) { +; CHECK-LABEL: @udiv_poison_elt_vec( ; CHECK-NEXT: ret <2 x i8> poison ; - %div = udiv <2 x i8> %x, <i8 undef, i8 42> + %div = udiv <2 x i8> %x, <i8 poison, i8 42> ret <2 x i8> %div } -; 
Division-by-zero is undef. UB in any vector lane means the whole op is undef. +; Division-by-zero is poison. UB in any vector lane means the whole op is poison. ; Thus, we can simplify this: if any element of 'y' is 0, we can do anything. ; Therefore, assume that all elements of 'y' must be 1. diff --git a/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll b/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll index 4938987..b1d7728 100644 --- a/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll +++ b/llvm/test/Transforms/InstSimplify/fast-math-strictfp.ll @@ -18,11 +18,11 @@ define float @mul_zero_2(float %a) #0 { ret float %b } -define <2 x float> @mul_zero_nsz_nnan_vec_undef(<2 x float> %a) #0 { -; CHECK-LABEL: @mul_zero_nsz_nnan_vec_undef( +define <2 x float> @mul_zero_nsz_nnan_vec_poison(<2 x float> %a) #0 { +; CHECK-LABEL: @mul_zero_nsz_nnan_vec_poison( ; CHECK-NEXT: ret <2 x float> zeroinitializer ; - %b = call nsz nnan <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %a, <2 x float><float 0.0, float undef>, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %b = call nsz nnan <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float> %a, <2 x float><float 0.0, float poison>, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %b } @@ -98,13 +98,13 @@ define <2 x float> @fadd_unary_fnegx_commute_vec(<2 x float> %x) #0 { ret <2 x float> %r } -define <2 x float> @fadd_fnegx_commute_vec_undef(<2 x float> %x) #0 { -; CHECK-LABEL: @fadd_fnegx_commute_vec_undef( -; CHECK-NEXT: [[NEGX:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float undef, float -0.000000e+00>, <2 x float> [[X:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") +define <2 x float> @fadd_fnegx_commute_vec_poison(<2 x float> %x) #0 { +; CHECK-LABEL: @fadd_fnegx_commute_vec_poison( +; CHECK-NEXT: [[NEGX:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> 
<float poison, float -0.000000e+00>, <2 x float> [[X:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: [[R:%.*]] = call nnan <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[X]], <2 x float> [[NEGX]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: ret <2 x float> [[R]] ; - %negx = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float undef, float -0.0>, <2 x float> %x, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %negx = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float poison, float -0.0>, <2 x float> %x, metadata !"round.tonearest", metadata !"fpexcept.ignore") %r = call nnan <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %negx, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %r } @@ -240,34 +240,34 @@ define float @fneg_x(float %a) #0 { ret float %ret } -define <2 x float> @fsub_0_0_x_vec_undef1(<2 x float> %a) #0 { -; CHECK-LABEL: @fsub_0_0_x_vec_undef1( -; CHECK-NEXT: [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float 0.000000e+00, float undef>, <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") +define <2 x float> @fsub_0_0_x_vec_poison1(<2 x float> %a) #0 { +; CHECK-LABEL: @fsub_0_0_x_vec_poison1( +; CHECK-NEXT: [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float 0.000000e+00, float poison>, <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: [[RET:%.*]] = call nsz <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> zeroinitializer, <2 x float> [[T1]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: ret <2 x float> [[RET]] ; - %t1 = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float 0.0, float undef>, <2 x float> %a, metadata !"round.tonearest", 
metadata !"fpexcept.ignore") + %t1 = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float 0.0, float poison>, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.ignore") %ret = call nsz <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> zeroinitializer, <2 x float> %t1, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %ret } -define <2 x float> @fneg_x_vec_undef1(<2 x float> %a) #0 { -; CHECK-LABEL: @fneg_x_vec_undef1( +define <2 x float> @fneg_x_vec_poison1(<2 x float> %a) #0 { +; CHECK-LABEL: @fneg_x_vec_poison1( ; CHECK-NEXT: ret <2 x float> [[A:%.*]] ; %t1 = fneg <2 x float> %a - %ret = call nsz <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float 0.0, float undef>, <2 x float> %t1, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %ret = call nsz <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float 0.0, float poison>, <2 x float> %t1, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %ret } -define <2 x float> @fsub_0_0_x_vec_undef2(<2 x float> %a) #0 { -; CHECK-LABEL: @fsub_0_0_x_vec_undef2( +define <2 x float> @fsub_0_0_x_vec_poison2(<2 x float> %a) #0 { +; CHECK-LABEL: @fsub_0_0_x_vec_poison2( ; CHECK-NEXT: [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> zeroinitializer, <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") -; CHECK-NEXT: [[RET:%.*]] = call nsz <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float undef, float -0.000000e+00>, <2 x float> [[T1]], metadata !"round.tonearest", metadata !"fpexcept.ignore") +; CHECK-NEXT: [[RET:%.*]] = call nsz <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float poison, float -0.000000e+00>, <2 x float> [[T1]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: ret <2 x float> [[RET]] ; %t1 = call <2 x float> 
@llvm.experimental.constrained.fsub.v2f32(<2 x float> zeroinitializer, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.ignore") - %ret = call nsz <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float undef, float -0.0>, <2 x float> %t1, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %ret = call nsz <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float poison, float -0.0>, <2 x float> %t1, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %ret } @@ -281,11 +281,11 @@ define <2 x float> @fadd_zero_nsz_vec(<2 x float> %x) #0 { ret <2 x float> %r } -define <2 x float> @fadd_zero_nsz_vec_undef(<2 x float> %x) #0 { -; CHECK-LABEL: @fadd_zero_nsz_vec_undef( +define <2 x float> @fadd_zero_nsz_vec_poison(<2 x float> %x) #0 { +; CHECK-LABEL: @fadd_zero_nsz_vec_poison( ; CHECK-NEXT: ret <2 x float> [[X:%.*]] ; - %r = call nsz <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> <float 0.0, float undef>, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %r = call nsz <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> <float 0.0, float poison>, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %r } @@ -375,11 +375,11 @@ define double @fdiv_zero_by_x(double %x) #0 { ret double %r } -define <2 x double> @fdiv_zero_by_x_vec_undef(<2 x double> %x) #0 { -; CHECK-LABEL: @fdiv_zero_by_x_vec_undef( +define <2 x double> @fdiv_zero_by_x_vec_poison(<2 x double> %x) #0 { +; CHECK-LABEL: @fdiv_zero_by_x_vec_poison( ; CHECK-NEXT: ret <2 x double> zeroinitializer ; - %r = call nnan nsz <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double> <double 0.0, double undef>, <2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %r = call nnan nsz <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double> <double 0.0, double poison>, <2 x double> %x, metadata 
!"round.tonearest", metadata !"fpexcept.ignore") ret <2 x double> %r } @@ -394,11 +394,11 @@ define double @frem_zero_by_x(double %x) #0 { ret double %r } -define <2 x double> @frem_poszero_by_x_vec_undef(<2 x double> %x) #0 { -; CHECK-LABEL: @frem_poszero_by_x_vec_undef( +define <2 x double> @frem_poszero_by_x_vec_poison(<2 x double> %x) #0 { +; CHECK-LABEL: @frem_poszero_by_x_vec_poison( ; CHECK-NEXT: ret <2 x double> zeroinitializer ; - %r = call nnan <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double> <double 0.0, double undef>, <2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %r = call nnan <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double> <double 0.0, double poison>, <2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x double> %r } @@ -413,11 +413,11 @@ define double @frem_negzero_by_x(double %x) #0 { ret double %r } -define <2 x double> @frem_negzero_by_x_vec_undef(<2 x double> %x) #0 { -; CHECK-LABEL: @frem_negzero_by_x_vec_undef( +define <2 x double> @frem_negzero_by_x_vec_poison(<2 x double> %x) #0 { +; CHECK-LABEL: @frem_negzero_by_x_vec_poison( ; CHECK-NEXT: ret <2 x double> <double -0.000000e+00, double -0.000000e+00> ; - %r = call nnan <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double> <double undef, double -0.0>, <2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %r = call nnan <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double> <double poison, double -0.0>, <2 x double> %x, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x double> %r } @@ -493,13 +493,13 @@ define float @fdiv_neg_swapped2(float %f) #0 { ret float %div } -define <2 x float> @fdiv_neg_vec_undef_elt(<2 x float> %f) #0 { -; CHECK-LABEL: @fdiv_neg_vec_undef_elt( -; CHECK-NEXT: [[NEG:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float 0.000000e+00, float undef>, <2 x float> [[F:%.*]], 
metadata !"round.tonearest", metadata !"fpexcept.ignore") +define <2 x float> @fdiv_neg_vec_poison_elt(<2 x float> %f) #0 { +; CHECK-LABEL: @fdiv_neg_vec_poison_elt( +; CHECK-NEXT: [[NEG:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float 0.000000e+00, float poison>, <2 x float> [[F:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: [[DIV:%.*]] = call nnan <2 x float> @llvm.experimental.constrained.fdiv.v2f32(<2 x float> [[F]], <2 x float> [[NEG]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: ret <2 x float> [[DIV]] ; - %neg = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float 0.000000e+00, float undef>, <2 x float> %f, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %neg = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float 0.000000e+00, float poison>, <2 x float> %f, metadata !"round.tonearest", metadata !"fpexcept.ignore") %div = call nnan <2 x float> @llvm.experimental.constrained.fdiv.v2f32(<2 x float> %f, <2 x float> %neg, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %div } diff --git a/llvm/test/Transforms/InstSimplify/fast-math.ll b/llvm/test/Transforms/InstSimplify/fast-math.ll index d1818e6..287f30b 100644 --- a/llvm/test/Transforms/InstSimplify/fast-math.ll +++ b/llvm/test/Transforms/InstSimplify/fast-math.ll @@ -18,11 +18,11 @@ define float @mul_zero_2(float %a) { ret float %b } -define <2 x float> @mul_zero_nsz_nnan_vec_undef(<2 x float> %a) { -; CHECK-LABEL: @mul_zero_nsz_nnan_vec_undef( +define <2 x float> @mul_zero_nsz_nnan_vec_poison(<2 x float> %a) { +; CHECK-LABEL: @mul_zero_nsz_nnan_vec_poison( ; CHECK-NEXT: ret <2 x float> zeroinitializer ; - %b = fmul nsz nnan <2 x float> %a, <float 0.0, float undef> + %b = fmul nsz nnan <2 x float> %a, <float 0.0, float poison> ret <2 x float> %b } @@ -94,11 +94,11 @@ define <2 x float> @fadd_unary_fnegx_commute_vec(<2 x float> 
%x) { ret <2 x float> %r } -define <2 x float> @fadd_fnegx_commute_vec_undef(<2 x float> %x) { -; CHECK-LABEL: @fadd_fnegx_commute_vec_undef( +define <2 x float> @fadd_fnegx_commute_vec_poison(<2 x float> %x) { +; CHECK-LABEL: @fadd_fnegx_commute_vec_poison( ; CHECK-NEXT: ret <2 x float> zeroinitializer ; - %negx = fsub <2 x float> <float undef, float -0.0>, %x + %negx = fsub <2 x float> <float poison, float -0.0>, %x %r = fadd nnan <2 x float> %x, %negx ret <2 x float> %r } @@ -226,30 +226,30 @@ define float @fneg_x(float %a) { ret float %ret } -define <2 x float> @fsub_0_0_x_vec_undef1(<2 x float> %a) { -; CHECK-LABEL: @fsub_0_0_x_vec_undef1( +define <2 x float> @fsub_0_0_x_vec_poison1(<2 x float> %a) { +; CHECK-LABEL: @fsub_0_0_x_vec_poison1( ; CHECK-NEXT: ret <2 x float> [[A:%.*]] ; - %t1 = fsub <2 x float> <float 0.0, float undef>, %a + %t1 = fsub <2 x float> <float 0.0, float poison>, %a %ret = fsub nsz <2 x float> zeroinitializer, %t1 ret <2 x float> %ret } -define <2 x float> @fneg_x_vec_undef1(<2 x float> %a) { -; CHECK-LABEL: @fneg_x_vec_undef1( +define <2 x float> @fneg_x_vec_poison1(<2 x float> %a) { +; CHECK-LABEL: @fneg_x_vec_poison1( ; CHECK-NEXT: ret <2 x float> [[A:%.*]] ; %t1 = fneg <2 x float> %a - %ret = fsub nsz <2 x float> <float 0.0, float undef>, %t1 + %ret = fsub nsz <2 x float> <float 0.0, float poison>, %t1 ret <2 x float> %ret } -define <2 x float> @fsub_0_0_x_vec_undef2(<2 x float> %a) { -; CHECK-LABEL: @fsub_0_0_x_vec_undef2( +define <2 x float> @fsub_0_0_x_vec_poison2(<2 x float> %a) { +; CHECK-LABEL: @fsub_0_0_x_vec_poison2( ; CHECK-NEXT: ret <2 x float> [[A:%.*]] ; %t1 = fsub <2 x float> zeroinitializer, %a - %ret = fsub nsz <2 x float> <float undef, float -0.0>, %t1 + %ret = fsub nsz <2 x float> <float poison, float -0.0>, %t1 ret <2 x float> %ret } @@ -263,11 +263,11 @@ define <2 x float> @fadd_zero_nsz_vec(<2 x float> %x) { ret <2 x float> %r } -define <2 x float> @fadd_zero_nsz_vec_undef(<2 x float> %x) { -; CHECK-LABEL: 
@fadd_zero_nsz_vec_undef( +define <2 x float> @fadd_zero_nsz_vec_poison(<2 x float> %x) { +; CHECK-LABEL: @fadd_zero_nsz_vec_poison( ; CHECK-NEXT: ret <2 x float> [[X:%.*]] ; - %r = fadd nsz <2 x float> %x, <float 0.0, float undef> + %r = fadd nsz <2 x float> %x, <float 0.0, float poison> ret <2 x float> %r } @@ -357,11 +357,11 @@ define double @fdiv_zero_by_x(double %x) { ret double %r } -define <2 x double> @fdiv_zero_by_x_vec_undef(<2 x double> %x) { -; CHECK-LABEL: @fdiv_zero_by_x_vec_undef( +define <2 x double> @fdiv_zero_by_x_vec_poison(<2 x double> %x) { +; CHECK-LABEL: @fdiv_zero_by_x_vec_poison( ; CHECK-NEXT: ret <2 x double> zeroinitializer ; - %r = fdiv nnan nsz <2 x double> <double 0.0, double undef>, %x + %r = fdiv nnan nsz <2 x double> <double 0.0, double poison>, %x ret <2 x double> %r } @@ -376,11 +376,11 @@ define double @frem_zero_by_x(double %x) { ret double %r } -define <2 x double> @frem_poszero_by_x_vec_undef(<2 x double> %x) { -; CHECK-LABEL: @frem_poszero_by_x_vec_undef( +define <2 x double> @frem_poszero_by_x_vec_poison(<2 x double> %x) { +; CHECK-LABEL: @frem_poszero_by_x_vec_poison( ; CHECK-NEXT: ret <2 x double> zeroinitializer ; - %r = frem nnan <2 x double> <double 0.0, double undef>, %x + %r = frem nnan <2 x double> <double 0.0, double poison>, %x ret <2 x double> %r } @@ -395,11 +395,11 @@ define double @frem_negzero_by_x(double %x) { ret double %r } -define <2 x double> @frem_negzero_by_x_vec_undef(<2 x double> %x) { -; CHECK-LABEL: @frem_negzero_by_x_vec_undef( +define <2 x double> @frem_negzero_by_x_vec_poison(<2 x double> %x) { +; CHECK-LABEL: @frem_negzero_by_x_vec_poison( ; CHECK-NEXT: ret <2 x double> <double -0.000000e+00, double -0.000000e+00> ; - %r = frem nnan <2 x double> <double undef, double -0.0>, %x + %r = frem nnan <2 x double> <double poison, double -0.0>, %x ret <2 x double> %r } @@ -467,11 +467,11 @@ define float @fdiv_neg_swapped2(float %f) { ret float %div } -define <2 x float> @fdiv_neg_vec_undef_elt(<2 x 
float> %f) { -; CHECK-LABEL: @fdiv_neg_vec_undef_elt( +define <2 x float> @fdiv_neg_vec_poison_elt(<2 x float> %f) { +; CHECK-LABEL: @fdiv_neg_vec_poison_elt( ; CHECK-NEXT: ret <2 x float> <float -1.000000e+00, float -1.000000e+00> ; - %neg = fsub <2 x float> <float 0.0, float undef>, %f + %neg = fsub <2 x float> <float 0.0, float poison>, %f %div = fdiv nnan <2 x float> %f, %neg ret <2 x float> %div } diff --git a/llvm/test/Transforms/InstSimplify/fdiv.ll b/llvm/test/Transforms/InstSimplify/fdiv.ll index 38e3125..fb59011 100644 --- a/llvm/test/Transforms/InstSimplify/fdiv.ll +++ b/llvm/test/Transforms/InstSimplify/fdiv.ll @@ -110,11 +110,11 @@ define <2 x float> @fdiv_nnan_ninf_by_undef_v2f32(<2 x float> %x) { ret <2 x float> %fdiv } -define <2 x float> @fdiv_nnan_ninf_by_zero_undef_v2f32(<2 x float> %x) { -; CHECK-LABEL: @fdiv_nnan_ninf_by_zero_undef_v2f32( +define <2 x float> @fdiv_nnan_ninf_by_zero_poison_v2f32(<2 x float> %x) { +; CHECK-LABEL: @fdiv_nnan_ninf_by_zero_poison_v2f32( ; CHECK-NEXT: ret <2 x float> poison ; - %fdiv = fdiv nnan ninf <2 x float> %x, <float 0.0, float undef> + %fdiv = fdiv nnan ninf <2 x float> %x, <float 0.0, float poison> ret <2 x float> %fdiv } diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll index e4748a2..32ea4cb 100644 --- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll +++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic-strictfp.ll @@ -24,23 +24,23 @@ define <2 x float> @fsub_-0_x_vec(<2 x float> %a) #0 { ret <2 x float> %ret } -define <2 x float> @fsub_-0_x_vec_undef_elts(<2 x float> %a) #0 { -; CHECK-LABEL: @fsub_-0_x_vec_undef_elts( -; CHECK-NEXT: [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float -0.000000e+00, float undef>, <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") +define <2 x float> 
@fsub_-0_x_vec_poison_elts(<2 x float> %a) #0 { +; CHECK-LABEL: @fsub_-0_x_vec_poison_elts( +; CHECK-NEXT: [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float -0.000000e+00, float poison>, <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: [[RET:%.*]] = fneg <2 x float> [[T1]] ; CHECK-NEXT: ret <2 x float> [[RET]] ; - %t1 = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float><float -0.0, float undef>, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %t1 = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float><float -0.0, float poison>, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.ignore") %ret = fneg <2 x float> %t1 ret <2 x float> %ret } -define <2 x float> @fsub_negzero_vec_undef_elts(<2 x float> %x) #0 { -; CHECK-LABEL: @fsub_negzero_vec_undef_elts( -; CHECK-NEXT: [[R:%.*]] = call nsz <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float undef, float -0.000000e+00>, <2 x float> [[X:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") +define <2 x float> @fsub_negzero_vec_poison_elts(<2 x float> %x) #0 { +; CHECK-LABEL: @fsub_negzero_vec_poison_elts( +; CHECK-NEXT: [[R:%.*]] = call nsz <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float poison, float -0.000000e+00>, <2 x float> [[X:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: ret <2 x float> [[R]] ; - %r = call nsz <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float><float undef, float -0.0>, <2 x float> %x, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %r = call nsz <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float><float poison, float -0.0>, <2 x float> %x, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %r } @@ -86,23 +86,23 @@ define <2 x float> @fneg_x_vec(<2 x float> %a) #0 { ret <2 x float> 
%ret } -define <2 x float> @fsub_-0_-0_x_vec_undef_elts(<2 x float> %a) #0 { -; CHECK-LABEL: @fsub_-0_-0_x_vec_undef_elts( -; CHECK-NEXT: [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float undef, float -0.000000e+00>, <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float -0.000000e+00, float undef>, <2 x float> [[T1]], metadata !"round.tonearest", metadata !"fpexcept.ignore") +define <2 x float> @fsub_-0_-0_x_vec_poison_elts(<2 x float> %a) #0 { +; CHECK-LABEL: @fsub_-0_-0_x_vec_poison_elts( +; CHECK-NEXT: [[T1:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float poison, float -0.000000e+00>, <2 x float> [[A:%.*]], metadata !"round.tonearest", metadata !"fpexcept.ignore") +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> <float -0.000000e+00, float poison>, <2 x float> [[T1]], metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: ret <2 x float> [[RET]] ; - %t1 = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float><float undef, float -0.0>, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.ignore") - %ret = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float><float -0.0, float undef>, <2 x float> %t1, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %t1 = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float><float poison, float -0.0>, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %ret = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float><float -0.0, float poison>, <2 x float> %t1, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %ret } -define <2 x float> @fneg_x_vec_undef_elts(<2 x float> %a) #0 { -; CHECK-LABEL: @fneg_x_vec_undef_elts( +define 
<2 x float> @fneg_x_vec_poison_elts(<2 x float> %a) #0 { +; CHECK-LABEL: @fneg_x_vec_poison_elts( ; CHECK-NEXT: ret <2 x float> [[A:%.*]] ; %t1 = fneg <2 x float> %a - %ret = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float><float -0.0, float undef>, <2 x float> %t1, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %ret = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float><float -0.0, float poison>, <2 x float> %t1, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %ret } @@ -139,11 +139,11 @@ define float @fsub_x_0(float %x) #0 { ret float %r } -define <2 x float> @fsub_x_0_vec_undef(<2 x float> %x) #0 { -; CHECK-LABEL: @fsub_x_0_vec_undef( +define <2 x float> @fsub_x_0_vec_poison(<2 x float> %x) #0 { +; CHECK-LABEL: @fsub_x_0_vec_poison( ; CHECK-NEXT: ret <2 x float> [[X:%.*]] ; - %r = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float><float undef, float 0.0>, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %r = call <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float> %x, <2 x float><float poison, float 0.0>, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %r } @@ -156,11 +156,11 @@ define float @fadd_x_n0(float %a) #0 { ret float %ret } -define <2 x float> @fadd_x_n0_vec_undef_elt(<2 x float> %a) #0 { -; CHECK-LABEL: @fadd_x_n0_vec_undef_elt( +define <2 x float> @fadd_x_n0_vec_poison_elt(<2 x float> %a) #0 { +; CHECK-LABEL: @fadd_x_n0_vec_poison_elt( ; CHECK-NEXT: ret <2 x float> [[A:%.*]] ; - %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float> <float -0.0, float undef>, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float> <float -0.0, float poison>, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %ret } @@ -174,12 +174,12 @@ define 
float @fadd_x_p0(float %a) #0 { ret float %ret } -define <2 x float> @fadd_x_p0_vec_undef_elt(<2 x float> %a) #0 { -; CHECK-LABEL: @fadd_x_p0_vec_undef_elt( -; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> <float 0.000000e+00, float undef>, metadata !"round.tonearest", metadata !"fpexcept.ignore") +define <2 x float> @fadd_x_p0_vec_poison_elt(<2 x float> %a) #0 { +; CHECK-LABEL: @fadd_x_p0_vec_poison_elt( +; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> [[A:%.*]], <2 x float> <float 0.000000e+00, float poison>, metadata !"round.tonearest", metadata !"fpexcept.ignore") ; CHECK-NEXT: ret <2 x float> [[RET]] ; - %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float> <float 0.0, float undef>, metadata !"round.tonearest", metadata !"fpexcept.ignore") + %ret = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float> <float 0.0, float poison>, metadata !"round.tonearest", metadata !"fpexcept.ignore") ret <2 x float> %ret } diff --git a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll index 5d17504..7a35f09 100644 --- a/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll +++ b/llvm/test/Transforms/InstSimplify/floating-point-arithmetic.ll @@ -30,20 +30,20 @@ define <2 x float> @fsub_-0_x_vec(<2 x float> %a) { ret <2 x float> %ret } -define <2 x float> @fsub_-0_x_vec_undef_elts(<2 x float> %a) { -; CHECK-LABEL: @fsub_-0_x_vec_undef_elts( +define <2 x float> @fsub_-0_x_vec_poison_elts(<2 x float> %a) { +; CHECK-LABEL: @fsub_-0_x_vec_poison_elts( ; CHECK-NEXT: ret <2 x float> [[A:%.*]] ; - %t1 = fsub <2 x float> <float -0.0, float undef>, %a + %t1 = fsub <2 x float> <float -0.0, float poison>, %a %ret = fneg <2 x float> %t1 ret <2 x float> %ret } -define <2 x float> @fsub_negzero_vec_undef_elts(<2 
x float> %x) { -; CHECK-LABEL: @fsub_negzero_vec_undef_elts( +define <2 x float> @fsub_negzero_vec_poison_elts(<2 x float> %x) { +; CHECK-LABEL: @fsub_negzero_vec_poison_elts( ; CHECK-NEXT: ret <2 x float> [[X:%.*]] ; - %r = fsub nsz <2 x float> %x, <float undef, float -0.0> + %r = fsub nsz <2 x float> %x, <float poison, float -0.0> ret <2 x float> %r } @@ -85,21 +85,21 @@ define <2 x float> @fneg_x_vec(<2 x float> %a) { ret <2 x float> %ret } -define <2 x float> @fsub_-0_-0_x_vec_undef_elts(<2 x float> %a) { -; CHECK-LABEL: @fsub_-0_-0_x_vec_undef_elts( +define <2 x float> @fsub_-0_-0_x_vec_poison_elts(<2 x float> %a) { +; CHECK-LABEL: @fsub_-0_-0_x_vec_poison_elts( ; CHECK-NEXT: ret <2 x float> [[A:%.*]] ; - %t1 = fsub <2 x float> <float undef, float -0.0>, %a - %ret = fsub <2 x float> <float -0.0, float undef>, %t1 + %t1 = fsub <2 x float> <float poison, float -0.0>, %a + %ret = fsub <2 x float> <float -0.0, float poison>, %t1 ret <2 x float> %ret } -define <2 x float> @fneg_x_vec_undef_elts(<2 x float> %a) { -; CHECK-LABEL: @fneg_x_vec_undef_elts( +define <2 x float> @fneg_x_vec_poison_elts(<2 x float> %a) { +; CHECK-LABEL: @fneg_x_vec_poison_elts( ; CHECK-NEXT: ret <2 x float> [[A:%.*]] ; %t1 = fneg <2 x float> %a - %ret = fsub <2 x float> <float -0.0, float undef>, %t1 + %ret = fsub <2 x float> <float -0.0, float poison>, %t1 ret <2 x float> %ret } @@ -136,11 +136,11 @@ define float @fsub_x_0(float %x) { ret float %r } -define <2 x float> @fsub_x_0_vec_undef(<2 x float> %x) { -; CHECK-LABEL: @fsub_x_0_vec_undef( +define <2 x float> @fsub_x_0_vec_poison(<2 x float> %x) { +; CHECK-LABEL: @fsub_x_0_vec_poison( ; CHECK-NEXT: ret <2 x float> [[X:%.*]] ; - %r = fsub <2 x float> %x, <float undef, float 0.0> + %r = fsub <2 x float> %x, <float poison, float 0.0> ret <2 x float> %r } @@ -153,11 +153,11 @@ define float @fadd_x_n0(float %a) { ret float %ret } -define <2 x float> @fadd_x_n0_vec_undef_elt(<2 x float> %a) { -; CHECK-LABEL: @fadd_x_n0_vec_undef_elt( +define 
<2 x float> @fadd_x_n0_vec_poison_elt(<2 x float> %a) { +; CHECK-LABEL: @fadd_x_n0_vec_poison_elt( ; CHECK-NEXT: ret <2 x float> [[A:%.*]] ; - %ret = fadd <2 x float> %a, <float -0.0, float undef> + %ret = fadd <2 x float> %a, <float -0.0, float poison> ret <2 x float> %ret } diff --git a/llvm/test/Transforms/InstSimplify/floating-point-compare.ll b/llvm/test/Transforms/InstSimplify/floating-point-compare.ll index 3c1794c..70f0321 100644 --- a/llvm/test/Transforms/InstSimplify/floating-point-compare.ll +++ b/llvm/test/Transforms/InstSimplify/floating-point-compare.ll @@ -547,30 +547,30 @@ define <2 x i1> @fabs_is_not_negative_anyzero(<2 x float> %V) { ret <2 x i1> %cmp } -define <3 x i1> @fabs_is_not_negative_negzero_undef(<3 x float> %V) { -; CHECK-LABEL: @fabs_is_not_negative_negzero_undef( +define <3 x i1> @fabs_is_not_negative_negzero_poison(<3 x float> %V) { +; CHECK-LABEL: @fabs_is_not_negative_negzero_poison( ; CHECK-NEXT: ret <3 x i1> zeroinitializer ; %abs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %V) - %cmp = fcmp olt <3 x float> %abs, <float -0.0, float -0.0, float undef> + %cmp = fcmp olt <3 x float> %abs, <float -0.0, float -0.0, float poison> ret <3 x i1> %cmp } -define <3 x i1> @fabs_is_not_negative_poszero_undef(<3 x float> %V) { -; CHECK-LABEL: @fabs_is_not_negative_poszero_undef( +define <3 x i1> @fabs_is_not_negative_poszero_poison(<3 x float> %V) { +; CHECK-LABEL: @fabs_is_not_negative_poszero_poison( ; CHECK-NEXT: ret <3 x i1> zeroinitializer ; %abs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %V) - %cmp = fcmp olt <3 x float> %abs, <float 0.0, float 0.0, float undef> + %cmp = fcmp olt <3 x float> %abs, <float 0.0, float 0.0, float poison> ret <3 x i1> %cmp } -define <3 x i1> @fabs_is_not_negative_anyzero_undef(<3 x float> %V) { -; CHECK-LABEL: @fabs_is_not_negative_anyzero_undef( +define <3 x i1> @fabs_is_not_negative_anyzero_poison(<3 x float> %V) { +; CHECK-LABEL: @fabs_is_not_negative_anyzero_poison( ; CHECK-NEXT: ret <3 x i1> 
zeroinitializer ; %abs = call <3 x float> @llvm.fabs.v3f32(<3 x float> %V) - %cmp = fcmp olt <3 x float> %abs, <float 0.0, float -0.0, float undef> + %cmp = fcmp olt <3 x float> %abs, <float 0.0, float -0.0, float poison> ret <3 x i1> %cmp } @@ -1335,19 +1335,19 @@ define <2 x i1> @orderedCompareWithNaNVector(<2 x double> %A) { ret <2 x i1> %cmp } -define <2 x i1> @orderedCompareWithNaNVector_undef_elt(<2 x double> %A) { -; CHECK-LABEL: @orderedCompareWithNaNVector_undef_elt( +define <2 x i1> @orderedCompareWithNaNVector_poison_elt(<2 x double> %A) { +; CHECK-LABEL: @orderedCompareWithNaNVector_poison_elt( ; CHECK-NEXT: ret <2 x i1> zeroinitializer ; - %cmp = fcmp olt <2 x double> %A, <double 0xFFFFFFFFFFFFFFFF, double undef> + %cmp = fcmp olt <2 x double> %A, <double 0xFFFFFFFFFFFFFFFF, double poison> ret <2 x i1> %cmp } -define <2 x i1> @unorderedCompareWithNaNVector_undef_elt(<2 x double> %A) { -; CHECK-LABEL: @unorderedCompareWithNaNVector_undef_elt( +define <2 x i1> @unorderedCompareWithNaNVector_poison_elt(<2 x double> %A) { +; CHECK-LABEL: @unorderedCompareWithNaNVector_poison_elt( ; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true> ; - %cmp = fcmp ult <2 x double> %A, <double undef, double 0xFFFFFFFFFFFFFFFF> + %cmp = fcmp ult <2 x double> %A, <double poison, double 0xFFFFFFFFFFFFFFFF> ret <2 x i1> %cmp } diff --git a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll index a8a9e96..668a93d 100644 --- a/llvm/test/Transforms/InstSimplify/fminmax-folds.ll +++ b/llvm/test/Transforms/InstSimplify/fminmax-folds.ll @@ -493,7 +493,7 @@ define <2 x double> @maxnum_nan_op0_vec(<2 x double> %x) { ; CHECK-LABEL: @maxnum_nan_op0_vec( ; CHECK-NEXT: ret <2 x double> [[X:%.*]] ; - %r = call <2 x double> @llvm.maxnum.v2f64(<2 x double> <double 0x7ff8000000000000, double undef>, <2 x double> %x) + %r = call <2 x double> @llvm.maxnum.v2f64(<2 x double> <double 0x7ff8000000000000, double poison>, <2 x double> %x) ret <2 x double> 
%r } @@ -509,7 +509,7 @@ define <2 x double> @minnum_nan_op0_vec(<2 x double> %x) { ; CHECK-LABEL: @minnum_nan_op0_vec( ; CHECK-NEXT: ret <2 x double> [[X:%.*]] ; - %r = call <2 x double> @llvm.minnum.v2f64(<2 x double> <double undef, double 0x7ff8000dead00000>, <2 x double> %x) + %r = call <2 x double> @llvm.minnum.v2f64(<2 x double> <double poison, double 0x7ff8000dead00000>, <2 x double> %x) ret <2 x double> %r } @@ -873,19 +873,19 @@ define double @minimum_nan_op1(double %x) { ret double %r } -define <2 x double> @maximum_nan_op0_vec_partial_undef(<2 x double> %x) { -; CHECK-LABEL: @maximum_nan_op0_vec_partial_undef( -; CHECK-NEXT: ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000> +define <2 x double> @maximum_nan_op0_vec_partial_poison(<2 x double> %x) { +; CHECK-LABEL: @maximum_nan_op0_vec_partial_poison( +; CHECK-NEXT: ret <2 x double> <double 0x7FF8000000000000, double poison> ; - %r = call <2 x double> @llvm.maximum.v2f64(<2 x double> <double 0x7ff8000000000000, double undef>, <2 x double> %x) + %r = call <2 x double> @llvm.maximum.v2f64(<2 x double> <double 0x7ff8000000000000, double poison>, <2 x double> %x) ret <2 x double> %r } -define <2 x double> @maximum_nan_op1_vec_partial_undef(<2 x double> %x) { -; CHECK-LABEL: @maximum_nan_op1_vec_partial_undef( -; CHECK-NEXT: ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000000000000> +define <2 x double> @maximum_nan_op1_vec_partial_poison(<2 x double> %x) { +; CHECK-LABEL: @maximum_nan_op1_vec_partial_poison( +; CHECK-NEXT: ret <2 x double> <double 0x7FF8000000000000, double poison> ; - %r = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> <double 0x7ff8000000000000, double undef>) + %r = call <2 x double> @llvm.maximum.v2f64(<2 x double> %x, <2 x double> <double 0x7ff8000000000000, double poison>) ret <2 x double> %r } @@ -897,19 +897,19 @@ define <2 x double> @maximum_nan_op1_vec(<2 x double> %x) { ret <2 x double> %r } -define <2 x double> 
@minimum_nan_op0_vec_partial_undef(<2 x double> %x) { -; CHECK-LABEL: @minimum_nan_op0_vec_partial_undef( -; CHECK-NEXT: ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000DEAD00000> +define <2 x double> @minimum_nan_op0_vec_partial_poison(<2 x double> %x) { +; CHECK-LABEL: @minimum_nan_op0_vec_partial_poison( +; CHECK-NEXT: ret <2 x double> <double poison, double 0x7FF8000DEAD00000> ; - %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> <double undef, double 0x7ff8000dead00000>, <2 x double> %x) + %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> <double poison, double 0x7ff8000dead00000>, <2 x double> %x) ret <2 x double> %r } -define <2 x double> @minimum_nan_op1_vec_partial_undef(<2 x double> %x) { -; CHECK-LABEL: @minimum_nan_op1_vec_partial_undef( -; CHECK-NEXT: ret <2 x double> <double 0x7FF8000000000000, double 0x7FF8000DEAD00000> +define <2 x double> @minimum_nan_op1_vec_partial_poison(<2 x double> %x) { +; CHECK-LABEL: @minimum_nan_op1_vec_partial_poison( +; CHECK-NEXT: ret <2 x double> <double poison, double 0x7FF8000DEAD00000> ; - %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> <double undef, double 0x7ff8000dead00000>) + %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> <double poison, double 0x7ff8000dead00000>) ret <2 x double> %r } diff --git a/llvm/test/Transforms/InstSimplify/fp-nan.ll b/llvm/test/Transforms/InstSimplify/fp-nan.ll index cb0bed3..bb55750 100644 --- a/llvm/test/Transforms/InstSimplify/fp-nan.ll +++ b/llvm/test/Transforms/InstSimplify/fp-nan.ll @@ -163,13 +163,13 @@ define <2 x double> @fsub_nan_poison_op1(<2 x double> %x) { ret <2 x double> %r } -; Vector with undef element +; Vector with poison element -define <2 x double> @frem_nan_undef_op0(<2 x double> %x) { -; CHECK-LABEL: @frem_nan_undef_op0( -; CHECK-NEXT: ret <2 x double> <double 0xFFFF00000000DEAD, double 0x7FF8000000000000> +define <2 x double> @frem_nan_poison_op0(<2 x double> %x) { +; CHECK-LABEL: 
@frem_nan_poison_op0( +; CHECK-NEXT: ret <2 x double> <double 0xFFFF00000000DEAD, double poison> ; - %r = frem <2 x double> <double 0xFFFF00000000DEAD, double undef>, %x + %r = frem <2 x double> <double 0xFFFF00000000DEAD, double poison>, %x ret <2 x double> %r } @@ -177,7 +177,8 @@ define <2 x double> @frem_nan_undef_op0(<2 x double> %x) { define <3 x double> @fadd_nan_poison_undef_op1(<3 x double> %x) { ; CHECK-LABEL: @fadd_nan_poison_undef_op1( -; CHECK-NEXT: ret <3 x double> <double 0xFFFF00000000DEAD, double poison, double 0x7FF8000000000000> +; CHECK-NEXT: [[R:%.*]] = fadd <3 x double> [[X:%.*]], <double 0xFFFF00000000DEAD, double poison, double undef> +; CHECK-NEXT: ret <3 x double> [[R]] ; %r = fadd <3 x double> %x, <double 0xFFFF00000000DEAD, double poison, double undef> ret <3 x double> %r diff --git a/llvm/test/Transforms/InstSimplify/icmp-bool-constant.ll b/llvm/test/Transforms/InstSimplify/icmp-bool-constant.ll index 6205225..a501f99 100644 --- a/llvm/test/Transforms/InstSimplify/icmp-bool-constant.ll +++ b/llvm/test/Transforms/InstSimplify/icmp-bool-constant.ll @@ -12,11 +12,11 @@ define <2 x i1> @eq_t(<2 x i1> %a) { ret <2 x i1> %r } -define <2 x i1> @eq_t_undef_elt(<2 x i1> %a) { -; CHECK-LABEL: @eq_t_undef_elt( +define <2 x i1> @eq_t_poison_elt(<2 x i1> %a) { +; CHECK-LABEL: @eq_t_poison_elt( ; CHECK-NEXT: ret <2 x i1> [[A:%.*]] ; - %r = icmp eq <2 x i1> %a, <i1 undef, i1 true> + %r = icmp eq <2 x i1> %a, <i1 poison, i1 true> ret <2 x i1> %r } @@ -54,11 +54,11 @@ define <2 x i1> @ugt_t(<2 x i1> %a) { ret <2 x i1> %r } -define <2 x i1> @ugt_t_undef_elt(<2 x i1> %a) { -; CHECK-LABEL: @ugt_t_undef_elt( +define <2 x i1> @ugt_t_poison_elt(<2 x i1> %a) { +; CHECK-LABEL: @ugt_t_poison_elt( ; CHECK-NEXT: ret <2 x i1> zeroinitializer ; - %r = icmp ugt <2 x i1> %a, <i1 true, i1 undef> + %r = icmp ugt <2 x i1> %a, <i1 true, i1 poison> ret <2 x i1> %r } @@ -161,11 +161,11 @@ define <2 x i1> @sge_t(<2 x i1> %a) { ret <2 x i1> %r } -define <2 x i1> 
@sge_t_undef_elt(<2 x i1> %a) { -; CHECK-LABEL: @sge_t_undef_elt( +define <2 x i1> @sge_t_poison_elt(<2 x i1> %a) { +; CHECK-LABEL: @sge_t_poison_elt( ; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true> ; - %r = icmp sge <2 x i1> %a, <i1 true, i1 undef> + %r = icmp sge <2 x i1> %a, <i1 true, i1 poison> ret <2 x i1> %r } diff --git a/llvm/test/Transforms/InstSimplify/icmp-not-bool-constant.ll b/llvm/test/Transforms/InstSimplify/icmp-not-bool-constant.ll index f4a0b6d..045d773 100644 --- a/llvm/test/Transforms/InstSimplify/icmp-not-bool-constant.ll +++ b/llvm/test/Transforms/InstSimplify/icmp-not-bool-constant.ll @@ -33,11 +33,11 @@ define <2 x i1> @eq_f_not_swap(<2 x i1> %a) { ret <2 x i1> %r } -define <2 x i1> @eq_f_not_undef(<2 x i1> %a) { -; CHECK-LABEL: @eq_f_not_undef( +define <2 x i1> @eq_f_not_poison(<2 x i1> %a) { +; CHECK-LABEL: @eq_f_not_poison( ; CHECK-NEXT: ret <2 x i1> [[A:%.*]] ; - %not = xor <2 x i1> %a, <i1 undef, i1 true> + %not = xor <2 x i1> %a, <i1 poison, i1 true> %r = icmp eq <2 x i1> %not, <i1 false, i1 false> ret <2 x i1> %r } @@ -60,11 +60,11 @@ define <2 x i1> @ne_t_not_swap(<2 x i1> %a) { ret <2 x i1> %r } -define <2 x i1> @ne_t_not_undef(<2 x i1> %a) { -; CHECK-LABEL: @ne_t_not_undef( +define <2 x i1> @ne_t_not_poison(<2 x i1> %a) { +; CHECK-LABEL: @ne_t_not_poison( ; CHECK-NEXT: ret <2 x i1> [[A:%.*]] ; - %not = xor <2 x i1> %a, <i1 undef, i1 true> + %not = xor <2 x i1> %a, <i1 poison, i1 true> %r = icmp ne <2 x i1> %not, <i1 true, i1 true> ret <2 x i1> %r } @@ -116,11 +116,11 @@ define <2 x i1> @ult_t_not_swap(<2 x i1> %a) { ret <2 x i1> %r } -define <2 x i1> @ult_t_not_undef(<2 x i1> %a) { -; CHECK-LABEL: @ult_t_not_undef( +define <2 x i1> @ult_t_not_poison(<2 x i1> %a) { +; CHECK-LABEL: @ult_t_not_poison( ; CHECK-NEXT: ret <2 x i1> [[A:%.*]] ; - %not = xor <2 x i1> %a, <i1 undef, i1 true> + %not = xor <2 x i1> %a, <i1 poison, i1 true> %r = icmp ult <2 x i1> %not, <i1 true, i1 true> ret <2 x i1> %r } @@ -152,11 +152,11 @@ define <2 x i1> 
@sgt_t_not_swap(<2 x i1> %a) { ret <2 x i1> %r } -define <2 x i1> @sgt_t_not_undef(<2 x i1> %a) { -; CHECK-LABEL: @sgt_t_not_undef( +define <2 x i1> @sgt_t_not_poison(<2 x i1> %a) { +; CHECK-LABEL: @sgt_t_not_poison( ; CHECK-NEXT: ret <2 x i1> [[A:%.*]] ; - %not = xor <2 x i1> %a, <i1 undef, i1 true> + %not = xor <2 x i1> %a, <i1 poison, i1 true> %r = icmp sgt <2 x i1> %not, <i1 true, i1 true> ret <2 x i1> %r } @@ -235,11 +235,11 @@ define <2 x i1> @ule_f_not_swap(<2 x i1> %a) { ret <2 x i1> %r } -define <2 x i1> @ule_f_not_undef(<2 x i1> %a) { -; CHECK-LABEL: @ule_f_not_undef( +define <2 x i1> @ule_f_not_poison(<2 x i1> %a) { +; CHECK-LABEL: @ule_f_not_poison( ; CHECK-NEXT: ret <2 x i1> [[A:%.*]] ; - %not = xor <2 x i1> %a, <i1 undef, i1 true> + %not = xor <2 x i1> %a, <i1 poison, i1 true> %r = icmp ule <2 x i1> %not, <i1 false, i1 false> ret <2 x i1> %r } @@ -271,11 +271,11 @@ define <2 x i1> @sge_f_not_swap(<2 x i1> %a) { ret <2 x i1> %r } -define <2 x i1> @sge_f_not_undef(<2 x i1> %a) { -; CHECK-LABEL: @sge_f_not_undef( +define <2 x i1> @sge_f_not_poison(<2 x i1> %a) { +; CHECK-LABEL: @sge_f_not_poison( ; CHECK-NEXT: ret <2 x i1> [[A:%.*]] ; - %not = xor <2 x i1> %a, <i1 undef, i1 true> + %not = xor <2 x i1> %a, <i1 poison, i1 true> %r = icmp sge <2 x i1> %not, <i1 false, i1 false> ret <2 x i1> %r } diff --git a/llvm/test/Transforms/InstSimplify/known-never-infinity.ll b/llvm/test/Transforms/InstSimplify/known-never-infinity.ll index 74039d3..4d662c0 100644 --- a/llvm/test/Transforms/InstSimplify/known-never-infinity.ll +++ b/llvm/test/Transforms/InstSimplify/known-never-infinity.ll @@ -1109,6 +1109,106 @@ define float @fcmp_ult_neginf_implies_class_assert(float %arg) { ret float %mul_by_zero } +define i1 @isKnownNeverInfinity_vector_reduce_maximum(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_maximum +; CHECK-SAME: (<4 x double> [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %ninf.x = fadd ninf <4 x double> %x, <double 1.0, 
double 1.0, double 1.0, double 1.0> + %op = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double %op, 0x7ff0000000000000 + ret i1 %cmp +} + +define i1 @isKnownNeverInfinity_vector_reduce_maximum_fail(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_maximum_fail +; CHECK-SAME: (<4 x double> [[X:%.*]]) { +; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00> +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> [[NINF_X]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 +; CHECK-NEXT: ret i1 [[CMP]] +; + %ninf.x = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0> + %op = call double @llvm.vector.reduce.fmaximum.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double %op, 0x7ff0000000000000 + ret i1 %cmp +} + +define i1 @isKnownNeverInfinity_vector_reduce_minimum(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_minimum +; CHECK-SAME: (<4 x double> [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %ninf.x = fadd ninf <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0> + %op = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double %op, 0x7ff0000000000000 + ret i1 %cmp +} + +define i1 @isKnownNeverInfinity_vector_reduce_minimum_fail(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_minimum_fail +; CHECK-SAME: (<4 x double> [[X:%.*]]) { +; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00> +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> [[NINF_X]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 +; CHECK-NEXT: ret i1 [[CMP]] +; + %ninf.x = fadd <4 x double> %x, <double 1.0, double 
1.0, double 1.0, double 1.0> + %op = call double @llvm.vector.reduce.fminimum.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double %op, 0x7ff0000000000000 + ret i1 %cmp +} + +define i1 @isKnownNeverInfinity_vector_reduce_fmax(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmax +; CHECK-SAME: (<4 x double> [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %ninf.x = fadd ninf <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0> + %op = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double %op, 0x7ff0000000000000 + ret i1 %cmp +} + +define i1 @isKnownNeverInfinity_vector_reduce_fmax_fail(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmax_fail +; CHECK-SAME: (<4 x double> [[X:%.*]]) { +; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00> +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> [[NINF_X]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 +; CHECK-NEXT: ret i1 [[CMP]] +; + %ninf.x = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0> + %op = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double %op, 0x7ff0000000000000 + ret i1 %cmp +} + +define i1 @isKnownNeverInfinity_vector_reduce_fmin(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmin +; CHECK-SAME: (<4 x double> [[X:%.*]]) { +; CHECK-NEXT: ret i1 true +; + %ninf.x = fadd ninf <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0> + %op = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double %op, 0x7ff0000000000000 + ret i1 %cmp +} + +define i1 @isKnownNeverInfinity_vector_reduce_fmin_fail(<4 x double> %x) { +; CHECK-LABEL: define i1 @isKnownNeverInfinity_vector_reduce_fmin_fail +; CHECK-SAME: (<4 x double> [[X:%.*]]) { 
+; CHECK-NEXT: [[NINF_X:%.*]] = fadd <4 x double> [[X]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00> +; CHECK-NEXT: [[OP:%.*]] = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> [[NINF_X]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp une double [[OP]], 0x7FF0000000000000 +; CHECK-NEXT: ret i1 [[CMP]] +; + %ninf.x = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0> + %op = call double @llvm.vector.reduce.fmin.v4f64(<4 x double> %ninf.x) + %cmp = fcmp une double %op, 0x7ff0000000000000 + ret i1 %cmp +} + declare double @llvm.arithmetic.fence.f64(double) declare double @llvm.canonicalize.f64(double) declare double @llvm.ceil.f64(double) diff --git a/llvm/test/Transforms/InstSimplify/ldexp.ll b/llvm/test/Transforms/InstSimplify/ldexp.ll index c6bb014..d39f6a1 100644 --- a/llvm/test/Transforms/InstSimplify/ldexp.ll +++ b/llvm/test/Transforms/InstSimplify/ldexp.ll @@ -57,11 +57,12 @@ define void @ldexp_f32_exp0(float %x) { define void @ldexp_v2f32_exp0(<2 x float> %x) { ; CHECK-LABEL: @ldexp_v2f32_exp0( ; CHECK-NEXT: store volatile <2 x float> [[X:%.*]], ptr addrspace(1) undef, align 8 -; CHECK-NEXT: store volatile <2 x float> [[X]], ptr addrspace(1) undef, align 8 +; CHECK-NEXT: [[PART_UNDEF1:%.*]] = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> [[X]], <2 x i32> <i32 undef, i32 0>) +; CHECK-NEXT: store volatile <2 x float> [[PART_UNDEF1]], ptr addrspace(1) undef, align 8 ; CHECK-NEXT: store volatile <2 x float> [[X]], ptr addrspace(1) undef, align 8 ; CHECK-NEXT: ret void ; - %part.undef0 = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %x, <2 x i32> <i32 0, i32 undef>) + %part.undef0 = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %x, <2 x i32> <i32 0, i32 poison>) store volatile <2 x float> %part.undef0, ptr addrspace(1) undef %part.undef1 = call <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float> %x, <2 x i32> <i32 undef, i32 0>) diff --git a/llvm/test/Transforms/InstSimplify/mul.ll 
b/llvm/test/Transforms/InstSimplify/mul.ll index 8ae7f1ea..a1b03a3 100644 --- a/llvm/test/Transforms/InstSimplify/mul.ll +++ b/llvm/test/Transforms/InstSimplify/mul.ll @@ -34,11 +34,11 @@ define <16 x i8> @mul_by_0_vec(<16 x i8> %a) { ret <16 x i8> %b } -define <2 x i8> @mul_by_0_vec_undef_elt(<2 x i8> %a) { -; CHECK-LABEL: @mul_by_0_vec_undef_elt( +define <2 x i8> @mul_by_0_vec_poison_elt(<2 x i8> %a) { +; CHECK-LABEL: @mul_by_0_vec_poison_elt( ; CHECK-NEXT: ret <2 x i8> zeroinitializer ; - %b = mul <2 x i8> %a, <i8 undef, i8 0> + %b = mul <2 x i8> %a, <i8 poison, i8 0> ret <2 x i8> %b } diff --git a/llvm/test/Transforms/InstSimplify/negate.ll b/llvm/test/Transforms/InstSimplify/negate.ll index d72a0db..d07029b 100644 --- a/llvm/test/Transforms/InstSimplify/negate.ll +++ b/llvm/test/Transforms/InstSimplify/negate.ll @@ -17,11 +17,11 @@ define <2 x i32> @negate_nuw_vec(<2 x i32> %x) { ret <2 x i32> %neg } -define <2 x i32> @negate_nuw_vec_undef_elt(<2 x i32> %x) { -; CHECK-LABEL: @negate_nuw_vec_undef_elt( +define <2 x i32> @negate_nuw_vec_poison_elt(<2 x i32> %x) { +; CHECK-LABEL: @negate_nuw_vec_poison_elt( ; CHECK-NEXT: ret <2 x i32> zeroinitializer ; - %neg = sub nuw <2 x i32> <i32 0, i32 undef>, %x + %neg = sub nuw <2 x i32> <i32 0, i32 poison>, %x ret <2 x i32> %neg } @@ -43,12 +43,12 @@ define <2 x i8> @negate_zero_or_minsigned_nsw_vec(<2 x i8> %x) { ret <2 x i8> %neg } -define <2 x i8> @negate_zero_or_minsigned_nsw_vec_undef_elt(<2 x i8> %x) { -; CHECK-LABEL: @negate_zero_or_minsigned_nsw_vec_undef_elt( +define <2 x i8> @negate_zero_or_minsigned_nsw_vec_poison_elt(<2 x i8> %x) { +; CHECK-LABEL: @negate_zero_or_minsigned_nsw_vec_poison_elt( ; CHECK-NEXT: ret <2 x i8> zeroinitializer ; %signbit = shl <2 x i8> %x, <i8 7, i8 7> - %neg = sub nsw <2 x i8> <i8 undef, i8 0>, %signbit + %neg = sub nsw <2 x i8> <i8 poison, i8 0>, %signbit ret <2 x i8> %neg } diff --git a/llvm/test/Transforms/InstSimplify/or.ll b/llvm/test/Transforms/InstSimplify/or.ll index 
913b760..f241c69 100644 --- a/llvm/test/Transforms/InstSimplify/or.ll +++ b/llvm/test/Transforms/InstSimplify/or.ll @@ -17,11 +17,11 @@ define i32 @all_ones(i32 %A) { ret i32 %B } -define <3 x i8> @all_ones_vec_with_undef_elt(<3 x i8> %A) { -; CHECK-LABEL: @all_ones_vec_with_undef_elt( +define <3 x i8> @all_ones_vec_with_poison_elt(<3 x i8> %A) { +; CHECK-LABEL: @all_ones_vec_with_poison_elt( ; CHECK-NEXT: ret <3 x i8> <i8 -1, i8 -1, i8 -1> ; - %B = or <3 x i8> %A, <i8 -1, i8 undef, i8 -1> + %B = or <3 x i8> %A, <i8 -1, i8 poison, i8 -1> ret <3 x i8> %B } @@ -68,11 +68,11 @@ define i32 @or_not(i32 %A) { ret i32 %B } -define <2 x i4> @or_not_commute_vec_undef(<2 x i4> %A) { -; CHECK-LABEL: @or_not_commute_vec_undef( +define <2 x i4> @or_not_commute_vec_poison(<2 x i4> %A) { +; CHECK-LABEL: @or_not_commute_vec_poison( ; CHECK-NEXT: ret <2 x i4> <i4 -1, i4 -1> ; - %NotA = xor <2 x i4> %A, <i4 -1, i4 undef> + %NotA = xor <2 x i4> %A, <i4 -1, i4 poison> %B = or <2 x i4> %NotA, %A ret <2 x i4> %B } @@ -335,7 +335,7 @@ define <2 x i1> @or_with_not_op_commute4(<2 x i1> %a, <2 x i1> %b) { ; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true> ; %ab = and <2 x i1> %b, %a - %not = xor <2 x i1> %ab, <i1 -1, i1 undef> + %not = xor <2 x i1> %ab, <i1 -1, i1 poison> %r = or <2 x i1> %not, %a ret <2 x i1> %r } @@ -515,6 +515,21 @@ define <2 x i4> @and_or_not_or_commute7_undef_elt(<2 x i4> %A, <2 x i4> %B) { ret <2 x i4> %r } +; doing the same with poison is safe. 
+ +define <2 x i4> @and_or_not_or_commute7_poison_elt(<2 x i4> %A, <2 x i4> %B) { +; CHECK-LABEL: @and_or_not_or_commute7_poison_elt( +; CHECK-NEXT: [[NOTA:%.*]] = xor <2 x i4> [[A:%.*]], <i4 poison, i4 -1> +; CHECK-NEXT: ret <2 x i4> [[NOTA]] +; + %nota = xor <2 x i4> %A, <i4 poison, i4 -1> + %and = and <2 x i4> %B, %nota + %or = or <2 x i4> %B, %A + %notab = xor <2 x i4> %or, <i4 -1, i4 -1> + %r = or <2 x i4> %notab, %and + ret <2 x i4> %r +} + ; (A | B) | (A ^ B) --> A | B define i69 @or_or_xor(i69 %A, i69 %B) { @@ -769,6 +784,21 @@ define <2 x i4> @or_nxor_and_undef_elt(<2 x i4> %a, <2 x i4> %b) { ret <2 x i4> %r } +; Same with poison is safe. + +define <2 x i4> @or_nxor_and_poison_elt(<2 x i4> %a, <2 x i4> %b) { +; CHECK-LABEL: @or_nxor_and_poison_elt( +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i4> [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[XOR]], <i4 -1, i4 poison> +; CHECK-NEXT: ret <2 x i4> [[NOT]] +; + %and = and <2 x i4> %b, %a + %xor = xor <2 x i4> %a, %b + %not = xor <2 x i4> %xor, <i4 -1, i4 poison> + %r = or <2 x i4> %not, %and + ret <2 x i4> %r +} + ; ~(A ^ B) | (A | B) --> -1 define i4 @or_nxor_or_commute0(i4 %a, i4 %b) { @@ -849,15 +879,15 @@ define i4 @or_nxor_or_wrong_val2(i4 %a, i4 %b, i4 %c) { ret i4 %r } -; negative test - undef in 'not' is allowed +; negative test - poison in 'not' is allowed -define <2 x i4> @or_nxor_or_undef_elt(<2 x i4> %a, <2 x i4> %b) { -; CHECK-LABEL: @or_nxor_or_undef_elt( +define <2 x i4> @or_nxor_or_poison_elt(<2 x i4> %a, <2 x i4> %b) { +; CHECK-LABEL: @or_nxor_or_poison_elt( ; CHECK-NEXT: ret <2 x i4> <i4 -1, i4 -1> ; %or = or <2 x i4> %b, %a %xor = xor <2 x i4> %a, %b - %not = xor <2 x i4> %xor, <i4 -1, i4 undef> + %not = xor <2 x i4> %xor, <i4 -1, i4 poison> %r = or <2 x i4> %or, %not ret <2 x i4> %r } @@ -966,12 +996,12 @@ define i32 @or_xor_not_op_or_commute7(i32 %a, i32 %b){ ret i32 %r } -define <2 x i4> @or_xor_not_op_or_undef_elt(<2 x i4> %a, <2 x i4> %b) { -; CHECK-LABEL: 
@or_xor_not_op_or_undef_elt( +define <2 x i4> @or_xor_not_op_or_poison_elt(<2 x i4> %a, <2 x i4> %b) { +; CHECK-LABEL: @or_xor_not_op_or_poison_elt( ; CHECK-NEXT: ret <2 x i4> <i4 -1, i4 -1> ; %xor = xor <2 x i4> %a, %b - %nota = xor <2 x i4> %a, <i4 -1, i4 undef> + %nota = xor <2 x i4> %a, <i4 -1, i4 poison> %or = or <2 x i4> %nota, %b %r = or <2 x i4> %xor, %or ret <2 x i4> %r @@ -1082,6 +1112,21 @@ define <2 x i4> @or_nand_xor_undef_elt(<2 x i4> %x, <2 x i4> %y) { ret <2 x i4> %or } +; Same with poison is safe. + +define <2 x i4> @or_nand_xor_poison_elt(<2 x i4> %x, <2 x i4> %y) { +; CHECK-LABEL: @or_nand_xor_poison_elt( +; CHECK-NEXT: [[AND:%.*]] = and <2 x i4> [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[NAND:%.*]] = xor <2 x i4> [[AND]], <i4 poison, i4 -1> +; CHECK-NEXT: ret <2 x i4> [[NAND]] +; + %and = and <2 x i4> %y, %x + %xor = xor <2 x i4> %x, %y + %nand = xor <2 x i4> %and, <i4 poison, i4 -1> + %or = or <2 x i4> %xor, %nand + ret <2 x i4> %or +} + declare i32 @llvm.fshl.i32 (i32, i32, i32) declare i32 @llvm.fshr.i32 (i32, i32, i32) diff --git a/llvm/test/Transforms/InstSimplify/ptrmask.ll b/llvm/test/Transforms/InstSimplify/ptrmask.ll index dd83abf..d2c4a5d 100644 --- a/llvm/test/Transforms/InstSimplify/ptrmask.ll +++ b/llvm/test/Transforms/InstSimplify/ptrmask.ll @@ -40,7 +40,8 @@ define <2 x ptr addrspace(1) > @ptrmask_simplify_poison_and_zero_i32_vec_fail(<2 define <2 x ptr> @ptrmask_simplify_undef_and_ones_vec(<2 x ptr> %p) { ; CHECK-LABEL: define <2 x ptr> @ptrmask_simplify_undef_and_ones_vec ; CHECK-SAME: (<2 x ptr> [[P:%.*]]) { -; CHECK-NEXT: ret <2 x ptr> [[P]] +; CHECK-NEXT: [[R:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[P]], <2 x i64> <i64 undef, i64 -1>) +; CHECK-NEXT: ret <2 x ptr> [[R]] ; %r = call <2 x ptr> @llvm.ptrmask.v2p1.v2i64(<2 x ptr> %p, <2 x i64> <i64 undef, i64 -1>) ret <2 x ptr> %r diff --git a/llvm/test/Transforms/InstSimplify/rem.ll b/llvm/test/Transforms/InstSimplify/rem.ll index 5af3b5f..a46db03 100644 --- 
a/llvm/test/Transforms/InstSimplify/rem.ll +++ b/llvm/test/Transforms/InstSimplify/rem.ll @@ -17,11 +17,11 @@ define <2 x i32> @zero_dividend_vector(<2 x i32> %A) { ret <2 x i32> %B } -define <2 x i32> @zero_dividend_vector_undef_elt(<2 x i32> %A) { -; CHECK-LABEL: @zero_dividend_vector_undef_elt( +define <2 x i32> @zero_dividend_vector_poison_elt(<2 x i32> %A) { +; CHECK-LABEL: @zero_dividend_vector_poison_elt( ; CHECK-NEXT: ret <2 x i32> zeroinitializer ; - %B = urem <2 x i32> <i32 undef, i32 0>, %A + %B = urem <2 x i32> <i32 poison, i32 0>, %A ret <2 x i32> %B } diff --git a/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll b/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll index 6fb1261..40b22c6 100644 --- a/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll +++ b/llvm/test/Transforms/InstSimplify/saturating-add-sub.ll @@ -44,7 +44,7 @@ define <2 x i8> @uadd_vector_0_commute(<2 x i8> %a) { ; CHECK-LABEL: @uadd_vector_0_commute( ; CHECK-NEXT: ret <2 x i8> [[A:%.*]] ; - %x2v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> <i8 0, i8 undef>, <2 x i8> %a) + %x2v = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> <i8 0, i8 poison>, <2 x i8> %a) ret <2 x i8> %x2v } @@ -156,7 +156,7 @@ define <2 x i8> @sadd_vector_0(<2 x i8> %a) { ; CHECK-LABEL: @sadd_vector_0( ; CHECK-NEXT: ret <2 x i8> [[A:%.*]] ; - %y1v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 undef, i8 0>) + %y1v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> %a, <2 x i8> <i8 poison, i8 0>) ret <2 x i8> %y1v } @@ -205,10 +205,10 @@ define i8 @sadd_scalar_maxval_commute(i8 %a) { define <2 x i8> @sadd_vector_maxval_commute(<2 x i8> %a) { ; CHECK-LABEL: @sadd_vector_maxval_commute( -; CHECK-NEXT: [[Y4V:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> <i8 undef, i8 127>, <2 x i8> [[A:%.*]]) +; CHECK-NEXT: [[Y4V:%.*]] = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> <i8 poison, i8 127>, <2 x i8> [[A:%.*]]) ; CHECK-NEXT: ret <2 x i8> [[Y4V]] ; - %y4v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> 
<i8 undef, i8 127>, <2 x i8> %a) + %y4v = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> <i8 poison, i8 127>, <2 x i8> %a) ret <2 x i8> %y4v } diff --git a/llvm/test/Transforms/InstSimplify/sdiv.ll b/llvm/test/Transforms/InstSimplify/sdiv.ll index 2514d90..9909280 100644 --- a/llvm/test/Transforms/InstSimplify/sdiv.ll +++ b/llvm/test/Transforms/InstSimplify/sdiv.ll @@ -158,11 +158,11 @@ define <2 x i32> @knownnegation_commute_vec_bad3(<2 x i32> %x, <2 x i32> %y) { ret <2 x i32> %div } -define <3 x i32> @negated_operand_vec_undef(<3 x i32> %x) { -; CHECK-LABEL: @negated_operand_vec_undef( +define <3 x i32> @negated_operand_vec_poison(<3 x i32> %x) { +; CHECK-LABEL: @negated_operand_vec_poison( ; CHECK-NEXT: ret <3 x i32> <i32 -1, i32 -1, i32 -1> ; - %negx = sub nsw <3 x i32> <i32 0, i32 undef, i32 0>, %x + %negx = sub nsw <3 x i32> <i32 0, i32 poison, i32 0>, %x %div = sdiv <3 x i32> %negx, %x ret <3 x i32> %div } diff --git a/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll b/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll index 2a4ce85..fcf8c31 100644 --- a/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll +++ b/llvm/test/Transforms/InstSimplify/select-inseltpoison.ll @@ -17,11 +17,11 @@ define <2 x i1> @bool_true_or_false_vec(<2 x i1> %cond) { ret <2 x i1> %s } -define <2 x i1> @bool_true_or_false_vec_undef(<2 x i1> %cond) { -; CHECK-LABEL: @bool_true_or_false_vec_undef( +define <2 x i1> @bool_true_or_false_vec_poison(<2 x i1> %cond) { +; CHECK-LABEL: @bool_true_or_false_vec_poison( ; CHECK-NEXT: ret <2 x i1> [[COND:%.*]] ; - %s = select <2 x i1> %cond, <2 x i1> <i1 undef, i1 true>, <2 x i1> <i1 false, i1 undef> + %s = select <2 x i1> %cond, <2 x i1> <i1 poison, i1 true>, <2 x i1> <i1 false, i1 poison> ret <2 x i1> %s } @@ -57,27 +57,27 @@ define <2 x i32> @equal_arms_vec(<2 x i1> %cond, <2 x i32> %x) { ret <2 x i32> %V } -define <2 x i32> @equal_arms_vec_undef(<2 x i1> %cond) { -; CHECK-LABEL: @equal_arms_vec_undef( +define <2 x i32> 
@equal_arms_vec_poison(<2 x i1> %cond) { +; CHECK-LABEL: @equal_arms_vec_poison( ; CHECK-NEXT: ret <2 x i32> <i32 42, i32 42> ; - %V = select <2 x i1> %cond, <2 x i32> <i32 42, i32 undef>, <2 x i32> <i32 undef, i32 42> + %V = select <2 x i1> %cond, <2 x i32> <i32 42, i32 poison>, <2 x i32> <i32 poison, i32 42> ret <2 x i32> %V } -define <3 x float> @equal_arms_vec_less_undef(<3 x i1> %cond) { -; CHECK-LABEL: @equal_arms_vec_less_undef( +define <3 x float> @equal_arms_vec_less_poison(<3 x i1> %cond) { +; CHECK-LABEL: @equal_arms_vec_less_poison( ; CHECK-NEXT: ret <3 x float> <float 4.200000e+01, float 4.200000e+01, float 4.300000e+01> ; - %V = select <3 x i1> %cond, <3 x float> <float 42.0, float undef, float 43.0>, <3 x float> <float 42.0, float 42.0, float 43.0> + %V = select <3 x i1> %cond, <3 x float> <float 42.0, float poison, float 43.0>, <3 x float> <float 42.0, float 42.0, float 43.0> ret <3 x float> %V } -define <3 x float> @equal_arms_vec_more_undef(<3 x i1> %cond) { -; CHECK-LABEL: @equal_arms_vec_more_undef( -; CHECK-NEXT: ret <3 x float> <float 4.200000e+01, float undef, float 4.300000e+01> +define <3 x float> @equal_arms_vec_more_poison(<3 x i1> %cond) { +; CHECK-LABEL: @equal_arms_vec_more_poison( +; CHECK-NEXT: ret <3 x float> <float 4.200000e+01, float poison, float 4.300000e+01> ; - %V = select <3 x i1> %cond, <3 x float> <float 42.0, float undef, float undef>, <3 x float> <float undef, float undef, float 43.0> + %V = select <3 x i1> %cond, <3 x float> <float 42.0, float poison, float poison>, <3 x float> <float poison, float poison, float 43.0> ret <3 x float> %V } @@ -105,19 +105,19 @@ define <2 x i8> @vsel_mixedvec() { ret <2 x i8> %s } -define <3 x i8> @vsel_undef_true_op(<3 x i8> %x, <3 x i8> %y) { -; CHECK-LABEL: @vsel_undef_true_op( +define <3 x i8> @vsel_poison_true_op(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @vsel_poison_true_op( ; CHECK-NEXT: ret <3 x i8> [[X:%.*]] ; - %s = select <3 x i1><i1 1, i1 undef, i1 1>, <3 x i8> %x, <3 x i8> 
%y + %s = select <3 x i1><i1 1, i1 poison, i1 1>, <3 x i8> %x, <3 x i8> %y ret <3 x i8> %s } -define <3 x i4> @vsel_undef_false_op(<3 x i4> %x, <3 x i4> %y) { -; CHECK-LABEL: @vsel_undef_false_op( +define <3 x i4> @vsel_poison_false_op(<3 x i4> %x, <3 x i4> %y) { +; CHECK-LABEL: @vsel_poison_false_op( ; CHECK-NEXT: ret <3 x i4> [[Y:%.*]] ; - %s = select <3 x i1><i1 0, i1 undef, i1 undef>, <3 x i4> %x, <3 x i4> %y + %s = select <3 x i1><i1 0, i1 poison, i1 poison>, <3 x i4> %x, <3 x i4> %y ret <3 x i4> %s } diff --git a/llvm/test/Transforms/InstSimplify/select.ll b/llvm/test/Transforms/InstSimplify/select.ll index fe93a0c..40c1460 100644 --- a/llvm/test/Transforms/InstSimplify/select.ll +++ b/llvm/test/Transforms/InstSimplify/select.ll @@ -25,11 +25,11 @@ define <2 x i1> @bool_true_or_false_vec(<2 x i1> %cond) { ret <2 x i1> %s } -define <2 x i1> @bool_true_or_false_vec_undef(<2 x i1> %cond) { -; CHECK-LABEL: @bool_true_or_false_vec_undef( +define <2 x i1> @bool_true_or_false_vec_poison(<2 x i1> %cond) { +; CHECK-LABEL: @bool_true_or_false_vec_poison( ; CHECK-NEXT: ret <2 x i1> [[COND:%.*]] ; - %s = select <2 x i1> %cond, <2 x i1> <i1 undef, i1 true>, <2 x i1> <i1 false, i1 undef> + %s = select <2 x i1> %cond, <2 x i1> <i1 poison, i1 true>, <2 x i1> <i1 false, i1 poison> ret <2 x i1> %s } @@ -65,27 +65,27 @@ define <2 x i32> @equal_arms_vec(<2 x i1> %cond, <2 x i32> %x) { ret <2 x i32> %V } -define <2 x i32> @equal_arms_vec_undef(<2 x i1> %cond) { -; CHECK-LABEL: @equal_arms_vec_undef( +define <2 x i32> @equal_arms_vec_poison(<2 x i1> %cond) { +; CHECK-LABEL: @equal_arms_vec_poison( ; CHECK-NEXT: ret <2 x i32> <i32 42, i32 42> ; - %V = select <2 x i1> %cond, <2 x i32> <i32 42, i32 undef>, <2 x i32> <i32 undef, i32 42> + %V = select <2 x i1> %cond, <2 x i32> <i32 42, i32 poison>, <2 x i32> <i32 poison, i32 42> ret <2 x i32> %V } -define <3 x float> @equal_arms_vec_less_undef(<3 x i1> %cond) { -; CHECK-LABEL: @equal_arms_vec_less_undef( +define <3 x float> 
@equal_arms_vec_less_poison(<3 x i1> %cond) { +; CHECK-LABEL: @equal_arms_vec_less_poison( ; CHECK-NEXT: ret <3 x float> <float 4.200000e+01, float 4.200000e+01, float 4.300000e+01> ; - %V = select <3 x i1> %cond, <3 x float> <float 42.0, float undef, float 43.0>, <3 x float> <float 42.0, float 42.0, float 43.0> + %V = select <3 x i1> %cond, <3 x float> <float 42.0, float poison, float 43.0>, <3 x float> <float 42.0, float 42.0, float 43.0> ret <3 x float> %V } -define <3 x float> @equal_arms_vec_more_undef(<3 x i1> %cond) { -; CHECK-LABEL: @equal_arms_vec_more_undef( -; CHECK-NEXT: ret <3 x float> <float 4.200000e+01, float undef, float 4.300000e+01> +define <3 x float> @equal_arms_vec_more_poison(<3 x i1> %cond) { +; CHECK-LABEL: @equal_arms_vec_more_poison( +; CHECK-NEXT: ret <3 x float> <float 4.200000e+01, float poison, float 4.300000e+01> ; - %V = select <3 x i1> %cond, <3 x float> <float 42.0, float undef, float undef>, <3 x float> <float undef, float undef, float 43.0> + %V = select <3 x i1> %cond, <3 x float> <float 42.0, float poison, float poison>, <3 x float> <float poison, float poison, float 43.0> ret <3 x float> %V } @@ -113,19 +113,19 @@ define <2 x i8> @vsel_mixedvec() { ret <2 x i8> %s } -define <3 x i8> @vsel_undef_true_op(<3 x i8> %x, <3 x i8> %y) { -; CHECK-LABEL: @vsel_undef_true_op( +define <3 x i8> @vsel_poison_true_op(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @vsel_poison_true_op( ; CHECK-NEXT: ret <3 x i8> [[X:%.*]] ; - %s = select <3 x i1><i1 1, i1 undef, i1 1>, <3 x i8> %x, <3 x i8> %y + %s = select <3 x i1><i1 1, i1 poison, i1 1>, <3 x i8> %x, <3 x i8> %y ret <3 x i8> %s } -define <3 x i4> @vsel_undef_false_op(<3 x i4> %x, <3 x i4> %y) { -; CHECK-LABEL: @vsel_undef_false_op( +define <3 x i4> @vsel_poison_false_op(<3 x i4> %x, <3 x i4> %y) { +; CHECK-LABEL: @vsel_poison_false_op( ; CHECK-NEXT: ret <3 x i4> [[Y:%.*]] ; - %s = select <3 x i1><i1 0, i1 undef, i1 undef>, <3 x i4> %x, <3 x i4> %y + %s = select <3 x i1><i1 0, i1 poison, i1 
poison>, <3 x i4> %x, <3 x i4> %y ret <3 x i4> %s } diff --git a/llvm/test/Transforms/InstSimplify/shift.ll b/llvm/test/Transforms/InstSimplify/shift.ll index b562c3c..a816fcb 100644 --- a/llvm/test/Transforms/InstSimplify/shift.ll +++ b/llvm/test/Transforms/InstSimplify/shift.ll @@ -17,11 +17,11 @@ define i41 @shl_0(i41 %X) { ret i41 %B } -define <2 x i41> @shl_0_vec_undef_elt(<2 x i41> %X) { -; CHECK-LABEL: @shl_0_vec_undef_elt( +define <2 x i41> @shl_0_vec_poison_elt(<2 x i41> %X) { +; CHECK-LABEL: @shl_0_vec_poison_elt( ; CHECK-NEXT: ret <2 x i41> zeroinitializer ; - %B = shl <2 x i41> <i41 0, i41 undef>, %X + %B = shl <2 x i41> <i41 0, i41 poison>, %X ret <2 x i41> %B } @@ -41,11 +41,11 @@ define i39 @ashr_0(i39 %X) { ret i39 %B } -define <2 x i141> @ashr_0_vec_undef_elt(<2 x i141> %X) { -; CHECK-LABEL: @ashr_0_vec_undef_elt( +define <2 x i141> @ashr_0_vec_poison_elt(<2 x i141> %X) { +; CHECK-LABEL: @ashr_0_vec_poison_elt( ; CHECK-NEXT: ret <2 x i141> zeroinitializer ; - %B = shl <2 x i141> <i141 undef, i141 0>, %X + %B = shl <2 x i141> <i141 poison, i141 0>, %X ret <2 x i141> %B } @@ -113,11 +113,11 @@ define i32 @ashr_all_ones(i32 %A) { ret i32 %B } -define <3 x i8> @ashr_all_ones_vec_with_undef_elts(<3 x i8> %x, <3 x i8> %y) { -; CHECK-LABEL: @ashr_all_ones_vec_with_undef_elts( +define <3 x i8> @ashr_all_ones_vec_with_poison_elts(<3 x i8> %x, <3 x i8> %y) { +; CHECK-LABEL: @ashr_all_ones_vec_with_poison_elts( ; CHECK-NEXT: ret <3 x i8> <i8 -1, i8 -1, i8 -1> ; - %sh = ashr <3 x i8> <i8 undef, i8 -1, i8 undef>, %y + %sh = ashr <3 x i8> <i8 poison, i8 -1, i8 poison>, %y ret <3 x i8> %sh } @@ -306,11 +306,22 @@ define <2 x i7> @all_ones_left_right_splat(<2 x i7> %x) { ; Poison could propagate, but undef must not. 
-define <3 x i7> @all_ones_left_right_splat_poison_undef_elt(<3 x i7> %x) { -; CHECK-LABEL: @all_ones_left_right_splat_poison_undef_elt( +define <3 x i7> @all_ones_left_right_splat_undef_elt(<3 x i7> %x) { +; CHECK-LABEL: @all_ones_left_right_splat_undef_elt( +; CHECK-NEXT: [[LEFT:%.*]] = shl <3 x i7> <i7 undef, i7 -1, i7 undef>, [[X:%.*]] +; CHECK-NEXT: [[RIGHT:%.*]] = ashr <3 x i7> [[LEFT]], [[X]] +; CHECK-NEXT: ret <3 x i7> [[RIGHT]] +; + %left = shl <3 x i7> <i7 undef, i7 -1, i7 undef>, %x + %right = ashr <3 x i7> %left, %x + ret <3 x i7> %right +} + +define <3 x i7> @all_ones_left_right_splat_poison__elt(<3 x i7> %x) { +; CHECK-LABEL: @all_ones_left_right_splat_poison__elt( ; CHECK-NEXT: ret <3 x i7> <i7 -1, i7 -1, i7 -1> ; - %left = shl <3 x i7> <i7 poison, i7 -1, i7 undef>, %x + %left = shl <3 x i7> <i7 poison, i7 -1, i7 poison>, %x %right = ashr <3 x i7> %left, %x ret <3 x i7> %right } diff --git a/llvm/test/Transforms/InstSimplify/srem.ll b/llvm/test/Transforms/InstSimplify/srem.ll index b1cbdf3..ab72683 100644 --- a/llvm/test/Transforms/InstSimplify/srem.ll +++ b/llvm/test/Transforms/InstSimplify/srem.ll @@ -39,11 +39,11 @@ define <2 x i32> @knownnegation_commute_vec(<2 x i32> %x, <2 x i32> %y) { ret <2 x i32> %rem } -define <3 x i32> @negated_operand_vec_undef(<3 x i32> %x) { -; CHECK-LABEL: @negated_operand_vec_undef( +define <3 x i32> @negated_operand_vec_poison(<3 x i32> %x) { +; CHECK-LABEL: @negated_operand_vec_poison( ; CHECK-NEXT: ret <3 x i32> zeroinitializer ; - %negx = sub <3 x i32> <i32 0, i32 undef, i32 0>, %x + %negx = sub <3 x i32> <i32 0, i32 poison, i32 0>, %x %rem = srem <3 x i32> %negx, %x ret <3 x i32> %rem } diff --git a/llvm/test/Transforms/InstSimplify/sub.ll b/llvm/test/Transforms/InstSimplify/sub.ll index deb0ee3..fd88fc1 100644 --- a/llvm/test/Transforms/InstSimplify/sub.ll +++ b/llvm/test/Transforms/InstSimplify/sub.ll @@ -29,7 +29,7 @@ define <2 x i32> @sub_zero_vec(<2 x i32> %A) { ; CHECK-LABEL: @sub_zero_vec( ; CHECK-NEXT: 
ret <2 x i32> [[A:%.*]] ; - %B = sub <2 x i32> %A, <i32 0, i32 undef> + %B = sub <2 x i32> %A, <i32 0, i32 poison> ret <2 x i32> %B } @@ -46,8 +46,8 @@ define <2 x i32> @neg_neg_vec(<2 x i32> %A) { ; CHECK-LABEL: @neg_neg_vec( ; CHECK-NEXT: ret <2 x i32> [[A:%.*]] ; - %B = sub <2 x i32> <i32 0, i32 undef>, %A - %C = sub <2 x i32> <i32 0, i32 undef>, %B + %B = sub <2 x i32> <i32 0, i32 poison>, %A + %C = sub <2 x i32> <i32 0, i32 poison>, %B ret <2 x i32> %C } diff --git a/llvm/test/Transforms/InstSimplify/xor.ll b/llvm/test/Transforms/InstSimplify/xor.ll index 0e23cc6..229e943 100644 --- a/llvm/test/Transforms/InstSimplify/xor.ll +++ b/llvm/test/Transforms/InstSimplify/xor.ll @@ -156,6 +156,20 @@ define <2 x i4> @xor_and_or_not_undef_elt(<2 x i4> %a, <2 x i4> %b) { ret <2 x i4> %r } +; but correct to propagate poison element + +define <2 x i4> @xor_and_or_not_poison_elt(<2 x i4> %a, <2 x i4> %b) { +; CHECK-LABEL: @xor_and_or_not_poison_elt( +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i4> [[A:%.*]], <i4 -1, i4 poison> +; CHECK-NEXT: ret <2 x i4> [[NOT]] +; + %and = and <2 x i4> %b, %a + %not = xor <2 x i4> %a, <i4 -1, i4 poison> + %or = or <2 x i4> %not, %b + %r = xor <2 x i4> %or, %and + ret <2 x i4> %r +} + define i4 @xor_or_and_not_commute0(i4 %a, i4 %b) { ; CHECK-LABEL: @xor_or_and_not_commute0( ; CHECK-NEXT: ret i4 [[A:%.*]] @@ -277,11 +291,11 @@ define i4 @xor_or_and_not_wrong_val2(i4 %a, i4 %b, i4 %c) { ret i4 %r } -define <2 x i4> @xor_or_and_not_undef_elt(<2 x i4> %a, <2 x i4> %b) { -; CHECK-LABEL: @xor_or_and_not_undef_elt( +define <2 x i4> @xor_or_and_not_poison_elt(<2 x i4> %a, <2 x i4> %b) { +; CHECK-LABEL: @xor_or_and_not_poison_elt( ; CHECK-NEXT: ret <2 x i4> [[A:%.*]] ; - %not = xor <2 x i4> %a, <i4 -1, i4 undef> + %not = xor <2 x i4> %a, <i4 -1, i4 poison> %and = and <2 x i4> %b, %not %or = or <2 x i4> %a, %b %r = xor <2 x i4> %or, %and diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll 
b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll index feb22aa..45e2c36 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll @@ -491,6 +491,30 @@ define void @store_bfloat_factor2(ptr %ptr, <16 x bfloat> %v0, <16 x bfloat> %v1 ret void } +; Ensure vscale_range property does not affect scalable vector types. +define { <vscale x 4 x double>, <vscale x 4 x double> } @deinterleave_nxptr_factor2(ptr %ptr) #2 { +; CHECK-LABEL: define { <vscale x 4 x double>, <vscale x 4 x double> } @deinterleave_nxptr_factor2( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr <vscale x 2 x double>, ptr [[PTR]], i64 0 +; CHECK-NEXT: [[LDN1:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP1]]) +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN1]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP2]], i64 0) +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN1]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> poison, <vscale x 2 x double> [[TMP4]], i64 0) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr <vscale x 2 x double>, ptr [[PTR]], i64 2 +; CHECK-NEXT: [[LDN2:%.*]] = call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.aarch64.sve.ld2.sret.nxv2f64(<vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), ptr [[TMP6]]) +; CHECK-NEXT: [[TMP7:%.*]] = 
extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN2]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP3]], <vscale x 2 x double> [[TMP7]], i64 2) +; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <vscale x 2 x double>, <vscale x 2 x double> } [[LDN2]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x double> @llvm.vector.insert.nxv4f64.nxv2f64(<vscale x 4 x double> [[TMP5]], <vscale x 2 x double> [[TMP9]], i64 2) +; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { <vscale x 4 x double>, <vscale x 4 x double> } poison, <vscale x 4 x double> [[TMP8]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <vscale x 4 x double>, <vscale x 4 x double> } [[TMP11]], <vscale x 4 x double> [[TMP10]], 1 +; CHECK-NEXT: ret { <vscale x 4 x double>, <vscale x 4 x double> } [[TMP12]] +; + %wide.vec = load <vscale x 8 x double>, ptr %ptr, align 8 + %ldN = tail call { <vscale x 4 x double>, <vscale x 4 x double> } @llvm.experimental.vector.deinterleave2.nxv8f64(<vscale x 8 x double> %wide.vec) + ret { <vscale x 4 x double>, <vscale x 4 x double> } %ldN +} + attributes #0 = { vscale_range(2,2) "target-features"="+sve" } attributes #1 = { vscale_range(2,4) "target-features"="+sve" } attributes #2 = { vscale_range(4,4) "target-features"="+sve" } diff --git a/llvm/test/Transforms/LoopFlatten/widen-iv3.ll b/llvm/test/Transforms/LoopFlatten/widen-iv3.ll index 6e6c045..3ac5a69 100644 --- a/llvm/test/Transforms/LoopFlatten/widen-iv3.ll +++ b/llvm/test/Transforms/LoopFlatten/widen-iv3.ll @@ -35,7 +35,7 @@ define i16 @foo() { ; CHECK-NEXT: [[SUM_110:%.*]] = phi i16 [ [[SUM_012]], [[FOR_COND1_PREHEADER]] ], [ [[ADD5]], [[FOR_BODY4]] ] ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[INDVAR]], [[TMP0]] ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i16 [[J_011]], [[MUL]] -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16 +; CHECK-NEXT: [[TMP3:%.*]] = trunc nuw nsw i32 [[TMP2]] to i16 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [64 x i16], ptr @v, i16 0, i16 [[TMP3]] ; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[ADD5]] = add nsw i16 [[TMP4]], [[SUM_110]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll index 2470bca..1c26ee8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-select-cmp.ll @@ -8,41 +8,39 @@ target triple = "aarch64-linux-gnu" define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @select_const_i32_from_icmp ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32> ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor <vscale x 4 x i1> [[VEC_ICMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; CHECK-VF4IC1: middle.block: -; 
CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 7, i32 3 +; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3 ; CHECK-VF4IC4-LABEL: @select_const_i32_from_icmp ; CHECK-VF4IC4: vector.body: -; CHECK-VF4IC4: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] -; CHECK-VF4IC4: [[VEC_PHI2:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] -; CHECK-VF4IC4: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] -; CHECK-VF4IC4: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] +; CHECK-VF4IC4: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] +; CHECK-VF4IC4: [[VEC_PHI2:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] +; CHECK-VF4IC4: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL3:%.*]], 
%vector.body ] +; CHECK-VF4IC4: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] ; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq <vscale x 4 x i32> {{.*}}, shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC4-NEXT: [[NOT1:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP1]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC4-NEXT: [[NOT2:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP2]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC4-NEXT: [[NOT3:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP3]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC4-NEXT: [[NOT4:%.*]] = xor <vscale x 4 x i1> [[VEC_ICMP4]], shufflevector (<vscale x 4 x i1> insertelement 
(<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC4-NEXT: [[VEC_SEL1:%.*]] = or <vscale x 4 x i1> [[VEC_PHI1]], [[NOT1]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL2:%.*]] = or <vscale x 4 x i1> [[VEC_PHI2]], [[NOT2]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = or <vscale x 4 x i1> [[VEC_PHI3]], [[NOT3]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL4:%.*]] = or <vscale x 4 x i1> [[VEC_PHI4]], [[NOT4]] +; CHECK-VF4IC4-NEXT: [[VEC_SEL1]] = select <vscale x 4 x i1> [[VEC_ICMP1]], <vscale x 4 x i32> [[VEC_PHI1]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC4-NEXT: [[VEC_SEL2]] = select <vscale x 4 x i1> [[VEC_ICMP2]], <vscale x 4 x i32> [[VEC_PHI2]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC4-NEXT: [[VEC_SEL3]] = select <vscale x 4 x i1> [[VEC_ICMP3]], <vscale x 4 x i32> [[VEC_PHI3]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC4-NEXT: [[VEC_SEL4]] = select <vscale x 4 x i1> [[VEC_ICMP4]], <vscale x 4 x i32> [[VEC_PHI4]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; CHECK-VF4IC4: middle.block: -; CHECK-VF4IC4-NEXT: [[OR1:%.*]] = or <vscale x 4 x i1> [[VEC_SEL2]], [[VEC_SEL1]] -; CHECK-VF4IC4-NEXT: [[OR2:%.*]] = or <vscale x 4 x i1> [[VEC_SEL3]], [[OR1]] -; CHECK-VF4IC4-NEXT: [[OR3:%.*]] = or <vscale x 4 x i1> [[VEC_SEL4]], [[OR2]] -; CHECK-VF4IC4-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[OR3]]) -; CHECK-VF4IC4-NEXT: [[FR:%.*]] = freeze i1 
[[OR_RDX]] -; CHECK-VF4IC4-NEXT: {{.*}} = select i1 [[FR]], i32 7, i32 3 +; CHECK-VF4IC4-NEXT: [[VEC_ICMP5:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL1]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC4-NEXT: [[VEC_SEL5:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP5]], <vscale x 4 x i32> [[VEC_SEL1]], <vscale x 4 x i32> [[VEC_SEL2]] +; CHECK-VF4IC4-NEXT: [[VEC_ICMP6:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL5]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC4-NEXT: [[VEC_SEL6:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP6]], <vscale x 4 x i32> [[VEC_SEL5]], <vscale x 4 x i32> [[VEC_SEL3]] +; CHECK-VF4IC4-NEXT: [[VEC_ICMP7:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL6]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC4-NEXT: [[VEC_SEL7:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP7]], <vscale x 4 x i32> [[VEC_SEL6]], <vscale x 4 x i32> [[VEC_SEL4]] +; CHECK-VF4IC4-NEXT: [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL7]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC4-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]]) +; CHECK-VF4IC4-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3 entry: br label %for.body @@ -64,18 +62,21 @@ exit: ; preds = %for.body define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @select_i32_from_icmp ; CHECK-VF4IC1: vector.ph: -; CHECK-VF4IC1-NOT: shufflevector <vscale x 4 x i32> -; CHECK-VF4IC1-NOT: shufflevector <vscale x 4 x i32> +; CHECK-VF4IC1: 
[[TMP1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %a, i64 0 +; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %b, i64 0 +; CHECK-VF4IC1-NEXT: [[SPLAT_OF_B:%.*]] = shufflevector <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[SPLAT_OF_A]], %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32> ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor <vscale x 4 x i1> [[VEC_ICMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[SPLAT_OF_B]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 %b, i32 %a +; CHECK-VF4IC1-NEXT: [[FIN_INS:%.*]] = insertelement <vscale x 4 x i32> poison, i32 %a, i64 0 +; CHECK-VF4IC1-NEXT: [[FIN_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[FIN_INS]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: 
[[FIN_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], [[FIN_SPLAT]] +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_CMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 %b, i32 %a ; CHECK-VF4IC4-LABEL: @select_i32_from_icmp ; CHECK-VF4IC4: vector.body: @@ -100,15 +101,14 @@ exit: ; preds = %for.body define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @select_const_i32_from_fcmp ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x float> ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = fcmp fast ueq <vscale x 4 x float> [[VEC_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC1-NEXT: [[NOT:%*]] = xor <vscale x 4 x i1> [[VEC_ICMP]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze 
i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 1, i32 2 +; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 2 ; CHECK-VF4IC4-LABEL: @select_const_i32_from_fcmp ; CHECK-VF4IC4: vector.body: @@ -156,17 +156,17 @@ exit: ; preds = %for.body define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) #0 { ; CHECK-VF4IC1-LABEL: @pred_select_const_i32_from_icmp ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <vscale x 4 x i32> ; CHECK-VF4IC1: [[MASK:%.*]] = icmp sgt <vscale x 4 x i32> [[VEC_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 35, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; CHECK-VF4IC1: [[MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr {{%.*}}, i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> poison) ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <vscale x 4 x i32> [[MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; CHECK-VF4IC1-NEXT: [[VEC_SEL_TMP:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[VEC_ICMP]] -; CHECK-VF4IC1: [[VEC_SEL:%.*]] = select <vscale x 4 x i1> [[MASK]], <vscale x 4 x i1> [[VEC_SEL_TMP]], <vscale x 4 x i1> [[VEC_PHI]] +; 
CHECK-VF4IC1-NEXT: [[VEC_SEL_TMP:%.*]] = select <vscale x 4 x i1> [[VEC_ICMP]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VEC_PHI]] +; CHECK-VF4IC1: [[VEC_SEL:%.*]] = select <vscale x 4 x i1> [[MASK]], <vscale x 4 x i32> [[VEC_SEL_TMP]], <vscale x 4 x i32> [[VEC_PHI]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR]], i32 1, i32 0 +; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <vscale x 4 x i32> [[VEC_SEL]], zeroinitializer +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[FIN_ICMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 0 ; CHECK-VF4IC4-LABEL: @pred_select_const_i32_from_icmp ; CHECK-VF4IC4: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll index d5ace65..c226135 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll @@ -46,8 +46,8 @@ define void @vector_reverse_mask_v4i1(ptr noalias %a, ptr noalias %cond, i64 %N) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP7]], i64 -24 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i64 -56 ; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i1> [[TMP5]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP8]], i32 8, <4 x i1> [[REVERSE3]], <4 x double> poison) +; 
CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[WIDE_MASKED_LOAD6:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP9]], i32 8, <4 x i1> [[REVERSE4]], <4 x double> poison) ; CHECK-NEXT: [[TMP10:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00> ; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[WIDE_MASKED_LOAD6]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00> diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll index c49f7d4..eeef8f1 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll @@ -102,10 +102,10 @@ define void @uniform_store_i1(ptr noalias %dst, ptr noalias %start, i64 %N) { ; CHECK-LABEL: @uniform_store_i1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 64 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[N_VEC]], 8 ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP1]] @@ -116,15 +116,12 @@ define void @uniform_store_i1(ptr noalias %dst, ptr noalias %start, i64 %N) { ; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr 
[[POINTER_PHI]], <32 x i64> <i64 0, i64 8, i64 16, i64 24, i64 32, i64 40, i64 48, i64 56, i64 64, i64 72, i64 80, i64 88, i64 96, i64 104, i64 112, i64 120, i64 128, i64 136, i64 144, i64 152, i64 160, i64 168, i64 176, i64 184, i64 192, i64 200, i64 208, i64 216, i64 224, i64 232, i64 240, i64 248> -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> <i64 256, i64 264, i64 272, i64 280, i64 288, i64 296, i64 304, i64 312, i64 320, i64 328, i64 336, i64 344, i64 352, i64 360, i64 368, i64 376, i64 384, i64 392, i64 400, i64 408, i64 416, i64 424, i64 432, i64 440, i64 448, i64 456, i64 464, i64 472, i64 480, i64 488, i64 496, i64 504> -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, <32 x ptr> [[TMP2]], i64 1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, <32 x ptr> [[TMP3]], i64 1 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <32 x ptr> [[TMP4]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, <32 x ptr> [[TMP2]], i64 1 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <32 x ptr> [[TMP5]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i1> [[TMP7]], i32 31 ; CHECK-NEXT: store i1 [[TMP8]], ptr [[DST:%.*]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 512 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 256 ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll index 2b58acb..8a2dc0a 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/select-cmp-reduction.ll @@ 
-1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S \ ; RUN: < %s | FileCheck %s ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 \ @@ -6,59 +7,109 @@ target triple = "riscv64" define i32 @select_icmp(i32 %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { -; CHECK-LABEL: @select_icmp +; CHECK-LABEL: define i32 @select_icmp( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], ptr nocapture readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: 
[[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_PHI]], <4 x i32> [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i32 [[Y]], i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], 
[[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[COND:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP7]], [[X]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[A]], i32 [[Y]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[COND_LCSSA]] ; -; SCALABLE-LABEL: @select_icmp +; SCALABLE-LABEL: define i32 @select_icmp( +; SCALABLE-SAME: i32 [[X:%.*]], i32 [[Y:%.*]], ptr nocapture readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { +; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 
[[TMP10]], 4 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP4]] -; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SCALABLE-NEXT: 
[[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP6]] +; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 +; SCALABLE-NEXT: [[TMP9:%.*]] = icmp slt <vscale x 4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; SCALABLE-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[BROADCAST_SPLAT2]] +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 +; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[TMP10]], zeroinitializer +; SCALABLE-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[RDX_SELECT_CMP]]) +; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP12]], i32 [[Y]], i32 0 +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; SCALABLE: scalar.ph: +; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; 
SCALABLE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] +; SCALABLE: for.body: +; SCALABLE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[A:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[COND:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] +; SCALABLE-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; SCALABLE-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP13]], [[X]] +; SCALABLE-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[A]], i32 [[Y]] +; SCALABLE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; SCALABLE: for.end: +; SCALABLE-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: ret i32 [[COND_LCSSA]] ; entry: br label %for.body @@ -79,59 +130,109 @@ for.end: } define i32 @select_fcmp(float %x, i32 %y, ptr nocapture readonly %c, i64 %n) #0 { -; CHECK-LABEL: @select_fcmp +; CHECK-LABEL: define i32 @select_fcmp( +; CHECK-SAME: float [[X:%.*]], i32 [[Y:%.*]], ptr nocapture readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X:%.*]], i64 0 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: 
[[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_PHI]], <4 x i32> [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop 
[[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i32 [[Y]], i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[COND:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = fcmp fast olt float [[TMP7]], [[X]] +; CHECK-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[A]], i32 [[Y]] +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; 
CHECK-NEXT: ret i32 [[COND_LCSSA]] ; -; SCALABLE-LABEL: @select_fcmp +; SCALABLE-LABEL: define i32 @select_fcmp( +; SCALABLE-SAME: float [[X:%.*]], i32 [[Y:%.*]], ptr nocapture readonly [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X:%.*]], i64 0 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[X]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x float> [[BROADCAST_SPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; 
SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[C:%.*]], i64 [[TMP4]] -; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 -; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[TMP6]] +; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[TMP7]], i32 0 +; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP8]], align 4 +; SCALABLE-NEXT: [[TMP9:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; SCALABLE-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[BROADCAST_SPLAT2]] +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 
[[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %y, i32 0 +; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[TMP10]], zeroinitializer +; SCALABLE-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[RDX_SELECT_CMP]]) +; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP12]], i32 [[Y]], i32 0 +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; SCALABLE: scalar.ph: +; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SCALABLE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] +; SCALABLE: for.body: +; SCALABLE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[A:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[COND:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[C]], i64 [[INDVARS_IV]] +; SCALABLE-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4 +; SCALABLE-NEXT: [[CMP1:%.*]] = fcmp fast olt float [[TMP13]], [[X]] +; SCALABLE-NEXT: [[COND]] = select i1 [[CMP1]], i32 [[A]], i32 [[Y]] +; SCALABLE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; SCALABLE: for.end: +; SCALABLE-NEXT: [[COND_LCSSA:%.*]] = phi i32 [ [[COND]], [[FOR_BODY]] ], 
[ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: ret i32 [[COND_LCSSA]] ; entry: br label %for.body @@ -152,55 +253,101 @@ for.end: } define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { -; CHECK-LABEL: @select_const_i32_from_icmp +; CHECK-LABEL: define i32 @select_const_i32_from_icmp( +; CHECK-SAME: ptr nocapture readonly [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 3, i32 3, i32 3, i32 3>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 3, i32 3, i32 3, i32 3> -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> 
[[VEC_PHI]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 7, i32 3 +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP5]], <i32 3, i32 3, i32 3, i32 3> +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 7, i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 3, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP15:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 3 +; CHECK-NEXT: [[TMP12]] = select i1 [[TMP11]], i32 [[TMP8]], i32 7 +; CHECK-NEXT: [[TMP13]] = add nuw nsw i64 [[TMP15]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], [[N]] +; CHECK-NEXT: br i1 [[TMP14]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP12]], [[FOR_BODY]] ], [ [[RDX_SELECT]], 
[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[DOTLCSSA]] ; -; SCALABLE-LABEL: @select_const_i32_from_icmp +; SCALABLE-LABEL: define i32 @select_const_i32_from_icmp( +; SCALABLE-SAME: ptr nocapture readonly [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr 
inbounds i32, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] +; SCALABLE-NEXT: [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 7, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 7, i32 3 +; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[TMP9]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[RDX_SELECT_CMP]]) +; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 7, i32 3 +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; SCALABLE: scalar.ph: +; SCALABLE-NEXT: 
[[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SCALABLE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 3, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] +; SCALABLE: for.body: +; SCALABLE-NEXT: [[TMP21:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP14:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP21]] +; SCALABLE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 3 +; SCALABLE-NEXT: [[TMP18]] = select i1 [[TMP17]], i32 [[TMP14]], i32 7 +; SCALABLE-NEXT: [[TMP19]] = add nuw nsw i64 [[TMP21]], 1 +; SCALABLE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP19]], [[N]] +; SCALABLE-NEXT: br i1 [[TMP20]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; SCALABLE: exit: +; SCALABLE-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP18]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: ret i32 [[DOTLCSSA]] ; entry: br label %for.body @@ -221,55 +368,113 @@ exit: ; preds = %for.body } define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) #0 { -; CHECK-LABEL: @select_i32_from_icmp +; CHECK-LABEL: define i32 @select_i32_from_icmp( +; CHECK-SAME: ptr nocapture readonly [[V:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x 
i32> poison, i32 [[A]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[B]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], <i32 3, i32 3, i32 3, i32 3> -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[FR:%.*]] = 
freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %b, i32 %a +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP5]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 [[B]], i32 [[A]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[A]], [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP15:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 3 +; CHECK-NEXT: [[TMP12]] = select i1 [[TMP11]], i32 [[TMP8]], i32 [[B]] +; CHECK-NEXT: [[TMP13]] = add nuw nsw i64 [[TMP15]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], [[N]] +; CHECK-NEXT: br i1 [[TMP14]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP12]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[DOTLCSSA]] ; -; SCALABLE-LABEL: @select_i32_from_icmp +; SCALABLE-LABEL: define i32 @select_i32_from_icmp( +; SCALABLE-SAME: ptr nocapture readonly [[V:%.*]], i32 [[A:%.*]], 
i32 [[B:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 +; SCALABLE-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[A]], i64 0 +; SCALABLE-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[B]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 
-; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] +; SCALABLE-NEXT: [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[BROADCAST_SPLAT]] ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 %b, i32 %a +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[A]], i64 0 +; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer +; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[TMP9]], [[DOTSPLAT]] +; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[RDX_SELECT_CMP]]) +; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 
[[TMP13]], i32 [[B]], i32 [[A]] +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; SCALABLE: scalar.ph: +; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SCALABLE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[A]], [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] +; SCALABLE: for.body: +; SCALABLE-NEXT: [[TMP21:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP14:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP21]] +; SCALABLE-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP15]], align 4 +; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i32 [[TMP16]], 3 +; SCALABLE-NEXT: [[TMP18]] = select i1 [[TMP17]], i32 [[TMP14]], i32 [[B]] +; SCALABLE-NEXT: [[TMP19]] = add nuw nsw i64 [[TMP21]], 1 +; SCALABLE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP19]], [[N]] +; SCALABLE-NEXT: br i1 [[TMP20]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; SCALABLE: exit: +; SCALABLE-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP18]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: ret i32 [[DOTLCSSA]] ; entry: br label %for.body @@ -290,55 +495,101 @@ exit: ; preds = %for.body } define i32 @select_const_i32_from_fcmp(ptr nocapture readonly %v, i64 %n) #0 { -; CHECK-LABEL: @select_const_i32_from_fcmp +; CHECK-LABEL: define i32 @select_const_i32_from_fcmp( +; CHECK-SAME: ptr nocapture readonly [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, 
[[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 2, i32 2, i32 2, i32 2>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ueq <4 x float> [[WIDE_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00> -; CHECK-NEXT: [[NOT:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i32> [[VEC_PHI]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 2 +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP5]], <i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: 
[[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 1, i32 2 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 2, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP15:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = fcmp fast ueq float [[TMP10]], 3.000000e+00 +; CHECK-NEXT: [[TMP12]] = select i1 [[TMP11]], i32 [[TMP8]], i32 1 +; CHECK-NEXT: [[TMP13]] = add nuw nsw i64 [[TMP15]], 1 +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], [[N]] +; CHECK-NEXT: br i1 [[TMP14]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP12]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[DOTLCSSA]] ; -; SCALABLE-LABEL: @select_const_i32_from_fcmp +; SCALABLE-LABEL: define i32 @select_const_i32_from_fcmp( +; SCALABLE-SAME: ptr nocapture readonly [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = 
call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; SCALABLE-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 4 ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[V]], i64 [[TMP4]] ; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[TMP5]], i32 0 ; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP6]], align 4 ; SCALABLE-NEXT: [[TMP8:%.*]] = fcmp fast ueq <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer) -; SCALABLE-NEXT: [[NOT:%.*]] = xor <vscale x 4 x i1> [[TMP8]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer) -; 
SCALABLE-NEXT: [[TMP9]] = or <vscale x 4 x i1> [[VEC_PHI]], [[NOT]] +; SCALABLE-NEXT: [[TMP9]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) ; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP13]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 2 +; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[TMP9]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; SCALABLE-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[RDX_SELECT_CMP]]) +; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP13]], i32 1, i32 2 +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; SCALABLE: scalar.ph: +; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SCALABLE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 2, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] +; SCALABLE: for.body: +; SCALABLE-NEXT: [[TMP21:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP14:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[V]], i64 
[[TMP21]] +; SCALABLE-NEXT: [[TMP16:%.*]] = load float, ptr [[TMP15]], align 4 +; SCALABLE-NEXT: [[TMP17:%.*]] = fcmp fast ueq float [[TMP16]], 3.000000e+00 +; SCALABLE-NEXT: [[TMP18]] = select i1 [[TMP17]], i32 [[TMP14]], i32 1 +; SCALABLE-NEXT: [[TMP19]] = add nuw nsw i64 [[TMP21]], 1 +; SCALABLE-NEXT: [[TMP20:%.*]] = icmp eq i64 [[TMP19]], [[N]] +; SCALABLE-NEXT: br i1 [[TMP20]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; SCALABLE: exit: +; SCALABLE-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP18]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: ret i32 [[DOTLCSSA]] ; entry: br label %for.body @@ -359,11 +610,41 @@ exit: ; preds = %for.body } define float @select_const_f32_from_icmp(ptr nocapture readonly %v, i64 %n) #0 { -; CHECK-LABEL: @select_const_f32_from_icmp -; CHECK-NOT: vector.body +; CHECK-LABEL: define float @select_const_f32_from_icmp( +; CHECK-SAME: ptr nocapture readonly [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP0:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi fast float [ 3.000000e+00, [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 3 +; CHECK-NEXT: [[TMP5]] = select fast i1 [[TMP4]], float [[TMP1]], float 7.000000e+00 +; CHECK-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP6]], [[N]] +; CHECK-NEXT: br i1 [[TMP7]], label [[EXIT:%.*]], label [[FOR_BODY]] +; CHECK: exit: +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ] +; CHECK-NEXT: ret float [[DOTLCSSA]] ; -; SCALABLE-LABEL: @select_const_f32_from_icmp -; SCALABLE-NOT: vector.body +; SCALABLE-LABEL: define float @select_const_f32_from_icmp( +; 
SCALABLE-SAME: ptr nocapture readonly [[V:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] +; SCALABLE: for.body: +; SCALABLE-NEXT: [[TMP0:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP1:%.*]] = phi fast float [ 3.000000e+00, [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] +; SCALABLE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[V]], i64 [[TMP0]] +; SCALABLE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +; SCALABLE-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 3 +; SCALABLE-NEXT: [[TMP5]] = select fast i1 [[TMP4]], float [[TMP1]], float 7.000000e+00 +; SCALABLE-NEXT: [[TMP6]] = add nuw nsw i64 [[TMP0]], 1 +; SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP6]], [[N]] +; SCALABLE-NEXT: br i1 [[TMP7]], label [[EXIT:%.*]], label [[FOR_BODY]] +; SCALABLE: exit: +; SCALABLE-NEXT: [[DOTLCSSA:%.*]] = phi float [ [[TMP5]], [[FOR_BODY]] ] +; SCALABLE-NEXT: ret float [[DOTLCSSA]] ; entry: br label %for.body @@ -384,63 +665,127 @@ exit: ; preds = %for.body } define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) #0 { -; CHECK-LABEL: @pred_select_const_i32_from_icmp +; CHECK-LABEL: define i32 @pred_select_const_i32_from_icmp( +; CHECK-SAME: ptr noalias nocapture readonly [[SRC1:%.*]], ptr noalias nocapture readonly [[SRC2:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], <i32 35, i32 35, i32 35, i32 35> -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[SRC2:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[TMP4]], <4 x i32> poison) -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_MASKED_LOAD]], <i32 2, i32 2, i32 2, i32 2> -; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i1> [[VEC_PHI]], [[TMP8]] -; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP4]], <4 x i1> [[TMP9]], <4 x i1> [[VEC_PHI]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], <i32 35, i32 35, i32 35, i32 35> +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[SRC2]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[TMP5]], i32 4, <4 x i1> [[TMP3]], <4 x i32> poison) +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <4 x i32> [[WIDE_MASKED_LOAD]], <i32 2, i32 2, i32 2, i32 2> +; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> [[VEC_PHI]] 
+; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP7]], <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[PREDPHI]]) -; CHECK-NEXT: [[FR:%.*]] = freeze i1 [[TMP12]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 0 +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[PREDPHI]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP9]], i32 1, i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], [[FOR_INC]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[I_013]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP10]], 35 +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; CHECK: if.then: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds 
i32, ptr [[SRC2]], i64 [[I_013]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP11]], 2 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 1, i32 [[R_012]] +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[R_1]] = phi i32 [ [[R_012]], [[FOR_BODY]] ], [ [[SPEC_SELECT]], [[IF_THEN]] ] +; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: for.end.loopexit: +; CHECK-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], [[FOR_INC]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[R_1_LCSSA]] ; -; SCALABLE-LABEL: @pred_select_const_i32_from_icmp +; SCALABLE-LABEL: define i32 @pred_select_const_i32_from_icmp( +; SCALABLE-SAME: ptr noalias nocapture readonly [[SRC1:%.*]], ptr noalias nocapture readonly [[SRC2:%.*]], i64 [[N:%.*]]) #[[ATTR0]] { +; SCALABLE-NEXT: entry: +; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 %n, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 %n, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; SCALABLE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 ; SCALABLE-NEXT: br label 
[[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: -; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 [[TMP4]] -; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4 -; SCALABLE-NEXT: [[TMP8:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 35, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[SRC2:%.*]], i64 [[TMP4]] -; SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 0 -; SCALABLE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP10]], i32 4, <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i32> poison) +; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[TMP6]] +; SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; SCALABLE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4 +; SCALABLE-NEXT: [[TMP9:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 35, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) +; SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[SRC2]], 
i64 [[TMP6]] +; SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 +; SCALABLE-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> poison) ; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer) -; SCALABLE-NEXT: [[TMP13:%.*]] = or <vscale x 4 x i1> [[VEC_PHI]], [[TMP12]] -; SCALABLE-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i1> [[TMP13]], <vscale x 4 x i1> [[VEC_PHI]] -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] -; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; SCALABLE-NEXT: [[TMP13:%.*]] = select <vscale x 4 x i1> [[TMP12]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> [[VEC_PHI]] +; SCALABLE-NEXT: [[PREDPHI]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x i32> [[TMP13]], <vscale x 4 x i32> [[VEC_PHI]] +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; SCALABLE-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[PREDPHI]]) -; SCALABLE-NEXT: [[FR:%.*]] = freeze i1 [[TMP18]] -; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR]], i32 1, i32 0 +; SCALABLE-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <vscale x 4 x i32> [[PREDPHI]], zeroinitializer +; SCALABLE-NEXT: [[TMP15:%.*]] = 
call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[RDX_SELECT_CMP]]) +; SCALABLE-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP15]], i32 1, i32 0 +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; SCALABLE: scalar.ph: +; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; SCALABLE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: br label [[FOR_BODY:%.*]] +; SCALABLE: for.body: +; SCALABLE-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; SCALABLE-NEXT: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], [[FOR_INC]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[I_013]] +; SCALABLE-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; SCALABLE-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP16]], 35 +; SCALABLE-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; SCALABLE: if.then: +; SCALABLE-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[I_013]] +; SCALABLE-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; SCALABLE-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP17]], 2 +; SCALABLE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 1, i32 [[R_012]] +; SCALABLE-NEXT: br label [[FOR_INC]] +; SCALABLE: for.inc: +; SCALABLE-NEXT: [[R_1]] = phi i32 [ [[R_012]], [[FOR_BODY]] ], [ [[SPEC_SELECT]], [[IF_THEN]] ] +; SCALABLE-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1 +; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; SCALABLE: for.end.loopexit: +; SCALABLE-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], [[FOR_INC]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; 
SCALABLE-NEXT: ret i32 [[R_1_LCSSA]] ; entry: br label %for.body @@ -472,3 +817,34 @@ for.end.loopexit: ; preds = %for.inc } attributes #0 = { "target-features"="+f,+v" } +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} +;. 
+; SCALABLE: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; SCALABLE: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; SCALABLE: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; SCALABLE: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; SCALABLE: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; SCALABLE: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; SCALABLE: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; SCALABLE: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; SCALABLE: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; SCALABLE: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +; SCALABLE: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]} +; SCALABLE: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]} +; SCALABLE: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]} +; SCALABLE: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]} +;. diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll index c55e732..59b8ce4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll @@ -25,7 +25,7 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 { ; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; AUTO_VEC: vector.ph: ; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[ZEXT]], 2147483616 -; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 ; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]] @@ -201,7 +201,7 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) { ; AUTO_VEC-NEXT: br i1 
[[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; AUTO_VEC: vector.ph: ; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775792 -; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to double +; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to double ; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast double [[DOTCAST]], 3.000000e+00 ; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; AUTO_VEC: vector.body: @@ -366,7 +366,7 @@ define void @fadd_reassoc_FMF(ptr nocapture %p, i32 %N) { ; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; AUTO_VEC: vector.ph: ; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 4294967264 -; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; AUTO_VEC-NEXT: [[TMP1:%.*]] = fmul reassoc float [[DOTCAST]], 4.200000e+01 ; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd reassoc float [[TMP1]], 1.000000e+00 ; AUTO_VEC-NEXT: br label [[VECTOR_BODY:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll index eea2894..aea72b7 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -1400,15 +1400,15 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX2-NEXT: [[TMP30:%.*]] = getelementptr double, ptr [[TMP20]], i32 -12 ; AVX2-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -3 ; AVX2-NEXT: [[REVERSE12:%.*]] = shufflevector <4 x i1> [[TMP16]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i1> [[TMP17]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i1> [[TMP18]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> -; AVX2-NEXT: [[REVERSE20:%.*]] = 
shufflevector <4 x i1> [[TMP19]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP25]], i32 8, <4 x i1> [[REVERSE12]], <4 x double> poison), !alias.scope !21 ; AVX2-NEXT: [[REVERSE13:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: [[REVERSE14:%.*]] = shufflevector <4 x i1> [[TMP17]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; AVX2-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP27]], i32 8, <4 x i1> [[REVERSE14]], <4 x double> poison), !alias.scope !21 ; AVX2-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD15]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: [[REVERSE17:%.*]] = shufflevector <4 x i1> [[TMP18]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; AVX2-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP29]], i32 8, <4 x i1> [[REVERSE17]], <4 x double> poison), !alias.scope !21 ; AVX2-NEXT: [[REVERSE19:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD18]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i1> [[TMP19]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; AVX2-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[TMP31]], i32 8, <4 x i1> [[REVERSE20]], <4 x double> poison), !alias.scope !21 ; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD21]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; AVX2-NEXT: [[TMP32:%.*]] = fadd <4 x double> [[REVERSE13]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01> @@ -1524,15 +1524,15 @@ define void @foo6(ptr nocapture readonly %in, ptr nocapture %out, i32 %size, ptr ; AVX512-NEXT: 
[[TMP30:%.*]] = getelementptr double, ptr [[TMP20]], i32 -24 ; AVX512-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[TMP30]], i32 -7 ; AVX512-NEXT: [[REVERSE12:%.*]] = shufflevector <8 x i1> [[TMP16]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512-NEXT: [[REVERSE14:%.*]] = shufflevector <8 x i1> [[TMP17]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i1> [[TMP18]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> -; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP19]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP25]], i32 8, <8 x i1> [[REVERSE12]], <8 x double> poison), !alias.scope !34 ; AVX512-NEXT: [[REVERSE13:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: [[REVERSE14:%.*]] = shufflevector <8 x i1> [[TMP17]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512-NEXT: [[WIDE_MASKED_LOAD15:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP27]], i32 8, <8 x i1> [[REVERSE14]], <8 x double> poison), !alias.scope !34 ; AVX512-NEXT: [[REVERSE16:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD15]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: [[REVERSE17:%.*]] = shufflevector <8 x i1> [[TMP18]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512-NEXT: [[WIDE_MASKED_LOAD18:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP29]], i32 8, <8 x i1> [[REVERSE17]], <8 x double> poison), !alias.scope !34 ; AVX512-NEXT: [[REVERSE19:%.*]] = shufflevector <8 x double> 
[[WIDE_MASKED_LOAD18]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i1> [[TMP19]], <8 x i1> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512-NEXT: [[WIDE_MASKED_LOAD21:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0(ptr [[TMP31]], i32 8, <8 x i1> [[REVERSE20]], <8 x double> poison), !alias.scope !34 ; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD21]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> ; AVX512-NEXT: [[TMP32:%.*]] = fadd <8 x double> [[REVERSE13]], <double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01, double 5.000000e-01> diff --git a/llvm/test/Transforms/LoopVectorize/blend-in-header.ll b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll new file mode 100644 index 0000000..01e223a --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/blend-in-header.ll @@ -0,0 +1,233 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -p loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" + +; Test with blend recipe in header VPBB, from +; https://github.com/llvm/llvm-project/issues/88297. 
+define i64 @pr88297() { +; CHECK-LABEL: define i64 @pr88297() { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 false, label [[LOOP_LATCH]], label [[THEN:%.*]] +; CHECK: then: +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[R:%.*]] = phi i64 [ 1, [[THEN]] ], [ 0, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt i32 [[IV]], 1000 +; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[R_LCSSA:%.*]] = phi i64 [ [[R]], [[LOOP_LATCH]] ], [ 1, [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[R_LCSSA]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 false, label %loop.latch, label %then + +then: + br label %loop.latch + +loop.latch: + %r = phi i64 [ 1, %then ], [ 0, %loop.header ] + %iv.next = add i32 %iv, 1 + %icmp = icmp sgt i32 %iv, 1000 + br i1 %icmp, label %exit, label %loop.header + +exit: + %r.lcssa = phi i64 [ %r, %loop.latch ] + ret i64 %r.lcssa +} + +define i64 
@pr88297_incoming_ops_reordered() { +; CHECK-LABEL: define i64 @pr88297_incoming_ops_reordered() { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 false, label [[LOOP_LATCH]], label [[THEN:%.*]] +; CHECK: then: +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[R:%.*]] = phi i64 [ 0, [[LOOP_HEADER]] ], [ 1, [[THEN]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt i32 [[IV]], 1000 +; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[R_LCSSA:%.*]] = phi i64 [ [[R]], [[LOOP_LATCH]] ], [ 1, [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[R_LCSSA]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 false, label %loop.latch, label %then + +then: + br label %loop.latch + +loop.latch: + %r = phi i64 [ 0, %loop.header ], [ 1, %then ] + %iv.next = add i32 %iv, 1 + %icmp = icmp sgt i32 %iv, 1000 + br i1 %icmp, label %exit, label %loop.header + +exit: + %r.lcssa = phi i64 [ %r, %loop.latch ] + ret i64 
%r.lcssa +} + +define i64 @invar_cond(i1 %c) { +; CHECK-LABEL: define i64 @invar_cond( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> zeroinitializer, <4 x i64> <i64 1, i64 1, i64 1, i64 1> +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 3 +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[THEN:%.*]] +; CHECK: then: +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[R:%.*]] = phi i64 [ 1, [[THEN]] ], [ 0, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt i32 [[IV]], 1000 +; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[R_LCSSA:%.*]] = phi i64 [ [[R]], [[LOOP_LATCH]] ], [ [[TMP1]], 
[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[R_LCSSA]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 %c, label %loop.latch, label %then + +then: + br label %loop.latch + +loop.latch: + %r = phi i64 [ 1, %then ], [ 0, %loop.header ] + %iv.next = add i32 %iv, 1 + %icmp = icmp sgt i32 %iv, 1000 + br i1 %icmp, label %exit, label %loop.header + +exit: + %r.lcssa = phi i64 [ %r, %loop.latch ] + ret i64 %r.lcssa +} + +define i64 @invar_cond_incoming_ops_reordered(i1 %c) { +; CHECK-LABEL: define i64 @invar_cond_incoming_ops_reordered( +; CHECK-SAME: i1 [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true, i1 true, i1 true> +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i64> zeroinitializer +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 +; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[PREDPHI]], i32 3 +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 
[[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 [[C]], label [[LOOP_LATCH]], label [[THEN:%.*]] +; CHECK: then: +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[R:%.*]] = phi i64 [ 0, [[LOOP_HEADER]] ], [ 1, [[THEN]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt i32 [[IV]], 1000 +; CHECK-NEXT: br i1 [[ICMP]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[R_LCSSA:%.*]] = phi i64 [ [[R]], [[LOOP_LATCH]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i64 [[R_LCSSA]] +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 %c, label %loop.latch, label %then + +then: + br label %loop.latch + +loop.latch: + %r = phi i64 [ 0, %loop.header ], [ 1, %then ] + %iv.next = add i32 %iv, 1 + %icmp = icmp sgt i32 %iv, 1000 + br i1 %icmp, label %exit, label %loop.header + +exit: + %r.lcssa = phi i64 [ %r, %loop.latch ] + ret i64 %r.lcssa +} +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} +; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]} +;. 
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll index c721da7..0b87270 100644 --- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -19,20 +19,20 @@ define i32 @any_of_reduction_epilog(ptr %src, i64 %N) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] +; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP4]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP8]], zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 1, i32 0 +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 1, i32 0 ; CHECK-NEXT: [[CMP_N:%.*]] = 
icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: @@ -42,33 +42,32 @@ define i32 @any_of_reduction_epilog(ptr %src, i64 %N) { ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[BC_MERGE_RDX]], 0 ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] -; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP9]], i64 0 -; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[MINMAX_IDENT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_MERGE_RDX]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP17:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX5]], 0 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP12]], align 1 ; CHECK-NEXT: 
[[TMP13:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD7]], zeroinitializer -; CHECK-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI6]], [[TMP13]] +; CHECK-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>, <4 x i32> [[VEC_PHI6]] ; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 4 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <4 x i32> [[TMP17]], zeroinitializer ; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]]) -; CHECK-NEXT: [[TMP17:%.*]] = freeze i1 [[TMP16]] -; CHECK-NEXT: [[RDX_SELECT9:%.*]] = select i1 [[TMP17]], i32 1, i32 0 +; CHECK-NEXT: [[RDX_SELECT9:%.*]] = select i1 [[TMP16]], i32 1, i32 0 ; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ 0, [[ITER_CHECK]] ], [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ 0, [[ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -102,104 +101,6 @@ exit: ret i32 %select } -define i32 @any_of_reduction_epilog_arg_as_start_value(ptr %src, i64 %N, i32 %start) { -; CHECK-LABEL: define i32 @any_of_reduction_epilog_arg_as_start_value( -; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]], i32 [[START:%.*]]) { -; 
CHECK-NEXT: iter.check: -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] -; CHECK: vector.main.loop.iter.check: -; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 4 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer -; CHECK-NEXT: [[TMP5]] = or <4 x i1> [[VEC_PHI]], [[TMP4]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP7]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i32 1, i32 [[START]] -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] -; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = 
sub i64 [[TMP0]], [[N_VEC]] -; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] -; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i32 [[BC_MERGE_RDX]], [[START]] -; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4 -; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] -; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP9]], i64 0 -; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[MINMAX_IDENT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] -; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP14:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX5]], 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP12]], align 1 -; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD7]], zeroinitializer -; CHECK-NEXT: [[TMP14]] = or <4 x i1> [[VEC_PHI6]], [[TMP13]] -; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 4 -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop 
[[LOOP6:![0-9]+]] -; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]]) -; CHECK-NEXT: [[TMP17:%.*]] = freeze i1 [[TMP16]] -; CHECK-NEXT: [[RDX_SELECT9:%.*]] = select i1 [[TMP17]], i32 1, i32 [[START]] -; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] -; CHECK: vec.epilog.scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ [[START]], [[ITER_CHECK]] ], [ [[START]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1 -; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i8 [[LOAD]], 0 -; CHECK-NEXT: [[SELECT]] = select i1 [[ICMP]], i32 1, i32 [[RED]] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[ICMP3:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[ICMP3]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] -; CHECK: exit: -; CHECK-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ [[SELECT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i32 [[SELECT_LCSSA]] -; -entry: - br label %loop - -loop: - %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] - %red = phi i32 [ %start, %entry ], [ %select, %loop ] - %gep = getelementptr inbounds i8, ptr %src, i64 %iv - %load = load i8, ptr %gep, align 1 - %icmp = icmp eq i8 %load, 0 - 
%select = select i1 %icmp, i32 1, i32 %red - %iv.next = add i64 %iv, 1 - %icmp3 = icmp eq i64 %iv, %N - br i1 %icmp3, label %exit, label %loop - -exit: - ret i32 %select -} define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-LABEL: define i1 @any_of_reduction_i1_epilog( @@ -223,15 +124,14 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-NEXT: [[TMP3]] = or <4 x i1> [[VEC_PHI]], [[TMP2]] +; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP1]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4> ; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) -; CHECK-NEXT: [[TMP6:%.*]] = freeze i1 [[TMP5]] +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) ; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i1 false, i1 false ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] @@ -244,11 +144,10 @@ define i1 @any_of_reduction_i1_epilog(i64 
%N, i32 %a) { ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ false, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] -; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i1 [[BC_MERGE_RDX]], false ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4 ; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] ; CHECK-NEXT: [[IND_END5:%.*]] = trunc i64 [[N_VEC3]] to i32 -; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[BC_MERGE_RDX]], i64 0 ; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[MINMAX_IDENT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_RESUME_VAL]], i64 0 ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer @@ -261,22 +160,21 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND11:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[VEC_IND11]], [[BROADCAST_SPLAT14]] -; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-NEXT: [[TMP10]] = or <4 x i1> [[VEC_PHI10]], [[TMP9]] +; CHECK-NEXT: [[TMP10]] = select <4 x i1> [[TMP8]], <4 x i1> [[VEC_PHI10]], <4 x i1> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX9]], 4 ; CHECK-NEXT: 
[[VEC_IND_NEXT12]] = add <4 x i32> [[VEC_IND11]], <i32 4, i32 4, i32 4, i32 4> ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC3]] -; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: vec.epilog.middle.block: -; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) -; CHECK-NEXT: [[TMP13:%.*]] = freeze i1 [[TMP12]] +; CHECK-NEXT: [[RDX_SELECT_CMP16:%.*]] = icmp ne <4 x i1> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP16]]) ; CHECK-NEXT: [[RDX_SELECT16:%.*]] = select i1 [[TMP13]], i1 false, i1 false ; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] ; CHECK-NEXT: br i1 [[CMP_N8]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] ; CHECK: vec.epilog.scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] ; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] -; CHECK-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i1 [ false, [[ITER_CHECK]] ], [ false, [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i1 [ false, [[ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -287,7 +185,7 @@ define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[IV_2_NEXT]] = 
add i32 [[IV_2]], 1 ; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[IV]], [[N]] -; CHECK-NEXT: br i1 [[CMP_2]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP_2]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i1 [ [[SEL]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[RDX_SELECT16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i1 [[SEL_LCSSA]] @@ -321,7 +219,4 @@ exit: ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} ; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} -; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]} -; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} -; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META2]], [[META1]]} ;. diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll index caea114..bd658c3 100644 --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -29,7 +29,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 -; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -84,7 +84,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], 
label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483640 -; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -142,7 +142,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: ; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -193,7 +193,7 @@ define void @fp_iv_loop1_fast_FMF(float %init, ptr noalias nocapture %A, i32 %N) ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[FPINC]], [[DOTCAST]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP1]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 @@ -276,7 +276,7 @@ define void 
@fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 -; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -331,7 +331,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483640 -; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -389,7 +389,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: ; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] ; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] ; VEC1_INTERL2-NEXT: br label 
[[VECTOR_BODY:%.*]] @@ -442,7 +442,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, ptr noalias nocapture %A, i32 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul reassoc float [[FPINC]], [[DOTCAST]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP1]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 @@ -526,7 +526,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 -; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], [[INIT:%.*]] ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -574,7 +574,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483640 -; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 
5.000000e-01 ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], [[INIT:%.*]] ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -625,7 +625,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: ; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], [[INIT:%.*]] ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -675,7 +675,7 @@ define void @fp_iv_loop2(float %init, ptr noalias nocapture %A, i32 %N) #0 { ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], [[INIT:%.*]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 @@ -758,10 +758,10 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 2147483644 -; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: 
[[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01 ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 -; VEC4_INTERL1-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] ; VEC4_INTERL1-NEXT: [[IND_END3:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] ; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -835,10 +835,10 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 2147483640 -; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01 ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 -; VEC4_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] ; VEC4_INTERL2-NEXT: [[IND_END3:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] ; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0 @@ -922,10 +922,10 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: ; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 2147483646 -; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; 
VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC1_INTERL2-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01 ; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 -; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC1_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] ; VEC1_INTERL2-NEXT: [[IND_END3:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1000,10 +1000,10 @@ define void @fp_iv_loop3(float %init, ptr noalias nocapture %A, ptr noalias noca ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP1]], 2147483646 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP2:%.*]] = fmul fast float [[DOTCAST]], -5.000000e-01 ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP2]], 0x3FB99999A0000000 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST2:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST2:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[TMP0]], [[DOTCAST2]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END3:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0 @@ -1113,7 +1113,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644 -; 
VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 ; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1158,7 +1158,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483640 -; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 ; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1206,7 +1206,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: ; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC1_INTERL2-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; VEC1_INTERL2-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 ; VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1256,7 +1256,7 @@ define void @fp_iv_loop4(ptr noalias nocapture %A, i32 %N) { ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483646 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; 
VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01 ; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP1]], 1.000000e+00 ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1319,7 +1319,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC4_INTERL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL1: vector.ph: ; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804 -; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL1-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL1: vector.body: ; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ] @@ -1396,7 +1396,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC4_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC4_INTERL2: vector.ph: ; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775800 -; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC4_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC4_INTERL2: vector.body: ; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE17:%.*]] ] @@ -1512,7 +1512,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC1_INTERL2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VEC1_INTERL2: vector.ph: ; VEC1_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806 -; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC1_INTERL2-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; 
VEC1_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC1_INTERL2: vector.body: ; VEC1_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] @@ -1570,7 +1570,7 @@ define void @non_primary_iv_float_scalar(ptr %A, i64 %N) { ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.ph: ; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806 -; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTCAST:%.*]] = uitofp nneg i64 [[N_VEC]] to float ; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC2_INTERL1_PRED_STORE: vector.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE4:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll index 6a9f83a..1b4bcf6 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp-predicated.ll @@ -1,114 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S < %s | FileCheck %s --check-prefix=CHECK-VF2IC1 ; RUN: opt -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC2 define i32 @pred_select_const_i32_from_icmp(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i64 %n) { -; CHECK-VF2IC1-LABEL: @pred_select_const_i32_from_icmp( +; CHECK-VF2IC1-LABEL: define i32 @pred_select_const_i32_from_icmp( +; CHECK-VF2IC1-SAME: ptr noalias nocapture readonly [[SRC1:%.*]], ptr noalias nocapture readonly [[SRC2:%.*]], i64 [[N:%.*]]) { +; CHECK-VF2IC1-NEXT: 
entry: +; CHECK-VF2IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-VF2IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF2IC1: vector.ph: +; CHECK-VF2IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-VF2IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF2IC1-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF2IC1: vector.body: -; CHECK-VF2IC1: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, %vector.ph ], [ [[PREDPHI:%.*]], %pred.load.continue2 ] -; CHECK-VF2IC1: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr {{%.*}}, align 4 -; CHECK-VF2IC1-NEXT: [[TMP4:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], <i32 35, i32 35> -; CHECK-VF2IC1-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0 -; CHECK-VF2IC1-NEXT: br i1 [[TMP5]], label %pred.load.if, label %pred.load.continue +; CHECK-VF2IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ] +; CHECK-VF2IC1-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_LOAD_CONTINUE2]] ] +; CHECK-VF2IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF2IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[TMP0]] +; CHECK-VF2IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-VF2IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 +; CHECK-VF2IC1-NEXT: [[TMP3:%.*]] = icmp sgt <2 x i32> [[WIDE_LOAD]], <i32 35, i32 35> +; CHECK-VF2IC1-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0 +; CHECK-VF2IC1-NEXT: br i1 [[TMP4]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK-VF2IC1: pred.load.if: -; CHECK-VF2IC1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC2:%.*]], i64 {{%.*}} -; CHECK-VF2IC1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -; CHECK-VF2IC1-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0 -; CHECK-VF2IC1-NEXT: br 
label %pred.load.continue +; CHECK-VF2IC1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP0]] +; CHECK-VF2IC1-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +; CHECK-VF2IC1-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP6]], i32 0 +; CHECK-VF2IC1-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK-VF2IC1: pred.load.continue: -; CHECK-VF2IC1-NEXT: [[TMP9:%.*]] = phi <2 x i32> [ poison, %vector.body ], [ [[TMP8]], %pred.load.if ] -; CHECK-VF2IC1-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 -; CHECK-VF2IC1-NEXT: br i1 [[TMP10]], label %pred.load.if1, label %pred.load.continue2 +; CHECK-VF2IC1-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_LOAD_IF]] ] +; CHECK-VF2IC1-NEXT: [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 +; CHECK-VF2IC1-NEXT: br i1 [[TMP9]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2]] ; CHECK-VF2IC1: pred.load.if1: -; CHECK-VF2IC1: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 {{%.*}} -; CHECK-VF2IC1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4 -; CHECK-VF2IC1-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP13]], i32 1 -; CHECK-VF2IC1-NEXT: br label %pred.load.continue2 +; CHECK-VF2IC1-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF2IC1-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP10]] +; CHECK-VF2IC1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +; CHECK-VF2IC1-NEXT: [[TMP13:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP12]], i32 1 +; CHECK-VF2IC1-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK-VF2IC1: pred.load.continue2: -; CHECK-VF2IC1-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %pred.load.continue ], [ [[TMP14]], %pred.load.if1 ] -; CHECK-VF2IC1-NEXT: [[TMP16:%.*]] = icmp eq <2 x i32> [[TMP15]], <i32 2, i32 2> -; CHECK-VF2IC1-NEXT: [[TMP17:%.*]] = or <2 x i1> [[VEC_PHI]], [[TMP16]] -; CHECK-VF2IC1-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP4]], 
<2 x i1> [[TMP17]], <2 x i1> [[VEC_PHI]] -; CHECK-VF2IC1: br i1 {{%.*}}, label %middle.block, label %vector.body +; CHECK-VF2IC1-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP8]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP13]], [[PRED_LOAD_IF1]] ] +; CHECK-VF2IC1-NEXT: [[TMP15:%.*]] = icmp eq <2 x i32> [[TMP14]], <i32 2, i32 2> +; CHECK-VF2IC1-NEXT: [[TMP16:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> <i32 1, i32 1>, <2 x i32> [[VEC_PHI]] +; CHECK-VF2IC1-NEXT: [[PREDPHI]] = select <2 x i1> [[TMP3]], <2 x i32> [[TMP16]], <2 x i32> [[VEC_PHI]] +; CHECK-VF2IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-VF2IC1-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF2IC1-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF2IC1: middle.block: -; CHECK-VF2IC1-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[PREDPHI]]) -; CHECK-VF2IC1-NEXT: [[FR_TMP20:%.*]] = freeze i1 [[TMP20]] -; CHECK-VF2IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP20]], i32 1, i32 0 +; CHECK-VF2IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i32> [[PREDPHI]], zeroinitializer +; CHECK-VF2IC1-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[RDX_SELECT_CMP]]) +; CHECK-VF2IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP18]], i32 1, i32 0 +; CHECK-VF2IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF2IC1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK-VF2IC1: scalar.ph: -; CHECK-VF2IC1: [[BC_RESUME_VAL:%.*]] = phi i64 [ {{%.*}}, %middle.block ], [ 0, %entry ] -; CHECK-VF2IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %entry ], [ [[RDX_SELECT]], %middle.block ] -; CHECK-VF2IC1-NEXT: br label %for.body +; CHECK-VF2IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF2IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF2IC1-NEXT: br 
label [[FOR_BODY:%.*]] ; CHECK-VF2IC1: for.body: -; CHECK-VF2IC1: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], %for.inc ], [ [[BC_MERGE_RDX]], %scalar.ph ] -; CHECK-VF2IC1: [[TMP21:%.*]] = load i32, ptr {{%.*}}, align 4 -; CHECK-VF2IC1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP21]], 35 -; CHECK-VF2IC1-NEXT: br i1 [[CMP1]], label %if.then, label %for.inc +; CHECK-VF2IC1-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-VF2IC1-NEXT: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], [[FOR_INC]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-VF2IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[I_013]] +; CHECK-VF2IC1-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-VF2IC1-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP19]], 35 +; CHECK-VF2IC1-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK-VF2IC1: if.then: -; CHECK-VF2IC1: [[TMP22:%.*]] = load i32, ptr {{%.*}}, align 4 -; CHECK-VF2IC1-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP22]], 2 +; CHECK-VF2IC1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[I_013]] +; CHECK-VF2IC1-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-VF2IC1-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP20]], 2 ; CHECK-VF2IC1-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 1, i32 [[R_012]] -; CHECK-VF2IC1-NEXT: br label %for.inc +; CHECK-VF2IC1-NEXT: br label [[FOR_INC]] ; CHECK-VF2IC1: for.inc: -; CHECK-VF2IC1-NEXT: [[R_1]] = phi i32 [ [[R_012]], %for.body ], [ [[SPEC_SELECT]], %if.then ] +; CHECK-VF2IC1-NEXT: [[R_1]] = phi i32 [ [[R_012]], [[FOR_BODY]] ], [ [[SPEC_SELECT]], [[IF_THEN]] ] +; CHECK-VF2IC1-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1 +; CHECK-VF2IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-VF2IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-VF2IC1: for.end.loopexit: -; CHECK-VF2IC1-NEXT: [[R_1_LCSSA:%.*]] = 
phi i32 [ [[R_1]], %for.inc ], [ [[RDX_SELECT]], %middle.block ] +; CHECK-VF2IC1-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], [[FOR_INC]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] ; CHECK-VF2IC1-NEXT: ret i32 [[R_1_LCSSA]] ; -; CHECK-VF1IC2-LABEL: @pred_select_const_i32_from_icmp( +; CHECK-VF1IC2-LABEL: define i32 @pred_select_const_i32_from_icmp( +; CHECK-VF1IC2-SAME: ptr noalias nocapture readonly [[SRC1:%.*]], ptr noalias nocapture readonly [[SRC2:%.*]], i64 [[N:%.*]]) { +; CHECK-VF1IC2-NEXT: entry: +; CHECK-VF1IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; CHECK-VF1IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF1IC2: vector.ph: +; CHECK-VF1IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; CHECK-VF1IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF1IC2-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK-VF1IC2: vector.body: -; CHECK-VF1IC2: [[VEC_PHI:%.*]] = phi i1 [ false, %vector.ph ], [ [[PREDPHI:%.*]], %pred.load.continue3 ] -; CHECK-VF1IC2-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %vector.ph ], [ [[PREDPHI5:%.*]], %pred.load.continue3 ] -; CHECK-VF1IC2: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[SRC1:%.*]], i64 {{%.*}} -; CHECK-VF1IC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 {{%.*}} -; CHECK-VF1IC2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 -; CHECK-VF1IC2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 -; CHECK-VF1IC2-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], 35 -; CHECK-VF1IC2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP3]], 35 -; CHECK-VF1IC2-NEXT: br i1 [[TMP4]], label %pred.load.if, label %pred.load.continue +; CHECK-VF1IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE3:%.*]] ] +; CHECK-VF1IC2-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_LOAD_CONTINUE3]] ] +; CHECK-VF1IC2-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI4:%.*]], 
[[PRED_LOAD_CONTINUE3]] ] +; CHECK-VF1IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF1IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF1IC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[TMP0]] +; CHECK-VF1IC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[TMP1]] +; CHECK-VF1IC2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-VF1IC2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4 +; CHECK-VF1IC2-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[TMP4]], 35 +; CHECK-VF1IC2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], 35 +; CHECK-VF1IC2-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] ; CHECK-VF1IC2: pred.load.if: -; CHECK-VF1IC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC2:%.*]], i64 {{%.*}} -; CHECK-VF1IC2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -; CHECK-VF1IC2-NEXT: br label %pred.load.continue +; CHECK-VF1IC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP0]] +; CHECK-VF1IC2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-VF1IC2-NEXT: br label [[PRED_LOAD_CONTINUE]] ; CHECK-VF1IC2: pred.load.continue: -; CHECK-VF1IC2-NEXT: [[TMP8:%.*]] = phi i32 [ poison, %vector.body ], [ [[TMP7]], %pred.load.if ] -; CHECK-VF1IC2-NEXT: br i1 [[TMP5]], label %pred.load.if2, label %pred.load.continue3 +; CHECK-VF1IC2-NEXT: [[TMP10:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_LOAD_IF]] ] +; CHECK-VF1IC2-NEXT: br i1 [[TMP7]], label [[PRED_LOAD_IF2:%.*]], label [[PRED_LOAD_CONTINUE3]] ; CHECK-VF1IC2: pred.load.if2: -; CHECK-VF1IC2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 {{%.*}} -; CHECK-VF1IC2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -; CHECK-VF1IC2-NEXT: br label %pred.load.continue3 +; CHECK-VF1IC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[TMP1]] +; CHECK-VF1IC2-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +; 
CHECK-VF1IC2-NEXT: br label [[PRED_LOAD_CONTINUE3]] ; CHECK-VF1IC2: pred.load.continue3: -; CHECK-VF1IC2-NEXT: [[TMP11:%.*]] = phi i32 [ poison, %pred.load.continue ], [ [[TMP10]], %pred.load.if2 ] -; CHECK-VF1IC2-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP8]], 2 -; CHECK-VF1IC2-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP11]], 2 -; CHECK-VF1IC2-NEXT: [[TMP14:%.*]] = or i1 [[VEC_PHI]], [[TMP12]] -; CHECK-VF1IC2-NEXT: [[TMP15:%.*]] = or i1 [[VEC_PHI2]], [[TMP13]] -; CHECK-VF1IC2-NEXT: [[PREDPHI]] = select i1 [[TMP4]], i1 [[TMP14]], i1 [[VEC_PHI]] -; CHECK-VF1IC2-NEXT: [[PREDPHI5]] = select i1 [[TMP5]], i1 [[TMP15]], i1 [[VEC_PHI2]] -; CHECK-VF1IC2: br i1 {{%.*}}, label %middle.block, label %vector.body +; CHECK-VF1IC2-NEXT: [[TMP13:%.*]] = phi i32 [ poison, [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF2]] ] +; CHECK-VF1IC2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP10]], 2 +; CHECK-VF1IC2-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP13]], 2 +; CHECK-VF1IC2-NEXT: [[TMP16:%.*]] = select i1 [[TMP14]], i32 1, i32 [[VEC_PHI]] +; CHECK-VF1IC2-NEXT: [[TMP17:%.*]] = select i1 [[TMP15]], i32 1, i32 [[VEC_PHI1]] +; CHECK-VF1IC2-NEXT: [[PREDPHI]] = select i1 [[TMP6]], i32 [[TMP16]], i32 [[VEC_PHI]] +; CHECK-VF1IC2-NEXT: [[PREDPHI4]] = select i1 [[TMP7]], i32 [[TMP17]], i32 [[VEC_PHI1]] +; CHECK-VF1IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-VF1IC2-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF1IC2-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK-VF1IC2: middle.block: -; CHECK-VF1IC2-NEXT: [[OR:%.*]] = or i1 [[PREDPHI5]], [[PREDPHI]] -; CHECK-VF1IC2-NEXT: [[FR_OR:%.*]] = freeze i1 [[OR]] -; CHECK-VF1IC2-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_OR]], i32 1, i32 0 -; CHECK-VF1IC2: br i1 {{%.*}}, label %for.end.loopexit, label %scalar.ph +; CHECK-VF1IC2-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i32 [[PREDPHI]], 0 +; CHECK-VF1IC2-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i32 
[[PREDPHI]], i32 [[PREDPHI4]] +; CHECK-VF1IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF1IC2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK-VF1IC2: scalar.ph: -; CHECK-VF1IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ {{%.*}}, %middle.block ], [ 0, %entry ] -; CHECK-VF1IC2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, %entry ], [ [[RDX_SELECT]], %middle.block ] -; CHECK-VF1IC2-NEXT: br label %for.body +; CHECK-VF1IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF1IC2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC2-NEXT: br label [[FOR_BODY:%.*]] ; CHECK-VF1IC2: for.body: -; CHECK-VF1IC2-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], %for.inc ], [ [[BC_RESUME_VAL]], %scalar.ph ] -; CHECK-VF1IC2-NEXT: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], %for.inc ], [ [[BC_MERGE_RDX]], %scalar.ph ] -; CHECK-VF1IC2: [[TMP19:%.*]] = load i32, ptr {{%.*}}, align 4 +; CHECK-VF1IC2-NEXT: [[I_013:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-VF1IC2-NEXT: [[R_012:%.*]] = phi i32 [ [[R_1:%.*]], [[FOR_INC]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-VF1IC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC1]], i64 [[I_013]] +; CHECK-VF1IC2-NEXT: [[TMP19:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 ; CHECK-VF1IC2-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[TMP19]], 35 -; CHECK-VF1IC2-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label %for.inc +; CHECK-VF1IC2-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK-VF1IC2: if.then: -; CHECK-VF1IC2: [[TMP20:%.*]] = load i32, ptr {{%.*}}, align 4 +; CHECK-VF1IC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[SRC2]], i64 [[I_013]] +; CHECK-VF1IC2-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 ; CHECK-VF1IC2-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TMP20]], 2 ; CHECK-VF1IC2-NEXT: 
[[SPEC_SELECT:%.*]] = select i1 [[CMP3]], i32 1, i32 [[R_012]] -; CHECK-VF1IC2-NEXT: br label %for.inc +; CHECK-VF1IC2-NEXT: br label [[FOR_INC]] ; CHECK-VF1IC2: for.inc: -; CHECK-VF1IC2-NEXT: [[R_1]] = phi i32 [ [[R_012]], %for.body ], [ [[SPEC_SELECT]], %if.then ] -; CHECK-VF1IC2: br i1 {{%.*}}, label %for.end.loopexit, label %for.body +; CHECK-VF1IC2-NEXT: [[R_1]] = phi i32 [ [[R_012]], [[FOR_BODY]] ], [ [[SPEC_SELECT]], [[IF_THEN]] ] +; CHECK-VF1IC2-NEXT: [[INC]] = add nuw nsw i64 [[I_013]], 1 +; CHECK-VF1IC2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; CHECK-VF1IC2-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK-VF1IC2: for.end.loopexit: -; CHECK-VF1IC2-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], %for.inc ], [ [[RDX_SELECT]], %middle.block ] +; CHECK-VF1IC2-NEXT: [[R_1_LCSSA:%.*]] = phi i32 [ [[R_1]], [[FOR_INC]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] ; CHECK-VF1IC2-NEXT: ret i32 [[R_1_LCSSA]] ; entry: @@ -139,3 +180,14 @@ for.end.loopexit: ; preds = %for.inc %r.1.lcssa = phi i32 [ %r.1, %for.inc ] ret i32 %r.1.lcssa } +;. +; CHECK-VF2IC1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-VF2IC1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-VF2IC1: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-VF2IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} +;. +; CHECK-VF1IC2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK-VF1IC2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-VF1IC2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK-VF1IC2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]} +;. 
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll index 993b56a..c9f2aae 100644 --- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll @@ -5,47 +5,45 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-LABEL: @select_const_i32_from_icmp ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 3, i32 3, i32 3, i32 3>, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], <i32 3, i32 3, i32 3, i32 3> -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_ICMP]], <4 x i32> [[VEC_PHI]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 +; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], <i32 3, i32 3, i32 3, i32 3> +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3 ; CHECK-VF4IC4: vector.body: -; CHECK-VF4IC4: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ 
[[VEC_SEL3:%.*]], %vector.body ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] +; CHECK-VF4IC4: [[VEC_PHI1:%.*]] = phi <4 x i32> [ <i32 3, i32 3, i32 3, i32 3>, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ <i32 3, i32 3, i32 3, i32 3>, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ <i32 3, i32 3, i32 3, i32 3>, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ <i32 3, i32 3, i32 3, i32 3>, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] ; CHECK-VF4IC4: [[VEC_ICMP1:%.*]] = icmp eq <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3> ; CHECK-VF4IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3> ; CHECK-VF4IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3> ; CHECK-VF4IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq <4 x i32> {{.*}}, <i32 3, i32 3, i32 3, i32 3> -; CHECK-VF4IC4-NEXT: [[NOT1:%.*]] = xor <4 x i1> [[VEC_ICMP1]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-VF4IC4-NEXT: [[NOT2:%.*]] = xor <4 x i1> [[VEC_ICMP2]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-VF4IC4-NEXT: [[NOT3:%.*]] = xor <4 x i1> [[VEC_ICMP3]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-VF4IC4-NEXT: [[NOT4:%.*]] = xor <4 x i1> [[VEC_ICMP4]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-VF4IC4-NEXT: [[VEC_SEL1:%.*]] = or <4 x i1> [[VEC_PHI1]], [[NOT1]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL2:%.*]] = or <4 x i1> [[VEC_PHI2]], [[NOT2]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = or <4 x i1> [[VEC_PHI3]], [[NOT3]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL4:%.*]] = or <4 x i1> [[VEC_PHI4]], [[NOT4]] +; CHECK-VF4IC4-NEXT: [[VEC_SEL1:%.*]] = select <4 x i1> [[VEC_ICMP1]], <4 x i32> [[VEC_PHI1]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> +; CHECK-VF4IC4-NEXT: [[VEC_SEL2:%.*]] = select <4 x i1> [[VEC_ICMP2]], <4 x 
i32> [[VEC_PHI2]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> +; CHECK-VF4IC4-NEXT: [[VEC_SEL3:%.*]] = select <4 x i1> [[VEC_ICMP3]], <4 x i32> [[VEC_PHI3]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> +; CHECK-VF4IC4-NEXT: [[VEC_SEL4:%.*]] = select <4 x i1> [[VEC_ICMP4]], <4 x i32> [[VEC_PHI4]], <4 x i32> <i32 7, i32 7, i32 7, i32 7> ; CHECK-VF4IC4: middle.block: -; CHECK-VF4IC4-NEXT: [[VEC_SEL5:%.*]] = or <4 x i1> [[VEC_SEL2]], [[VEC_SEL1]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL6:%.*]] = or <4 x i1> [[VEC_SEL3]], [[VEC_SEL5]] -; CHECK-VF4IC4-NEXT: [[VEC_SEL7:%.*]] = or <4 x i1> [[VEC_SEL4]], [[VEC_SEL6]] -; CHECK-VF4IC4-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL7]]) -; CHECK-VF4IC4-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC4-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 +; CHECK-VF4IC4-NEXT: [[VEC_ICMP5:%.*]] = icmp ne <4 x i32> [[VEC_SEL1]], <i32 3, i32 3, i32 3, i32 3> +; CHECK-VF4IC4-NEXT: [[VEC_SEL5:%.*]] = select <4 x i1> [[VEC_ICMP5]], <4 x i32> [[VEC_SEL1]], <4 x i32> [[VEC_SEL2]] +; CHECK-VF4IC4-NEXT: [[VEC_ICMP6:%.*]] = icmp ne <4 x i32> [[VEC_SEL5]], <i32 3, i32 3, i32 3, i32 3> +; CHECK-VF4IC4-NEXT: [[VEC_SEL6:%.*]] = select <4 x i1> [[VEC_ICMP6]], <4 x i32> [[VEC_SEL5]], <4 x i32> [[VEC_SEL3]] +; CHECK-VF4IC4-NEXT: [[VEC_ICMP7:%.*]] = icmp ne <4 x i32> [[VEC_SEL6]], <i32 3, i32 3, i32 3, i32 3> +; CHECK-VF4IC4-NEXT: [[VEC_SEL_FIN:%.*]] = select <4 x i1> [[VEC_ICMP7]], <4 x i32> [[VEC_SEL6]], <4 x i32> [[VEC_SEL4]] +; CHECK-VF4IC4-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL_FIN]], <i32 3, i32 3, i32 3, i32 3> +; CHECK-VF4IC4-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]]) +; CHECK-VF4IC4-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3 ; CHECK-VF1IC4: vector.body: -; CHECK-VF1IC4: [[VEC_PHI1:%.*]] = phi i1 [ false, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i1 [ false, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body 
] -; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i1 [ false, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i1 [ false, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] +; CHECK-VF1IC4: [[VEC_PHI1:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL1:%.*]], %vector.body ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL2:%.*]], %vector.body ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL3:%.*]], %vector.body ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i32 [ 3, %vector.ph ], [ [[VEC_SEL4:%.*]], %vector.body ] ; CHECK-VF1IC4: [[VEC_LOAD1:%.*]] = load i32 ; CHECK-VF1IC4-NEXT: [[VEC_LOAD2:%.*]] = load i32 ; CHECK-VF1IC4-NEXT: [[VEC_LOAD3:%.*]] = load i32 @@ -54,20 +52,17 @@ define i32 @select_const_i32_from_icmp(ptr nocapture readonly %v, i64 %n) { ; CHECK-VF1IC4-NEXT: [[VEC_ICMP2:%.*]] = icmp eq i32 [[VEC_LOAD2]], 3 ; CHECK-VF1IC4-NEXT: [[VEC_ICMP3:%.*]] = icmp eq i32 [[VEC_LOAD3]], 3 ; CHECK-VF1IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp eq i32 [[VEC_LOAD4]], 3 -; CHECK-VF1IC4-NEXT: [[NOT1:%.*]] = xor i1 [[VEC_ICMP1]], true -; CHECK-VF1IC4-NEXT: [[NOT2:%.*]] = xor i1 [[VEC_ICMP2]], true -; CHECK-VF1IC4-NEXT: [[NOT3:%.*]] = xor i1 [[VEC_ICMP3]], true -; CHECK-VF1IC4-NEXT: [[NOT4:%.*]] = xor i1 [[VEC_ICMP4]], true -; CHECK-VF1IC4-NEXT: [[VEC_SEL1:%.*]] = or i1 [[VEC_PHI1]], [[NOT1]] -; CHECK-VF1IC4-NEXT: [[VEC_SEL2:%.*]] = or i1 [[VEC_PHI2]], [[NOT2]] -; CHECK-VF1IC4-NEXT: [[VEC_SEL3:%.*]] = or i1 [[VEC_PHI3]], [[NOT3]] -; CHECK-VF1IC4-NEXT: [[VEC_SEL4:%.*]] = or i1 [[VEC_PHI4]], [[NOT4]] +; CHECK-VF1IC4-NEXT: [[VEC_SEL1]] = select i1 [[VEC_ICMP1]], i32 [[VEC_PHI1]], i32 7 +; CHECK-VF1IC4-NEXT: [[VEC_SEL2]] = select i1 [[VEC_ICMP2]], i32 [[VEC_PHI2]], i32 7 +; CHECK-VF1IC4-NEXT: [[VEC_SEL3]] = select i1 [[VEC_ICMP3]], i32 [[VEC_PHI3]], i32 7 +; CHECK-VF1IC4-NEXT: [[VEC_SEL4]] = select i1 [[VEC_ICMP4]], i32 [[VEC_PHI4]], i32 7 ; CHECK-VF1IC4: middle.block: -; 
CHECK-VF1IC4-NEXT: [[VEC_SEL5:%.*]] = or i1 [[VEC_SEL2]], [[VEC_SEL1]] -; CHECK-VF1IC4-NEXT: [[VEC_SEL6:%.*]] = or i1 [[VEC_SEL3]], [[VEC_SEL5]] -; CHECK-VF1IC4-NEXT: [[OR_RDX:%.*]] = or i1 [[VEC_SEL4]], [[VEC_SEL6]] -; CHECK-VF1IC4-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF1IC4-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 +; CHECK-VF1IC4-NEXT: [[VEC_ICMP4:%.*]] = icmp ne i32 [[VEC_SEL1]], 3 +; CHECK-VF1IC4-NEXT: [[VEC_SEL5:%.*]] = select i1 [[VEC_ICMP4]], i32 [[VEC_SEL1]], i32 [[VEC_SEL2]] +; CHECK-VF1IC4-NEXT: [[VEC_ICMP5:%.*]] = icmp ne i32 [[VEC_SEL5]], 3 +; CHECK-VF1IC4-NEXT: [[VEC_SEL6:%.*]] = select i1 [[VEC_ICMP5]], i32 [[VEC_SEL5]], i32 [[VEC_SEL3]] +; CHECK-VF1IC4-NEXT: [[VEC_ICMP6:%.*]] = icmp ne i32 [[VEC_SEL6]], 3 +; CHECK-VF1IC4-NEXT: {{.*}} = select i1 [[VEC_ICMP6]], i32 [[VEC_SEL6]], i32 [[VEC_SEL4]] entry: br label %for.body @@ -91,14 +86,14 @@ exit: ; preds = %for.body define i32 @select_const_i32_from_icmp2(ptr nocapture readonly %v, i64 %n) { ; CHECK-LABEL: @select_const_i32_from_icmp2 ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 3, i32 3, i32 3, i32 3>, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], <i32 3, i32 3, i32 3, i32 3> -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[VEC_ICMP]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_ICMP]], <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32> [[VEC_PHI]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 7, i32 3 +; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> 
[[VEC_SEL]], <i32 3, i32 3, i32 3, i32 3> +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 7, i32 3 entry: br label %for.body @@ -122,18 +117,21 @@ exit: ; preds = %for.body define i32 @select_i32_from_icmp(ptr nocapture readonly %v, i32 %a, i32 %b, i64 %n) { ; CHECK-LABEL: @select_i32_from_icmp ; CHECK-VF4IC1: vector.ph: -; CHECK-VF4IC1-NOT: shufflevector <4 x i32> -; CHECK-VF4IC1-NOT: shufflevector <4 x i32> +; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 +; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0 +; CHECK-VF4IC1-NEXT: [[SPLAT_OF_B:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[SPLAT_OF_A]], %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x i32> ; CHECK-VF4IC1-NEXT: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_LOAD]], <i32 3, i32 3, i32 3, i32 3> -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_ICMP]], <4 x i32> [[VEC_PHI]], <4 x i32> [[SPLAT_OF_B]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 %b, i32 %a +; CHECK-VF4IC1-NEXT: [[FIN_INS:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 +; CHECK-VF4IC1-NEXT: 
[[FIN_SPLAT:%.*]] = shufflevector <4 x i32> [[FIN_INS]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: [[FIN_CMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], [[FIN_SPLAT]] +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_CMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 %b, i32 %a entry: br label %for.body @@ -156,15 +154,14 @@ exit: ; preds = %for.body define i32 @select_const_i32_from_fcmp_fast(ptr nocapture readonly %v, i64 %n) { ; CHECK-LABEL: @select_const_i32_from_fcmp_fast ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 2, i32 2, i32 2, i32 2>, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x float> ; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp fast ueq <4 x float> [[VEC_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00> -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_FCMP]], <4 x i32> [[VEC_PHI]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 1, i32 2 +; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], <i32 2, i32 2, i32 2, i32 2> +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 2 entry: br label %for.body @@ -187,15 +184,14 @@ exit: ; preds = %for.body define i32 @select_const_i32_from_fcmp(ptr nocapture readonly 
%v, i64 %n) { ; CHECK-LABEL: @select_const_i32_from_fcmp ; CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 2, i32 2, i32 2, i32 2>, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] ; CHECK-VF4IC1: [[VEC_LOAD:%.*]] = load <4 x float> ; CHECK-VF4IC1-NEXT: [[VEC_FCMP:%.*]] = fcmp ueq <4 x float> [[VEC_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00> -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_FCMP]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_FCMP]], <4 x i32> [[VEC_PHI]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 1, i32 2 +; CHECK-VF4IC1-NEXT: [[FIN_ICMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], <i32 2, i32 2, i32 2, i32 2> +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_ICMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 1, i32 2 entry: br label %for.body @@ -220,16 +216,18 @@ define i32 @select_i32_from_icmp_same_inputs(i32 %a, i32 %b, i64 %n) { ; CHECK-VF4IC1: vector.ph: ; CHECK-VF4IC1: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 ; CHECK-VF4IC1-NEXT: [[SPLAT_OF_A:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-VF4IC1-NOT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0 +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 %b, i64 0 +; CHECK-VF4IC1-NEXT: [[SPLAT_OF_B:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer ; 
CHECK-VF4IC1: vector.body: -; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] -; CHECK-VF4IC1: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[SPLAT_OF_A]], <i32 3, i32 3, i32 3, i32 3> -; CHECK-VF4IC1-NEXT: [[NOT:%.*]] = xor <4 x i1> [[VEC_ICMP]], <i1 true, i1 true, i1 true, i1 true> -; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = or <4 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-VF4IC1: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[SPLAT_OF_A]], %vector.ph ], [ [[VEC_SEL:%.*]], %vector.body ] +; CHECK-VF4IC1: [[VEC_ICMP:%.*]] = icmp eq <4 x i32> [[VEC_PHI]], <i32 3, i32 3, i32 3, i32 3> +; CHECK-VF4IC1-NEXT: [[VEC_SEL]] = select <4 x i1> [[VEC_ICMP]], <4 x i32> [[VEC_PHI]], <4 x i32> [[SPLAT_OF_B]] ; CHECK-VF4IC1: middle.block: -; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[VEC_SEL]]) -; CHECK-VF4IC1-NEXT: [[FR_OR_RDX:%.*]] = freeze i1 [[OR_RDX]] -; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[FR_OR_RDX]], i32 %b, i32 %a +; CHECK-VF4IC1-NEXT: [[FIN_INS:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 +; CHECK-VF4IC1-NEXT: [[FIN_SPLAT:%.*]] = shufflevector <4 x i32> [[FIN_INS]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: [[FIN_CMP:%.*]] = icmp ne <4 x i32> [[VEC_SEL]], [[FIN_SPLAT]] +; CHECK-VF4IC1-NEXT: [[OR_RDX:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[FIN_CMP]]) +; CHECK-VF4IC1-NEXT: {{.*}} = select i1 [[OR_RDX]], i32 %b, i32 %a entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll index 55e6115..16ab454 100644 --- a/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll +++ b/llvm/test/Transforms/LoopVectorize/select-reduction-start-value-may-be-undef-or-poison.ll @@ -8,25 +8,26 @@ define i64 @pr62565_incoming_value_known_undef(i64 %a, ptr %src) { ; CHECK-NEXT: 
entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ undef, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], <i32 1, i32 1> -; CHECK-NEXT: [[NOT:%*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true> -; CHECK-NEXT: [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP4]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_PHI]], <2 x i64> [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) -; CHECK-NEXT: [[FR_TMP6:%.*]] = freeze i1 [[TMP6]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 undef +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i64> [[TMP4]], undef +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> 
[[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i64 [[A]], i64 undef ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] @@ -71,25 +72,26 @@ define i64 @pr62565_incoming_value_known_poison(i64 %a, ptr %src) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ poison, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], <i32 1, i32 1> -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true> -; CHECK-NEXT: [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP4]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_PHI]], <2 x i64> [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: 
middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) -; CHECK-NEXT: [[FR_TMP6:%.*]] = freeze i1 [[TMP6]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 poison +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i64> [[TMP4]], poison +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i64 [[A]], i64 poison ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] @@ -134,25 +136,30 @@ define i64 @pr62565_incoming_value_may_be_poison(i64 %a, ptr %src, i64 %start) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <2 x i64> [[MINMAX_IDENT_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 1, [[INDEX]] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr 
inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i32> [[WIDE_LOAD]], <i32 1, i32 1> -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true> -; CHECK-NEXT: [[TMP4]] = or <2 x i1> [[VEC_PHI]], [[NOT]] +; CHECK-NEXT: [[TMP4]] = select <2 x i1> [[TMP3]], <2 x i64> [[VEC_PHI]], <2 x i64> [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]]) -; CHECK-NEXT: [[FR_TMP6:%.*]] = freeze i1 [[TMP6]] -; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[FR_TMP6]], i64 [[A]], i64 [[START]] +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[START]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <2 x i64> [[TMP4]], [[DOTSPLAT]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i64 [[A]], i64 [[START]] ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 33, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] diff --git a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll index a90b38c..fe98ca1 100644 --- a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll +++ b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll @@ -9,6 +9,11 @@ ; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -force-vector-width=4 \ ; RUN: 
-force-target-supports-scalable-vectors -scalable-vectorization=on -S < %s | FileCheck --check-prefix=NO-VP %s +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ +; RUN: -force-target-supports-scalable-vectors -scalable-vectorization=on -S < %s | FileCheck --check-prefix=NO-VP-DEF %s + ; The target does not support predicated vectorization. define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-LABEL: @foo( @@ -80,6 +85,54 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; NO-VP: for.cond.cleanup: ; NO-VP-NEXT: ret void ; +; NO-VP-DEF-LABEL: @foo( +; NO-VP-DEF-NEXT: entry: +; NO-VP-DEF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-DEF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP0]] +; NO-VP-DEF-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; NO-VP-DEF: vector.ph: +; NO-VP-DEF-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-DEF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP1]] +; NO-VP-DEF-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; NO-VP-DEF-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-DEF-NEXT: br label [[VECTOR_BODY:%.*]] +; NO-VP-DEF: vector.body: +; NO-VP-DEF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; NO-VP-DEF-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; NO-VP-DEF-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP3]] +; NO-VP-DEF-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 +; NO-VP-DEF-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i32>, ptr [[TMP5]], align 4 +; NO-VP-DEF-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP3]] +; NO-VP-DEF-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 +; NO-VP-DEF-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 1 x i32>, ptr [[TMP7]], align 4 
+; NO-VP-DEF-NEXT: [[TMP8:%.*]] = add nsw <vscale x 1 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; NO-VP-DEF-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP3]] +; NO-VP-DEF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 +; NO-VP-DEF-NEXT: store <vscale x 1 x i32> [[TMP8]], ptr [[TMP10]], align 4 +; NO-VP-DEF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]] +; NO-VP-DEF-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-DEF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-VP-DEF: middle.block: +; NO-VP-DEF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; NO-VP-DEF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; NO-VP-DEF: scalar.ph: +; NO-VP-DEF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; NO-VP-DEF-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP-DEF: for.body: +; NO-VP-DEF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; NO-VP-DEF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; NO-VP-DEF-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-VP-DEF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; NO-VP-DEF-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; NO-VP-DEF-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] +; NO-VP-DEF-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; NO-VP-DEF-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; NO-VP-DEF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-VP-DEF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; NO-VP-DEF-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; NO-VP-DEF: for.cond.cleanup: +; NO-VP-DEF-NEXT: ret void +; entry: br label %for.body diff --git 
a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll index 0cacb02..108b78a 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -986,8 +986,8 @@ define void @sinking_requires_duplication(ptr %addr) { ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%gep> = getelementptr ir<%addr>, vp<[[STEPS]]> -; CHECK-NEXT: REPLICATE store ir<1.000000e+01>, ir<%gep> +; CHECK-NEXT: REPLICATE ir<%gep>.1 = getelementptr ir<%addr>, vp<[[STEPS]]> +; CHECK-NEXT: REPLICATE store ir<1.000000e+01>, ir<%gep>.1 ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: @@ -1129,8 +1129,8 @@ define void @ptr_induction_remove_dead_recipe(ptr %start, ptr %end) { ; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.if: -; CHECK-NEXT: REPLICATE ir<%ptr.iv.next> = getelementptr inbounds vp<[[PTR_IV]]>, ir<-1> -; CHECK-NEXT: REPLICATE store ir<95>, ir<%ptr.iv.next> +; CHECK-NEXT: REPLICATE ir<%ptr.iv.next>.1 = getelementptr inbounds vp<[[PTR_IV]]>, ir<-1> +; CHECK-NEXT: REPLICATE store ir<95>, ir<%ptr.iv.next>.1 ; CHECK-NEXT: Successor(s): pred.store.continue ; CHECK-EMPTY: ; CHECK-NEXT: pred.store.continue: diff --git a/llvm/test/Transforms/Reassociate/inverses.ll b/llvm/test/Transforms/Reassociate/inverses.ll index b6962c6..a9d0c4f 100644 --- a/llvm/test/Transforms/Reassociate/inverses.ll +++ b/llvm/test/Transforms/Reassociate/inverses.ll @@ -12,12 +12,12 @@ define i32 @test1(i32 %a, i32 %b) { ret i32 %t5 } -define <2 x i32> @not_op_vec_undef(<2 x i32> %a, <2 x i32> %b) { -; CHECK-LABEL: @not_op_vec_undef( +define <2 x i32> @not_op_vec_poison(<2 x i32> %a, <2 x i32> %b) { +; CHECK-LABEL: @not_op_vec_poison( ; CHECK-NEXT: ret <2 x i32> 
zeroinitializer ; %t2 = and <2 x i32> %b, %a - %t4 = xor <2 x i32> %a, <i32 -1, i32 undef> + %t4 = xor <2 x i32> %a, <i32 -1, i32 poison> %t5 = and <2 x i32> %t2, %t4 ret <2 x i32> %t5 } diff --git a/llvm/test/Transforms/Reassociate/negation.ll b/llvm/test/Transforms/Reassociate/negation.ll index 4718d9d..14ae86f 100644 --- a/llvm/test/Transforms/Reassociate/negation.ll +++ b/llvm/test/Transforms/Reassociate/negation.ll @@ -31,16 +31,16 @@ define i32 @test2(i32 %a, i32 %b, i32 %z) { ret i32 %f } -define <2 x i32> @negate_vec_undefs(<2 x i32> %a, <2 x i32> %b, <2 x i32> %z) { -; CHECK-LABEL: @negate_vec_undefs( +define <2 x i32> @negate_vec_poisons(<2 x i32> %a, <2 x i32> %b, <2 x i32> %z) { +; CHECK-LABEL: @negate_vec_poisons( ; CHECK-NEXT: [[E:%.*]] = mul <2 x i32> [[A:%.*]], <i32 40, i32 40> ; CHECK-NEXT: [[F:%.*]] = mul <2 x i32> [[E]], [[Z:%.*]] ; CHECK-NEXT: ret <2 x i32> [[F]] ; %d = mul <2 x i32> %z, <i32 40, i32 40> - %c = sub <2 x i32> <i32 0, i32 undef>, %d + %c = sub <2 x i32> <i32 0, i32 poison>, %d %e = mul <2 x i32> %a, %c - %f = sub <2 x i32> <i32 0, i32 undef>, %e + %f = sub <2 x i32> <i32 0, i32 poison>, %e ret <2 x i32> %f } diff --git a/llvm/test/Transforms/SCCP/pr50901.ll b/llvm/test/Transforms/SCCP/pr50901.ll index 11d6bba..d48d675 100644 --- a/llvm/test/Transforms/SCCP/pr50901.ll +++ b/llvm/test/Transforms/SCCP/pr50901.ll @@ -52,6 +52,16 @@ ; CHECK: = !DIGlobalVariableExpression(var: ![[DBG_FLOAT_UNDEF:.+]], expr: !DIExpression()) ; CHECK-DAG: ![[DBG_FLOAT_UNDEF]] = distinct !DIGlobalVariable(name: "g_float_undef" +; CHECK: ![[G8:[0-9]+]] = !DIGlobalVariableExpression(var: ![[DBG8:[0-9]+]], expr: !DIExpression(DW_OP_constu, 22136, DW_OP_stack_value)) +; CHECK-DAG: ![[DBG8]] = distinct !DIGlobalVariable(name: "g_88", {{.*}} +; CHECK: ![[G9:[0-9]+]] = !DIGlobalVariableExpression(var: ![[DBG9:[0-9]+]], expr: !DIExpression(DW_OP_constu, 23726, DW_OP_stack_value)) +; CHECK-DAG: ![[DBG9]] = distinct !DIGlobalVariable(name: "g_99", {{.*}} + +; 
CHECK-DAG: ![[DBGA:[0-9]+]] = distinct !DIGlobalVariable(name: "g_i32_undef" +; CHECK-DAG: ![[GA:[0-9]+]] = !DIGlobalVariableExpression(var: ![[DBGA]], expr: !DIExpression()) +; CHECK-DAG: ![[DBGB:[0-9]+]] = distinct !DIGlobalVariable(name: "g_ptr_undef" +; CHECK-DAG: ![[GB:[0-9]+]] = !DIGlobalVariableExpression(var: ![[DBGB]], expr: !DIExpression()) + @g_1 = dso_local global i32 -4, align 4, !dbg !0 @g_2 = dso_local global float 0x4011C28F60000000, align 4, !dbg !8 @g_3 = dso_local global i8 97, align 1, !dbg !10 @@ -59,6 +69,8 @@ @g_5 = dso_local global i8 1, align 1, !dbg !16 @g_6 = dso_local global ptr null, align 8, !dbg !19 @g_7 = dso_local global ptr null, align 8, !dbg !23 +@g_8 = dso_local global half 0xH4321, align 4, !dbg !86 +@g_9 = dso_local global bfloat 0xR3F80, align 4, !dbg !90 @_ZL4g_11 = internal global i32 -5, align 4, !dbg !25 @_ZL4g_22 = internal global float 0x4016333340000000, align 4, !dbg !27 @_ZL4g_33 = internal global i8 98, align 1, !dbg !29 @@ -67,6 +79,10 @@ @_ZL4g_66 = internal global ptr null, align 8, !dbg !35 @_ZL4g_77 = internal global ptr inttoptr (i64 70 to ptr), align 8, !dbg !37 @g_float_undef = internal global float undef, align 4, !dbg !83 +@_ZL4g_88 = internal global half 0xH5678, align 4, !dbg !88 +@_ZL4g_99 = internal global bfloat 0xR5CAE, align 4, !dbg !92 +@g_i32_undef = internal global i32 undef, align 4, !dbg !95 +@g_ptr_undef = internal global ptr undef, align 8, !dbg !97 define dso_local void @_Z3barv() !dbg !46 { entry: @@ -88,6 +104,15 @@ entry: store ptr %6, ptr @g_7, align 8, !dbg !59 %l = load float, ptr @g_float_undef, align 8, !dbg !59 store float %l, ptr @g_2, align 8, !dbg !59 + %7 = load half, ptr @_ZL4g_88, align 4, !dbg !59 + store half %7, ptr @g_8, align 4, !dbg !59 + %8 = load bfloat, ptr @_ZL4g_99, align 4, !dbg !59 + store bfloat %8, ptr @g_9, align 4, !dbg !59 + %9 = load i32, ptr @g_i32_undef, align 4, !dbg !59 + store i32 %9, ptr @g_1, align 4, !dbg !59 + %10 = load ptr, ptr @g_ptr_undef, align 
8, !dbg !59 + store ptr %10, ptr @g_6, align 8, !dbg !59 + ret void, !dbg !59 } @@ -108,7 +133,7 @@ entry: !4 = !{!5} !5 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !6, size: 64) !6 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) -!7 = !{!0, !8, !10, !13, !16, !19, !23, !25, !27, !29, !31, !33, !35, !37, !83} +!7 = !{!0, !8, !10, !13, !16, !19, !23, !25, !27, !29, !31, !33, !35, !37, !83, !86, !88, !90, !92, !95, !97} !8 = !DIGlobalVariableExpression(var: !9, expr: !DIExpression()) !9 = distinct !DIGlobalVariable(name: "g_2", scope: !2, file: !3, line: 2, type: !6, isLocal: false, isDefinition: true) !10 = !DIGlobalVariableExpression(var: !11, expr: !DIExpression()) @@ -159,3 +184,17 @@ entry: !82 = !DILocation(line: 31, column: 1, scope: !77) !83 = !DIGlobalVariableExpression(var: !84, expr: !DIExpression()) !84 = distinct !DIGlobalVariable(name: "g_float_undef", linkageName: "g_float_undef", scope: !2, file: !3, line: 15, type: !6, isLocal: true, isDefinition: true) +!85 = !DIBasicType(name: "float", size: 16, encoding: DW_ATE_float) +!86 = !DIGlobalVariableExpression(var: !87, expr: !DIExpression()) +!87 = distinct !DIGlobalVariable(name: "g_8", scope: !2, file: !3, line: 2, type: !85, isLocal: false, isDefinition: true) +!88 = !DIGlobalVariableExpression(var: !89, expr: !DIExpression()) +!89 = distinct !DIGlobalVariable(name: "g_88", linkageName: "_ZL4g_88", scope: !2, file: !3, line: 10, type: !85, isLocal: true, isDefinition: true) +!90 = !DIGlobalVariableExpression(var: !91, expr: !DIExpression()) +!91 = distinct !DIGlobalVariable(name: "g_9", scope: !2, file: !3, line: 2, type: !85, isLocal: false, isDefinition: true) +!92 = !DIGlobalVariableExpression(var: !93, expr: !DIExpression()) +!93 = distinct !DIGlobalVariable(name: "g_99", linkageName: "_ZL4g_99", scope: !2, file: !3, line: 10, type: !85, isLocal: true, isDefinition: true) + +!95 = !DIGlobalVariableExpression(var: !96, expr: !DIExpression()) +!96 = distinct 
!DIGlobalVariable(name: "g_i32_undef", linkageName: "g_i32_undef", scope: !2, file: !3, line: 9, type: !22, isLocal: true, isDefinition: true) +!97 = !DIGlobalVariableExpression(var: !98, expr: !DIExpression()) +!98 = distinct !DIGlobalVariable(name: "g_ptr_undef", linkageName: "g_ptr_undef", scope: !2, file: !3, line: 14, type: !21, isLocal: true, isDefinition: true) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/smax-unsigned-operand.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/smax-unsigned-operand.ll new file mode 100644 index 0000000..5db148a --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/smax-unsigned-operand.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "riscv64-unknown-linux-gnu" + +@e = global [2 x i8] zeroinitializer + +define void @main(ptr noalias %p) { +; CHECK-LABEL: define void @main( +; CHECK-SAME: ptr noalias [[P:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[CONV_4:%.*]] = zext i32 0 to i64 +; CHECK-NEXT: [[COND_4:%.*]] = tail call i64 @llvm.smax.i64(i64 [[CONV_4]], i64 0) +; CHECK-NEXT: [[CONV5_4:%.*]] = trunc i64 [[COND_4]] to i8 +; CHECK-NEXT: store i8 [[CONV5_4]], ptr getelementptr inbounds ([11 x i8], ptr @e, i64 0, i64 4), align 1 +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[CONV_5:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[COND_5:%.*]] = tail call i64 @llvm.smax.i64(i64 [[CONV_5]], i64 1) +; CHECK-NEXT: [[CONV5_5:%.*]] = trunc i64 [[COND_5]] to i8 +; CHECK-NEXT: store i8 [[CONV5_5]], ptr getelementptr inbounds ([11 x i8], ptr @e, i64 0, i64 5), align 1 +; CHECK-NEXT: ret void +; +bb: + %conv.4 = zext i32 0 to i64 + %cond.4 = tail call i64 @llvm.smax.i64(i64 %conv.4, i64 0) + %conv5.4 = trunc i64 %cond.4 to i8 + 
store i8 %conv5.4, ptr getelementptr inbounds ([11 x i8], ptr @e, i64 0, i64 4), align 1 + %0 = load i32, ptr %p, align 4 + %conv.5 = zext i32 %0 to i64 + %cond.5 = tail call i64 @llvm.smax.i64(i64 %conv.5, i64 1) + %conv5.5 = trunc i64 %cond.5 to i8 + store i8 %conv5.5, ptr getelementptr inbounds ([11 x i8], ptr @e, i64 0, i64 5), align 1 + ret void +} + diff --git a/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll b/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll index b76e26e..2ff6785 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/ext-int-reduced-not-operand.ll @@ -1,7 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999\ +; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED define i64 @wombat() { +; FORCED-LABEL: define i64 @wombat() { +; FORCED-NEXT: bb: +; FORCED-NEXT: br label [[BB2:%.*]] +; FORCED: bb1: +; FORCED-NEXT: br label [[BB2]] +; FORCED: bb2: +; FORCED-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ 0, [[BB1:%.*]] ] +; FORCED-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[PHI]], i32 0 +; FORCED-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer +; FORCED-NEXT: [[TMP2:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i1> +; FORCED-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 +; FORCED-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i64 +; FORCED-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 +; FORCED-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i64 +; FORCED-NEXT: [[OR:%.*]] = or i64 [[TMP4]], [[TMP6]] +; FORCED-NEXT: ret i64 [[OR]] +; ; CHECK-LABEL: define i64 
@wombat() { ; CHECK-NEXT: bb: ; CHECK-NEXT: br label [[BB2:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll index 096f57d..c600d75 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll @@ -13,7 +13,7 @@ define i32 @fn1() { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> <i64 11, i64 56> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 11 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64> ; CHECK-NEXT: store <2 x i64> [[TMP5]], ptr [[TMP4]], align 8 ; CHECK-NEXT: ret i32 undef @@ -92,7 +92,7 @@ define void @externally_used_ptrs() { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP0]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> <i64 56, i64 11> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i64 11 ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64> ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8 ; CHECK-NEXT: [[TMP7:%.*]] = add <2 x i64> [[TMP5]], [[TMP6]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll index 3c3dea3..f2ea2df 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/gather-move-out-of-loop.ll @@ -1,7 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-100 -mtriple=x86_64-w64-windows-gnu < %s | FileCheck %s +; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-100 -mtriple=x86_64-w64-windows-gnu\ +; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED define void @test(i16 %0) { +; FORCED-LABEL: @test( +; FORCED-NEXT: for.body92.preheader: +; FORCED-NEXT: [[TMP1:%.*]] = insertelement <2 x i16> <i16 0, i16 poison>, i16 [[TMP0:%.*]], i32 1 +; FORCED-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i32> +; FORCED-NEXT: [[TMP3:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i32> +; FORCED-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3> +; FORCED-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison> +; FORCED-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> <i32 poison, i32 0, i32 poison, i32 0>, <4 x i32> [[TMP5]], <4 x i32> <i32 4, i32 1, i32 6, i32 3> +; FORCED-NEXT: br label [[FOR_BODY92:%.*]] +; FORCED: for.body92: +; FORCED-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> zeroinitializer, [[TMP6]] +; FORCED-NEXT: store <4 x i32> [[TMP7]], ptr undef, align 8 +; FORCED-NEXT: br label [[FOR_BODY92]] +; ; CHECK-LABEL: @test( ; CHECK-NEXT: for.body92.preheader: ; CHECK-NEXT: br label [[FOR_BODY92:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll index 19a8aa9..d7144d75 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gathered-delayed-nodes-with-reused-user.ll @@ -1,7 +1,30 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 4 ; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-9999 < %s | FileCheck %s +; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-9999\ +; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED define i64 @foo() { +; FORCED-LABEL: define i64 @foo() { +; FORCED-NEXT: bb: +; FORCED-NEXT: br label [[BB3:%.*]] +; FORCED: bb1: +; FORCED-NEXT: [[TMP0:%.*]] = phi <2 x i64> [ [[TMP5:%.*]], [[BB3]] ] +; FORCED-NEXT: ret i64 0 +; FORCED: bb3: +; FORCED-NEXT: [[PHI5:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ 0, [[BB3]] ] +; FORCED-NEXT: [[TMP1:%.*]] = phi <2 x i64> [ zeroinitializer, [[BB]] ], [ [[TMP7:%.*]], [[BB3]] ] +; FORCED-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[PHI5]], i32 0 +; FORCED-NEXT: [[TMP3:%.*]] = add <2 x i64> [[TMP1]], [[TMP2]] +; FORCED-NEXT: [[TMP4:%.*]] = or <2 x i64> [[TMP1]], [[TMP2]] +; FORCED-NEXT: [[TMP5]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], <2 x i32> <i32 0, i32 3> +; FORCED-NEXT: [[TMP6:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 0, i32 3> +; FORCED-NEXT: [[TMP7]] = add <2 x i64> [[TMP6]], [[TMP2]] +; FORCED-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1 +; FORCED-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr i64, ptr addrspace(1) null, i64 [[TMP8]] +; FORCED-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1 +; FORCED-NEXT: [[ICMP:%.*]] = icmp ult i64 [[TMP9]], 0 +; FORCED-NEXT: br i1 false, label [[BB3]], label [[BB1:%.*]] +; ; CHECK-LABEL: define i64 @foo() { ; CHECK-NEXT: bb: ; CHECK-NEXT: br label [[BB3:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll index aa67974..e459cd8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/geps-non-pow-2.ll @@ -13,25 +13,26 @@ define dso_local i32 @g() local_unnamed_addr { ; CHECK: while.body: ; 
CHECK-NEXT: [[C_022:%.*]] = phi ptr [ [[C_022_BE:%.*]], [[WHILE_BODY_BACKEDGE:%.*]] ], [ undef, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x ptr> [ [[TMP14:%.*]], [[WHILE_BODY_BACKEDGE]] ], [ undef, [[ENTRY]] ] -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[C_022]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP9]] to i64 ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 1, i64 1> ; CHECK-NEXT: switch i32 [[TMP3]], label [[WHILE_BODY_BACKEDGE]] [ -; CHECK-NEXT: i32 2, label [[SW_BB:%.*]] -; CHECK-NEXT: i32 4, label [[SW_BB6:%.*]] +; CHECK-NEXT: i32 2, label [[SW_BB:%.*]] +; CHECK-NEXT: i32 4, label [[SW_BB6:%.*]] ; CHECK-NEXT: ] ; CHECK: sw.bb: ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x ptr> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64 ; CHECK-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 2, i64 2> -; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x ptr> [[TMP4]], i32 1 -; CHECK-NEXT: store i32 [[TMP7]], ptr [[TMP9]], align 4 ; CHECK-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 2 +; CHECK-NEXT: store i32 [[TMP7]], ptr [[INCDEC_PTR1]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 2, i64 2> ; CHECK-NEXT: br label [[WHILE_BODY_BACKEDGE]] ; CHECK: sw.bb6: ; CHECK-NEXT: [[INCDEC_PTR8:%.*]] = getelementptr inbounds i32, ptr [[C_022]], i64 2 +; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i64 1 ; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[INCDEC_PTR]] to i64 ; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to 
i32 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, <2 x ptr> [[TMP1]], <2 x i64> <i64 2, i64 2> @@ -39,7 +40,7 @@ define dso_local i32 @g() local_unnamed_addr { ; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP13]], align 4 ; CHECK-NEXT: br label [[WHILE_BODY_BACKEDGE]] ; CHECK: while.body.backedge: -; CHECK-NEXT: [[C_022_BE]] = phi ptr [ [[INCDEC_PTR]], [[WHILE_BODY]] ], [ [[INCDEC_PTR8]], [[SW_BB6]] ], [ [[INCDEC_PTR5]], [[SW_BB]] ] +; CHECK-NEXT: [[C_022_BE]] = phi ptr [ [[INCDEC_PTR1]], [[WHILE_BODY]] ], [ [[INCDEC_PTR8]], [[SW_BB6]] ], [ [[INCDEC_PTR5]], [[SW_BB]] ] ; CHECK-NEXT: [[TMP14]] = phi <2 x ptr> [ [[TMP4]], [[WHILE_BODY]] ], [ [[TMP12]], [[SW_BB6]] ], [ [[TMP8]], [[SW_BB]] ] ; CHECK-NEXT: br label [[WHILE_BODY]] ; CHECK: while.end: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll index 59cd1c0..1163c82 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/non-scheduled-inst-reused-as-last-inst.ll @@ -1,7 +1,29 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 ; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-9999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s +; RUN: opt -S -passes=slp-vectorizer -slp-threshold=-9999 -mtriple=x86_64-unknown-linux-gnu\ +; RUN: -slp-skip-early-profitability-check < %s | FileCheck %s --check-prefixes=FORCED define void @foo() { +; FORCED-LABEL: define void @foo() { +; FORCED-NEXT: bb: +; FORCED-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 0, i32 0 +; FORCED-NEXT: br label [[BB1:%.*]] +; FORCED: bb1: +; FORCED-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, [[BB:%.*]] ], [ [[TMP6:%.*]], [[BB4:%.*]] ] +; FORCED-NEXT: [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], [[TMP0]] +; FORCED-NEXT: [[TMP3:%.*]] = or <2 x i32> [[TMP1]], [[TMP0]] 
+; FORCED-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 3> +; FORCED-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3> +; FORCED-NEXT: [[TMP6]] = or <2 x i32> [[TMP5]], zeroinitializer +; FORCED-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP6]], i32 0 +; FORCED-NEXT: [[CALL:%.*]] = call i64 null(i32 [[TMP7]]) +; FORCED-NEXT: br label [[BB4]] +; FORCED: bb4: +; FORCED-NEXT: br i1 false, label [[BB5:%.*]], label [[BB1]] +; FORCED: bb5: +; FORCED-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ [[TMP4]], [[BB4]] ] +; FORCED-NEXT: ret void +; ; CHECK-LABEL: define void @foo() { ; CHECK-NEXT: bb: ; CHECK-NEXT: br label [[BB1:%.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll index 3801fa5..c40be96 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/opaque-ptr.ll @@ -52,17 +52,14 @@ define void @test(ptr %r, ptr %p, ptr %q) #0 { define void @test2(ptr %a, ptr %b) { ; CHECK-LABEL: @test2( -; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 1 -; CHECK-NEXT: [[A2:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 2 -; CHECK-NEXT: [[I1:%.*]] = ptrtoint ptr [[A1]] to i64 -; CHECK-NEXT: [[B3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 3 -; CHECK-NEXT: [[I2:%.*]] = ptrtoint ptr [[B3]] to i64 -; CHECK-NEXT: [[V1:%.*]] = load i64, ptr [[A1]], align 8 -; CHECK-NEXT: [[V2:%.*]] = load i64, ptr [[A2]], align 8 -; CHECK-NEXT: [[ADD1:%.*]] = add i64 [[I1]], [[V1]] -; CHECK-NEXT: [[ADD2:%.*]] = add i64 [[I2]], [[V2]] -; CHECK-NEXT: store i64 [[ADD1]], ptr [[A1]], align 8 -; CHECK-NEXT: store i64 [[ADD2]], ptr [[A2]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[B:%.*]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = 
getelementptr i64, <2 x ptr> [[TMP2]], <2 x i64> <i64 1, i64 3> +; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1 +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint <2 x ptr> [[TMP3]] to <2 x i64> +; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr [[A1]], align 8 +; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i64> [[TMP4]], [[TMP5]] +; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr [[A1]], align 8 ; CHECK-NEXT: ret void ; %a1 = getelementptr inbounds i64, ptr %a, i64 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll index ddc2a1b..30f3282 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll @@ -9,7 +9,7 @@ define void @"foo"(ptr addrspace(1) %0, ptr addrspace(1) %1) #0 { ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x ptr addrspace(1)> poison, ptr addrspace(1) [[TMP0:%.*]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x ptr addrspace(1)> [[TMP3]], <4 x ptr addrspace(1)> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, <4 x ptr addrspace(1)> [[TMP4]], <4 x i64> <i64 8, i64 12, i64 28, i64 24> -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr addrspace(1)> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP0]], i64 8 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP1:%.*]], i64 8 ; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p1(<4 x ptr addrspace(1)> [[TMP5]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> poison) ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <8 x i32> <i32 0, i32 3, i32 0, i32 3, i32 2, i32 1, i32 2, i32 1> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll 
b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll index 0125e5f..e93c524 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/stacksave-dependence.ll @@ -35,7 +35,7 @@ define void @allocas(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1 ; CHECK-NEXT: store ptr [[TMP4]], ptr [[A:%.*]], align 8 ; CHECK-NEXT: store <2 x ptr> [[TMP3]], ptr [[B:%.*]], align 8 ; CHECK-NEXT: ret void @@ -127,7 +127,7 @@ define void @stacksave2(ptr %a, ptr %b, ptr %c) { ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[V1]], i32 0 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x ptr> [[TMP1]], ptr [[V2]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, <2 x ptr> [[TMP2]], <2 x i32> <i32 1, i32 1> -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP3]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[V1]], i32 1 ; CHECK-NEXT: store ptr [[TMP4]], ptr [[A:%.*]], align 8 ; CHECK-NEXT: call void @use(ptr inalloca(i8) [[V2]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[STACK]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/trunc-store-value-ty-not-power-of-2.ll b/llvm/test/Transforms/SLPVectorizer/X86/trunc-store-value-ty-not-power-of-2.ll index 81b4ee4..2f0fad7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/trunc-store-value-ty-not-power-of-2.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/trunc-store-value-ty-not-power-of-2.ll @@ -107,3 +107,36 @@ define void @test_4_trunc_i24_to_i16(i24 %x, ptr %A) { store i16 %t, ptr %gep.3, align 1 ret void } + +%struct.d = type { [3 x 
i8], [3 x i8], [2 x i8] } + +; Test case for https://github.com/llvm/llvm-project/issues/88640. +define void @test_access_i24_directly(ptr %src, ptr noalias %dst) "target-cpu"="btver2" { +; CHECK-LABEL: define void @test_access_i24_directly( +; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[SRC]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i24 +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds [[STRUCT_D:%.*]], ptr [[SRC]], i64 0, i32 1 +; CHECK-NEXT: [[BF_LOAD:%.*]] = load i24, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i24 [[TMP1]], 8388607 +; CHECK-NEXT: [[BF_CLEAR:%.*]] = and i24 [[BF_LOAD]], -8388608 +; CHECK-NEXT: [[BF_SET:%.*]] = or disjoint i24 [[BF_CLEAR]], [[BF_VALUE]] +; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds [[STRUCT_D]], ptr [[DST]], i64 0, i32 1 +; CHECK-NEXT: store i24 [[BF_SET]], ptr [[GEP_DST]], align 1 +; CHECK-NEXT: store i24 0, ptr [[DST]], align 8 +; CHECK-NEXT: ret void +; +entry: + %0 = load i64, ptr %src, align 8 + %1 = trunc i64 %0 to i24 + %gep.src = getelementptr inbounds %struct.d, ptr %src, i64 0, i32 1 + %bf.load = load i24, ptr %gep.src, align 1 + %bf.value = and i24 %1, 8388607 + %bf.clear = and i24 %bf.load, -8388608 + %bf.set = or disjoint i24 %bf.clear, %bf.value + %gep.dst = getelementptr inbounds %struct.d, ptr %dst, i64 0, i32 1 + store i24 %bf.set, ptr %gep.dst, align 1 + store i24 0, ptr %dst, align 8 + ret void +} diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll index 0fcd787..61034de 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct3.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll @@ -539,7 +539,7 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias !6 = !{!5, !5, i64 0} !7 = !{i64 0, i64 8, !6, i64 8, i64 4, !1} !8 = !{i64 0, i64 4, !1, i64 4, i64 8, !6} -!9 = !{i64 0, i64 8, !6, i64 4, i64 
8, !1} +!9 = !{i64 0, i64 8, !6, i64 8, i64 8, !1} !10 = !{i64 0, i64 2, !1, i64 2, i64 2, !1} !11 = !{i64 0, i64 1, !1, i64 1, i64 3, !1} !12 = !{i64 0, i64 2, !1, i64 2, i64 6, !1} diff --git a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll index bbcdcb6..73ae66d 100644 --- a/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll +++ b/llvm/test/Transforms/Scalarizer/basic-inseltpoison.ll @@ -836,5 +836,6 @@ define <2 x i32> @f23_crash(<2 x i32> %srcvec, i32 %v1) { !2 = !{ !"set2", !0 } !3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} } !4 = !{ float 4.0 } -!5 = !{ i64 0, i64 8, null } +!5 = !{ i64 0, i64 8, !6 } +!6 = !{ !1, !1, i64 0 } !13 = distinct !{} diff --git a/llvm/test/Transforms/Scalarizer/basic.ll b/llvm/test/Transforms/Scalarizer/basic.ll index db7c5f5..87a70cc 100644 --- a/llvm/test/Transforms/Scalarizer/basic.ll +++ b/llvm/test/Transforms/Scalarizer/basic.ll @@ -870,5 +870,6 @@ define <2 x float> @f25(<2 x float> %src) { !2 = !{ !"set2", !0 } !3 = !{ !3, !{!"llvm.loop.parallel_accesses", !13} } !4 = !{ float 4.0 } -!5 = !{ i64 0, i64 8, null } +!5 = !{ i64 0, i64 8, !6 } +!6 = !{ !1, !1, i64 0 } !13 = distinct !{} diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll new file mode 100644 index 0000000..d96dfec --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll @@ -0,0 +1,647 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes='vector-combine' -S %s | FileCheck %s + +target triple = "aarch64" + +define <8 x i8> @trivial(<8 x i8> %a) { +; CHECK-LABEL: @trivial( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[R:%.*]] = 
shufflevector <4 x i8> [[AT]], <4 x i8> [[AB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %r = shufflevector <4 x i8> %at, <4 x i8> %ab, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %r +} + +define <8 x i8> @add(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: @add( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]] +; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %bb = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %abt = add <4 x i8> %at, %bt + %abb = add <4 x i8> %ab, %bb + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %r +} + +; Different opcodes +define <8 x i8> @wrong_addsub(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: @wrong_addsub( +; CHECK-NEXT: 
[[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]] +; CHECK-NEXT: [[ABB:%.*]] = sub <4 x i8> [[AB]], [[BB]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %bb = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %abt = add <4 x i8> %at, %bt + %abb = sub <4 x i8> %ab, %bb + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %r +} + +; Different lanes that do not make an identity +define <8 x i8> @wrong_lanes(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: @wrong_lanes( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]] +; CHECK-NEXT: [[ABB:%.*]] = 
add <4 x i8> [[AB]], [[BB]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 6, i32 7, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %bb = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %abt = add <4 x i8> %at, %bt + %abb = add <4 x i8> %ab, %bb + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> <i32 6, i32 7, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %r +} + +define <8 x half> @fadd(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: @fadd( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x half> [[B]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BT]] +; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BB]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x half> [[R]] +; + %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %bb = shufflevector <8 x half> %b, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %bt = shufflevector <8 x half> %b, <8 x half> poison, <4 x i32> <i32 
7, i32 6, i32 5, i32 4> + %abt = fadd <4 x half> %at, %bt + %abb = fadd <4 x half> %ab, %bb + %r = shufflevector <4 x half> %abt, <4 x half> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x half> %r +} + +define <8 x half> @fneg(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: @fneg( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[ABT:%.*]] = fneg <4 x half> [[AT]] +; CHECK-NEXT: [[ABB:%.*]] = fneg <4 x half> [[AB]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x half> [[R]] +; + %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %abt = fneg <4 x half> %at + %abb = fneg <4 x half> %ab + %r = shufflevector <4 x half> %abt, <4 x half> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x half> %r +} + +define <8 x i8> @abs(<8 x i8> %a) { +; CHECK-LABEL: @abs( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[ABT:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AT]], i1 true) +; CHECK-NEXT: [[ABB:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[AB]], i1 false) +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector 
<8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %abt = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %at, i1 true) + %abb = call <4 x i8> @llvm.abs.v4i8(<4 x i8> %ab, i1 false) + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %r +} + +define <8 x half> @splat0(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: @splat0( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BS]] +; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BS]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x half> [[R]] +; + %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %bs = shufflevector <8 x half> %b, <8 x half> poison, <4 x i32> zeroinitializer + %abt = fadd <4 x half> %at, %bs + %abb = fadd <4 x half> %ab, %bs + %r = shufflevector <4 x half> %abt, <4 x half> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x half> %r +} + +define <8 x half> @splat2(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: @splat2( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, 
i32 2> +; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BS]] +; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BS]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x half> [[R]] +; + %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %bs = shufflevector <8 x half> %b, <8 x half> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2> + %abt = fadd <4 x half> %at, %bs + %abb = fadd <4 x half> %ab, %bs + %r = shufflevector <4 x half> %abt, <4 x half> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x half> %r +} + +define <8 x half> @splatandidentity(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: @splatandidentity( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[ABT:%.*]] = fadd <4 x half> [[AT]], [[BS]] +; CHECK-NEXT: [[ABB:%.*]] = fadd <4 x half> [[AB]], [[BS]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x half> [[R]] +; + %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %bs = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> zeroinitializer + %abt = fadd <4 x half> %at, %bs + %abb = fadd <4 x half> %ab, %bs + %r = shufflevector <4 x half> %abt, <4 x half> %abb, <8 x i32> <i32 7, i32 6, i32 5, 
i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x half> %r +} + +define <8 x half> @splattwice(<8 x half> %a, <8 x half> %b) { +; CHECK-LABEL: @splattwice( +; CHECK-NEXT: [[AS:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BS:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[AB1:%.*]] = fadd <4 x half> [[AS]], [[BS]] +; CHECK-NEXT: [[AB2:%.*]] = fadd <4 x half> [[AS]], [[BS]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[AB1]], <4 x half> [[AB2]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x half> [[R]] +; + %as = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> zeroinitializer + %bs = shufflevector <8 x half> %b, <8 x half> poison, <4 x i32> zeroinitializer + %ab1 = fadd <4 x half> %as, %bs + %ab2 = fadd <4 x half> %as, %bs + %r = shufflevector <4 x half> %ab1, <4 x half> %ab2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x half> %r +} + +define <8 x i8> @undeflane(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: @undeflane( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]] +; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 poison, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + 
%at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %bb = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %abt = add <4 x i8> %at, %bt + %abb = add <4 x i8> %ab, %bb + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 poison, i32 1, i32 0> + ret <8 x i8> %r +} + +define <8 x i8> @constantsplat(<8 x i8> %a) { +; CHECK-LABEL: @constantsplat( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], <i8 10, i8 10, i8 10, i8 10> +; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], <i8 10, i8 10, i8 10, i8 10> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %abt = add <4 x i8> %at, <i8 10, i8 10, i8 10, i8 10> + %abb = add <4 x i8> %ab, <i8 10, i8 10, i8 10, i8 10> + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %r +} + +define <8 x i8> @constantdiff(<8 x i8> %a) { +; CHECK-LABEL: @constantdiff( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], <i8 1, i8 2, i8 3, i8 4> +; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], 
<i8 5, i8 6, i8 7, i8 8> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %abt = add <4 x i8> %at, <i8 1, i8 2, i8 3, i8 4> + %abb = add <4 x i8> %ab, <i8 5, i8 6, i8 7, i8 8> + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %r +} + +define <8 x i8> @constantdiff2(<8 x i8> %a) { +; CHECK-LABEL: @constantdiff2( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], <i8 1, i8 2, i8 3, i8 4> +; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], <i8 1, i8 2, i8 3, i8 4> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %abt = add <4 x i8> %at, <i8 1, i8 2, i8 3, i8 4> + %abb = add <4 x i8> %ab, <i8 1, i8 2, i8 3, i8 4> + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %r +} + +define <8 x i8> @inner_shuffle(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +; CHECK-LABEL: @inner_shuffle( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, 
<4 x i32> <i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[CS:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[ABT:%.*]] = mul <4 x i8> [[AT]], [[BT]] +; CHECK-NEXT: [[ABB:%.*]] = mul <4 x i8> [[AB]], [[BB]] +; CHECK-NEXT: [[ABT2:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[ABB2:%.*]] = shufflevector <4 x i8> [[ABB]], <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[ABT3:%.*]] = add <4 x i8> [[ABT2]], [[CS]] +; CHECK-NEXT: [[ABB3:%.*]] = add <4 x i8> [[ABB2]], [[CS]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT3]], <4 x i8> [[ABB3]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %bb = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %cs = shufflevector <8 x i8> %c, <8 x i8> poison, <4 x i32> zeroinitializer + %abt = mul <4 x i8> %at, %bt + %abb = mul <4 x i8> %ab, %bb + %abt2 = shufflevector <4 x i8> %abt, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %abb2 = shufflevector <4 x i8> %abb, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %abt3 = add <4 x i8> %abt2, %cs + %abb3 = add <4 x i8> %abb2, %cs + %r = shufflevector <4 x i8> %abt3, <4 x i8> %abb3, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %r +} + +define <8 x i8> @extrause_add(<8 x i8> %a, <8 x i8> %b) { +; 
CHECK-LABEL: @extrause_add( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[AT]], [[BT]] +; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]] +; CHECK-NEXT: call void @use(<4 x i8> [[ABB]]) +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %bb = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %abt = add <4 x i8> %at, %bt + %abb = add <4 x i8> %ab, %bb + call void @use(<4 x i8> %abb) + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %r +} + +define <8 x i8> @extrause_shuffle(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: @extrause_shuffle( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B1:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[BT1:%.*]] = shufflevector <8 x i8> [[B1]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> 
+; CHECK-NEXT: call void @use(<4 x i8> [[BT1]]) +; CHECK-NEXT: [[ABT:%.*]] = add <4 x i8> [[BT]], [[BT1]] +; CHECK-NEXT: [[ABB:%.*]] = add <4 x i8> [[AB]], [[BB]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %bb = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + call void @use(<4 x i8> %bt) + %abt = add <4 x i8> %at, %bt + %abb = add <4 x i8> %ab, %bb + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %r +} + +define <8 x i8> @icmpsel(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { +; CHECK-LABEL: @icmpsel( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[CB:%.*]] = shufflevector <8 x i8> [[C:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[CT:%.*]] = shufflevector <8 x i8> [[C]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[DB:%.*]] = shufflevector <8 x i8> [[D:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[DT:%.*]] = shufflevector <8 x i8> [[D]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: 
[[ABT1:%.*]] = icmp slt <4 x i8> [[AT]], [[BT]] +; CHECK-NEXT: [[ABB1:%.*]] = icmp slt <4 x i8> [[AB]], [[BB]] +; CHECK-NEXT: [[ABT:%.*]] = select <4 x i1> [[ABT1]], <4 x i8> [[CT]], <4 x i8> [[DT]] +; CHECK-NEXT: [[ABB:%.*]] = select <4 x i1> [[ABB1]], <4 x i8> [[CB]], <4 x i8> [[DB]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %bb = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %cb = shufflevector <8 x i8> %c, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %ct = shufflevector <8 x i8> %c, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %db = shufflevector <8 x i8> %d, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %dt = shufflevector <8 x i8> %d, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %abt1 = icmp slt <4 x i8> %at, %bt + %abb1 = icmp slt <4 x i8> %ab, %bb + %abt = select <4 x i1> %abt1, <4 x i8> %ct, <4 x i8> %dt + %abb = select <4 x i1> %abb1, <4 x i8> %cb, <4 x i8> %db + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %r +} + +define <8 x half> @fma(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; CHECK-LABEL: @fma( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x half> [[A:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x half> [[A]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x half> [[B:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: 
[[BT:%.*]] = shufflevector <8 x half> [[B]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[CB:%.*]] = shufflevector <8 x half> [[C:%.*]], <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[CT:%.*]] = shufflevector <8 x half> [[C]], <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[ABB:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[AB]], <4 x half> [[BB]], <4 x half> [[CB]]) +; CHECK-NEXT: [[ABT:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[AT]], <4 x half> [[BT]], <4 x half> [[CT]]) +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x half> [[ABT]], <4 x half> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x half> [[R]] +; + %ab = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %bb = shufflevector <8 x half> %b, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %bt = shufflevector <8 x half> %b, <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %cb = shufflevector <8 x half> %c, <8 x half> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %ct = shufflevector <8 x half> %c, <8 x half> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %abb = call <4 x half> @llvm.fma.v4f16(<4 x half> %ab, <4 x half> %bb, <4 x half> %cb) + %abt = call <4 x half> @llvm.fma.v4f16(<4 x half> %at, <4 x half> %bt, <4 x half> %ct) + %r = shufflevector <4 x half> %abt, <4 x half> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x half> %r +} + +define void @exttrunc(<8 x i32> %a, <8 x i32> %b, ptr %p) { +; CHECK-LABEL: @exttrunc( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i32> [[A:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i32> [[A]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: 
[[BB:%.*]] = shufflevector <8 x i32> [[B:%.*]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i32> [[B]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[AB1:%.*]] = zext <4 x i32> [[AB]] to <4 x i64> +; CHECK-NEXT: [[AT1:%.*]] = zext <4 x i32> [[AT]] to <4 x i64> +; CHECK-NEXT: [[BB1:%.*]] = sext <4 x i32> [[BB]] to <4 x i64> +; CHECK-NEXT: [[BT1:%.*]] = sext <4 x i32> [[BT]] to <4 x i64> +; CHECK-NEXT: [[ABB:%.*]] = add <4 x i64> [[AB1]], [[BB1]] +; CHECK-NEXT: [[ABT:%.*]] = add <4 x i64> [[AT1]], [[BT1]] +; CHECK-NEXT: [[ABB1:%.*]] = trunc <4 x i64> [[ABB]] to <4 x i32> +; CHECK-NEXT: [[ABT1:%.*]] = trunc <4 x i64> [[ABT]] to <4 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[ABB1]], <4 x i32> [[ABT1]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> +; CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[P:%.*]], align 32 +; CHECK-NEXT: ret void +; + %ab = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %at = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %bb = shufflevector <8 x i32> %b, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %bt = shufflevector <8 x i32> %b, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %ab1 = zext <4 x i32> %ab to <4 x i64> + %at1 = zext <4 x i32> %at to <4 x i64> + %bb1 = sext <4 x i32> %bb to <4 x i64> + %bt1 = sext <4 x i32> %bt to <4 x i64> + %abb = add <4 x i64> %ab1, %bb1 + %abt = add <4 x i64> %at1, %bt1 + %abb1 = trunc <4 x i64> %abb to <4 x i32> + %abt1 = trunc <4 x i64> %abt to <4 x i32> + %r = shufflevector <4 x i32> %abb1, <4 x i32> %abt1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> + store <8 x i32> %r, ptr %p + ret void +} + +define <8 x i8> @intrinsics_minmax(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: @intrinsics_minmax( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x 
i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[ABT:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[AT]], <4 x i8> [[BT]]) +; CHECK-NEXT: [[ABB:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[AB]], <4 x i8> [[BB]]) +; CHECK-NEXT: [[ABT1:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[ABT]], <4 x i8> [[BT]]) +; CHECK-NEXT: [[ABB1:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[ABB]], <4 x i8> [[BB]]) +; CHECK-NEXT: [[ABT2:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[ABT1]], <4 x i8> [[BT]]) +; CHECK-NEXT: [[ABB2:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[ABB1]], <4 x i8> [[BB]]) +; CHECK-NEXT: [[ABT3:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[ABT2]], <4 x i8> [[BT]]) +; CHECK-NEXT: [[ABB3:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[ABB2]], <4 x i8> [[BB]]) +; CHECK-NEXT: [[R1:%.*]] = shufflevector <4 x i8> [[ABT3]], <4 x i8> [[ABB3]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R1]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %bb = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %abt = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %at, <4 x i8> %bt) + %abb = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %ab, <4 x i8> %bb) + %abt1 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %abt, <4 x i8> %bt) + %abb1 = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %abb, <4 x i8> %bb) + %abt2 = call <4 x 
i8> @llvm.umin.v4i8(<4 x i8> %abt1, <4 x i8> %bt) + %abb2 = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %abb1, <4 x i8> %bb) + %abt3 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %abt2, <4 x i8> %bt) + %abb3 = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %abb2, <4 x i8> %bb) + %r = shufflevector <4 x i8> %abt3, <4 x i8> %abb3, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %r +} + +define <8 x i8> @intrinsics_addsat(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: @intrinsics_addsat( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[ABT:%.*]] = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> [[AT]], <4 x i8> [[BT]]) +; CHECK-NEXT: [[ABB:%.*]] = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> [[AB]], <4 x i8> [[BB]]) +; CHECK-NEXT: [[ABT1:%.*]] = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> [[ABT]], <4 x i8> [[BT]]) +; CHECK-NEXT: [[ABB1:%.*]] = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> [[ABB]], <4 x i8> [[BB]]) +; CHECK-NEXT: [[ABT2:%.*]] = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> [[ABT1]], <4 x i8> [[BT]]) +; CHECK-NEXT: [[ABB2:%.*]] = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> [[ABB1]], <4 x i8> [[BB]]) +; CHECK-NEXT: [[ABT3:%.*]] = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> [[ABT2]], <4 x i8> [[BT]]) +; CHECK-NEXT: [[ABB3:%.*]] = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> [[ABB2]], <4 x i8> [[BB]]) +; CHECK-NEXT: [[R1:%.*]] = shufflevector <4 x i8> [[ABT3]], <4 x i8> [[ABB3]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R1]] +; + %ab = shufflevector <8 x i8> %a, <8 x i8> poison, <4 
x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %bb = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %abt = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %at, <4 x i8> %bt) + %abb = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %ab, <4 x i8> %bb) + %abt1 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %abt, <4 x i8> %bt) + %abb1 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %abb, <4 x i8> %bb) + %abt2 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %abt1, <4 x i8> %bt) + %abb2 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %abb1, <4 x i8> %bb) + %abt3 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %abt2, <4 x i8> %bt) + %abb3 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %abb2, <4 x i8> %bb) + %r = shufflevector <4 x i8> %abt3, <4 x i8> %abb3, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %r +} + +define <8 x i8> @intrinsics_different(<8 x i8> %a, <8 x i8> %b) { +; CHECK-LABEL: @intrinsics_different( +; CHECK-NEXT: [[AB:%.*]] = shufflevector <8 x i8> [[A:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[AT:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[BB:%.*]] = shufflevector <8 x i8> [[B:%.*]], <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: [[BT:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> +; CHECK-NEXT: [[ABT:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[AT]], <4 x i8> [[BT]]) +; CHECK-NEXT: [[ABB:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[AB]], <4 x i8> [[BB]]) +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i8> [[ABT]], <4 x i8> [[ABB]], <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +; CHECK-NEXT: ret <8 x i8> [[R]] +; + %ab = shufflevector <8 x 
i8> %a, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %at = shufflevector <8 x i8> %a, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %bb = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> + %bt = shufflevector <8 x i8> %b, <8 x i8> poison, <4 x i32> <i32 7, i32 6, i32 5, i32 4> + %abt = call <4 x i8> @llvm.smin(<4 x i8> %at, <4 x i8> %bt) + %abb = call <4 x i8> @llvm.umin(<4 x i8> %ab, <4 x i8> %bb) + %r = shufflevector <4 x i8> %abt, <4 x i8> %abb, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> + ret <8 x i8> %r +} + +define void @v8f64interleave(i64 %0, ptr %1, ptr %x, double %z) { +; CHECK-LABEL: @v8f64interleave( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[Z:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x double>, ptr [[TMP1:%.*]], align 8 +; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 0, i32 8> +; CHECK-NEXT: [[STRIDED_VEC27:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 1, i32 9> +; CHECK-NEXT: [[STRIDED_VEC28:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 2, i32 10> +; CHECK-NEXT: [[STRIDED_VEC29:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 3, i32 11> +; CHECK-NEXT: [[STRIDED_VEC30:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 4, i32 12> +; CHECK-NEXT: [[STRIDED_VEC31:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 5, i32 13> +; CHECK-NEXT: [[STRIDED_VEC32:%.*]] = shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 6, i32 14> +; CHECK-NEXT: [[STRIDED_VEC33:%.*]] = 
shufflevector <16 x double> [[WIDE_VEC]], <16 x double> poison, <2 x i32> <i32 7, i32 15> +; CHECK-NEXT: [[TMP2:%.*]] = fmul fast <2 x double> [[STRIDED_VEC]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, ptr [[X:%.*]], i64 [[TMP0:%.*]] +; CHECK-NEXT: [[WIDE_VEC34:%.*]] = load <16 x double>, ptr [[TMP3]], align 8 +; CHECK-NEXT: [[STRIDED_VEC35:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 0, i32 8> +; CHECK-NEXT: [[STRIDED_VEC36:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 1, i32 9> +; CHECK-NEXT: [[STRIDED_VEC37:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 2, i32 10> +; CHECK-NEXT: [[STRIDED_VEC38:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 3, i32 11> +; CHECK-NEXT: [[STRIDED_VEC39:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 4, i32 12> +; CHECK-NEXT: [[STRIDED_VEC40:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 5, i32 13> +; CHECK-NEXT: [[STRIDED_VEC41:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 6, i32 14> +; CHECK-NEXT: [[STRIDED_VEC42:%.*]] = shufflevector <16 x double> [[WIDE_VEC34]], <16 x double> poison, <2 x i32> <i32 7, i32 15> +; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x double> [[STRIDED_VEC35]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = fmul fast <2 x double> [[STRIDED_VEC27]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP6:%.*]] = fadd fast <2 x double> [[STRIDED_VEC36]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <2 x double> [[STRIDED_VEC28]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd fast <2 x double> [[STRIDED_VEC37]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = fmul fast <2 x double> [[STRIDED_VEC29]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP10:%.*]] = fadd fast <2 x double> [[STRIDED_VEC38]], [[TMP9]] 
+; CHECK-NEXT: [[TMP11:%.*]] = fmul fast <2 x double> [[STRIDED_VEC30]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP12:%.*]] = fadd fast <2 x double> [[STRIDED_VEC39]], [[TMP11]] +; CHECK-NEXT: [[TMP13:%.*]] = fmul fast <2 x double> [[STRIDED_VEC31]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP14:%.*]] = fadd fast <2 x double> [[STRIDED_VEC40]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = fmul fast <2 x double> [[STRIDED_VEC32]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP16:%.*]] = fadd fast <2 x double> [[STRIDED_VEC41]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = or disjoint i64 [[TMP0]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <2 x double> [[STRIDED_VEC33]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP20:%.*]] = fadd fast <2 x double> [[STRIDED_VEC42]], [[TMP18]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 -56 +; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <2 x double> [[TMP12]], <2 x double> [[TMP14]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <2 x double> [[TMP16]], <2 x double> [[TMP20]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x double> [[TMP22]], <4 x double> [[TMP23]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x double> [[TMP24]], <4 x double> [[TMP25]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP26]], <8 x double> [[TMP27]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 
13, i32 15> +; CHECK-NEXT: store <16 x double> [[INTERLEAVED_VEC]], ptr [[TMP21]], align 8 +; CHECK-NEXT: ret void +; +entry: + %broadcast.splatinsert = insertelement <2 x double> poison, double %z, i64 0 + %broadcast.splat = shufflevector <2 x double> %broadcast.splatinsert, <2 x double> poison, <2 x i32> zeroinitializer + %wide.vec = load <16 x double>, ptr %1, align 8 + %strided.vec = shufflevector <16 x double> %wide.vec, <16 x double> poison, <2 x i32> <i32 0, i32 8> + %strided.vec27 = shufflevector <16 x double> %wide.vec, <16 x double> poison, <2 x i32> <i32 1, i32 9> + %strided.vec28 = shufflevector <16 x double> %wide.vec, <16 x double> poison, <2 x i32> <i32 2, i32 10> + %strided.vec29 = shufflevector <16 x double> %wide.vec, <16 x double> poison, <2 x i32> <i32 3, i32 11> + %strided.vec30 = shufflevector <16 x double> %wide.vec, <16 x double> poison, <2 x i32> <i32 4, i32 12> + %strided.vec31 = shufflevector <16 x double> %wide.vec, <16 x double> poison, <2 x i32> <i32 5, i32 13> + %strided.vec32 = shufflevector <16 x double> %wide.vec, <16 x double> poison, <2 x i32> <i32 6, i32 14> + %strided.vec33 = shufflevector <16 x double> %wide.vec, <16 x double> poison, <2 x i32> <i32 7, i32 15> + %2 = fmul fast <2 x double> %strided.vec, %broadcast.splat + %3 = getelementptr inbounds double, ptr %x, i64 %0 + %wide.vec34 = load <16 x double>, ptr %3, align 8 + %strided.vec35 = shufflevector <16 x double> %wide.vec34, <16 x double> poison, <2 x i32> <i32 0, i32 8> + %strided.vec36 = shufflevector <16 x double> %wide.vec34, <16 x double> poison, <2 x i32> <i32 1, i32 9> + %strided.vec37 = shufflevector <16 x double> %wide.vec34, <16 x double> poison, <2 x i32> <i32 2, i32 10> + %strided.vec38 = shufflevector <16 x double> %wide.vec34, <16 x double> poison, <2 x i32> <i32 3, i32 11> + %strided.vec39 = shufflevector <16 x double> %wide.vec34, <16 x double> poison, <2 x i32> <i32 4, i32 12> + %strided.vec40 = shufflevector <16 x double> %wide.vec34, <16 x double> 
poison, <2 x i32> <i32 5, i32 13> + %strided.vec41 = shufflevector <16 x double> %wide.vec34, <16 x double> poison, <2 x i32> <i32 6, i32 14> + %strided.vec42 = shufflevector <16 x double> %wide.vec34, <16 x double> poison, <2 x i32> <i32 7, i32 15> + %4 = fadd fast <2 x double> %strided.vec35, %2 + %5 = fmul fast <2 x double> %strided.vec27, %broadcast.splat + %6 = fadd fast <2 x double> %strided.vec36, %5 + %7 = fmul fast <2 x double> %strided.vec28, %broadcast.splat + %8 = fadd fast <2 x double> %strided.vec37, %7 + %9 = fmul fast <2 x double> %strided.vec29, %broadcast.splat + %10 = fadd fast <2 x double> %strided.vec38, %9 + %11 = fmul fast <2 x double> %strided.vec30, %broadcast.splat + %12 = fadd fast <2 x double> %strided.vec39, %11 + %13 = fmul fast <2 x double> %strided.vec31, %broadcast.splat + %14 = fadd fast <2 x double> %strided.vec40, %13 + %15 = fmul fast <2 x double> %strided.vec32, %broadcast.splat + %16 = fadd fast <2 x double> %strided.vec41, %15 + %17 = or disjoint i64 %0, 7 + %18 = fmul fast <2 x double> %strided.vec33, %broadcast.splat + %19 = getelementptr inbounds double, ptr %x, i64 %17 + %20 = fadd fast <2 x double> %strided.vec42, %18 + %21 = getelementptr inbounds i8, ptr %19, i64 -56 + %22 = shufflevector <2 x double> %4, <2 x double> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %23 = shufflevector <2 x double> %8, <2 x double> %10, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %24 = shufflevector <2 x double> %12, <2 x double> %14, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %25 = shufflevector <2 x double> %16, <2 x double> %20, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %26 = shufflevector <4 x double> %22, <4 x double> %23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %27 = shufflevector <4 x double> %24, <4 x double> %25, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %interleaved.vec = shufflevector <8 x double> %26, <8 x double> %27, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 
12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> + store <16 x double> %interleaved.vec, ptr %21, align 8 + ret void +} + +declare void @use(<4 x i8>) diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll new file mode 100644 index 0000000..e2ff343 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-binops.ll @@ -0,0 +1,204 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX + +declare void @use(<4 x i32>) + +; Shuffle is much cheaper than fdiv. FMF are intersected. + +define <4 x float> @shuf_fdiv_v4f32_yy(<4 x float> %x, <4 x float> %y, <4 x float> %z) { +; CHECK-LABEL: define <4 x float> @shuf_fdiv_v4f32_yy( +; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> [[Z]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> poison, <4 x i32> <i32 1, i32 3, i32 1, i32 3> +; CHECK-NEXT: [[R:%.*]] = fdiv arcp <4 x float> [[TMP2]], [[TMP3]] +; CHECK-NEXT: ret <4 x float> [[R]] +; + %b0 = fdiv fast <4 x float> %x, %y + %b1 = fdiv arcp <4 x float> %z, %y + %r = shufflevector <4 x float> %b0, <4 x float> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + ret <4 x float> %r +} + +; Common operand is op0 of the binops. 
+ +define <4 x i32> @shuf_add_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: define <4 x i32> @shuf_add_v4i32_xx( +; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 0> +; CHECK-NEXT: [[R1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[Z]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0> +; CHECK-NEXT: [[R2:%.*]] = add <4 x i32> [[TMP1]], [[R1]] +; CHECK-NEXT: ret <4 x i32> [[R2]] +; + %b0 = add <4 x i32> %x, %y + %b1 = add <4 x i32> %x, %z + %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 poison, i32 poison, i32 6, i32 0> + ret <4 x i32> %r +} + +; For commutative instructions, common operand may be swapped. + +define <4 x float> @shuf_fmul_v4f32_xx_swap(<4 x float> %x, <4 x float> %y, <4 x float> %z) { +; CHECK-LABEL: define <4 x float> @shuf_fmul_v4f32_xx_swap( +; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[Y]], <4 x float> [[Z]], <4 x i32> <i32 0, i32 3, i32 4, i32 7> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 0, i32 3> +; CHECK-NEXT: [[R:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x float> [[R]] +; + %b0 = fmul <4 x float> %x, %y + %b1 = fmul <4 x float> %z, %x + %r = shufflevector <4 x float> %b0, <4 x float> %b1, <4 x i32> <i32 0, i32 3, i32 4, i32 7> + ret <4 x float> %r +} + +; For commutative instructions, common operand may be swapped. 
+ +define <2 x i64> @shuf_and_v2i64_yy_swap(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { +; CHECK-LABEL: define <2 x i64> @shuf_and_v2i64_yy_swap( +; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[Y]], <2 x i64> poison, <2 x i32> <i32 1, i32 0> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X]], <2 x i64> [[Z]], <2 x i32> <i32 3, i32 0> +; CHECK-NEXT: [[R:%.*]] = and <2 x i64> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <2 x i64> [[R]] +; + %b0 = and <2 x i64> %x, %y + %b1 = and <2 x i64> %y, %z + %r = shufflevector <2 x i64> %b0, <2 x i64> %b1, <2 x i32> <i32 3, i32 0> + ret <2 x i64> %r +} + +; non-commutative binop, but common op0 + +define <4 x i32> @shuf_shl_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: define <4 x i32> @shuf_shl_v4i32_xx( +; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 2> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[Z]], <4 x i32> <i32 3, i32 1, i32 1, i32 6> +; CHECK-NEXT: [[R:%.*]] = shl <4 x i32> [[TMP1]], [[TMP2]] +; CHECK-NEXT: ret <4 x i32> [[R]] +; + %b0 = shl <4 x i32> %x, %y + %b1 = shl <4 x i32> %x, %z + %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 3, i32 1, i32 1, i32 6> + ret <4 x i32> %r +} + +; negative test - common operand, but not commutable + +define <4 x i32> @shuf_shl_v4i32_xx_swap(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: define <4 x i32> @shuf_shl_v4i32_xx_swap( +; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[B0:%.*]] = shl <4 x i32> [[X]], [[Y]] +; CHECK-NEXT: [[B1:%.*]] = shl <4 x i32> [[Z]], [[X]] +; CHECK-NEXT: [[R1:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 3, i32 2, i32 2, i32 5> +; 
CHECK-NEXT: ret <4 x i32> [[R1]] +; + %b0 = shl <4 x i32> %x, %y + %b1 = shl <4 x i32> %z, %x + %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 3, i32 2, i32 2, i32 5> + ret <4 x i32> %r +} + +; negative test - mismatched opcodes + +define <2 x i64> @shuf_sub_add_v2i64_yy(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { +; CHECK-LABEL: define <2 x i64> @shuf_sub_add_v2i64_yy( +; CHECK-SAME: <2 x i64> [[X:%.*]], <2 x i64> [[Y:%.*]], <2 x i64> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[B0:%.*]] = sub <2 x i64> [[X]], [[Y]] +; CHECK-NEXT: [[B1:%.*]] = add <2 x i64> [[Z]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i64> [[B0]], <2 x i64> [[B1]], <2 x i32> <i32 3, i32 0> +; CHECK-NEXT: ret <2 x i64> [[R]] +; + %b0 = sub <2 x i64> %x, %y + %b1 = add <2 x i64> %z, %y + %r = shufflevector <2 x i64> %b0, <2 x i64> %b1, <2 x i32> <i32 3, i32 0> + ret <2 x i64> %r +} + +; negative test - type change via shuffle + +define <8 x float> @shuf_fmul_v4f32_xx_type(<4 x float> %x, <4 x float> %y, <4 x float> %z) { +; CHECK-LABEL: define <8 x float> @shuf_fmul_v4f32_xx_type( +; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[B0:%.*]] = fmul <4 x float> [[X]], [[Y]] +; CHECK-NEXT: [[B1:%.*]] = fmul <4 x float> [[Z]], [[X]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <8 x i32> <i32 0, i32 3, i32 4, i32 7, i32 0, i32 1, i32 1, i32 6> +; CHECK-NEXT: ret <8 x float> [[R]] +; + %b0 = fmul <4 x float> %x, %y + %b1 = fmul <4 x float> %z, %x + %r = shufflevector <4 x float> %b0, <4 x float> %b1, <8 x i32> <i32 0, i32 3, i32 4, i32 7, i32 0, i32 1, i32 1, i32 6> + ret <8 x float> %r +} + +; negative test - uses + +define <4 x i32> @shuf_lshr_v4i32_yy_use1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: define <4 x i32> @shuf_lshr_v4i32_yy_use1( +; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[B0:%.*]] = lshr 
<4 x i32> [[X]], [[Y]] +; CHECK-NEXT: call void @use(<4 x i32> [[B0]]) +; CHECK-NEXT: [[B1:%.*]] = lshr <4 x i32> [[Z]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: ret <4 x i32> [[R]] +; + %b0 = lshr <4 x i32> %x, %y + call void @use(<4 x i32> %b0) + %b1 = lshr <4 x i32> %z, %y + %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + ret <4 x i32> %r +} + +; negative test - uses + +define <4 x i32> @shuf_mul_v4i32_yy_use2(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { +; CHECK-LABEL: define <4 x i32> @shuf_mul_v4i32_yy_use2( +; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[B0:%.*]] = mul <4 x i32> [[X]], [[Y]] +; CHECK-NEXT: [[B1:%.*]] = mul <4 x i32> [[Z]], [[Y]] +; CHECK-NEXT: call void @use(<4 x i32> [[B1]]) +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: ret <4 x i32> [[R]] +; + %b0 = mul <4 x i32> %x, %y + %b1 = mul <4 x i32> %z, %y + call void @use(<4 x i32> %b1) + %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + ret <4 x i32> %r +} + +; negative test - must have matching operand + +define <4 x float> @shuf_fadd_v4f32_no_common_op(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) { +; CHECK-LABEL: define <4 x float> @shuf_fadd_v4f32_no_common_op( +; CHECK-SAME: <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]], <4 x float> [[W:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[B0:%.*]] = fadd <4 x float> [[X]], [[Y]] +; CHECK-NEXT: [[B1:%.*]] = fadd <4 x float> [[Z]], [[W]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: ret <4 x float> [[R]] +; + %b0 = fadd <4 x float> %x, %y + %b1 = fadd <4 x float> %z, %w + %r = shufflevector <4 x float> %b0, <4 x 
float> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + ret <4 x float> %r +} + +; negative test - binops may be relatively cheap + +define <16 x i16> @shuf_and_v16i16_yy_expensive_shuf(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) { +; CHECK-LABEL: define <16 x i16> @shuf_and_v16i16_yy_expensive_shuf( +; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]], <16 x i16> [[Z:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[B0:%.*]] = and <16 x i16> [[X]], [[Y]] +; CHECK-NEXT: [[B1:%.*]] = and <16 x i16> [[Y]], [[Z]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <16 x i16> [[B0]], <16 x i16> [[B1]], <16 x i32> <i32 15, i32 22, i32 25, i32 13, i32 28, i32 0, i32 poison, i32 3, i32 0, i32 30, i32 3, i32 7, i32 9, i32 19, i32 2, i32 22> +; CHECK-NEXT: ret <16 x i16> [[R]] +; + %b0 = and <16 x i16> %x, %y + %b1 = and <16 x i16> %y, %z + %r = shufflevector <16 x i16> %b0, <16 x i16> %b1, <16 x i32> <i32 15, i32 22, i32 25, i32 13, i32 28, i32 0, i32 poison, i32 3, i32 0, i32 30, i32 3, i32 7, i32 9, i32 19, i32 2, i32 22> + ret <16 x i16> %r +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; AVX: {{.*}} +; SSE: {{.*}} diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll index 7d9f7e3..3a5d209 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll @@ -1,12 +1,13 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX ; standard vector concatenations define <16 x i32> @concat_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: @concat_zext_v8i16_v16i32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-LABEL: define <16 x i32> @concat_zext_v8i16_v16i32( +; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[R:%.*]] = zext <16 x i16> [[TMP1]] to <16 x i32> ; CHECK-NEXT: ret <16 x i32> [[R]] ; @@ -17,8 +18,9 @@ define <16 x i32> @concat_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { } define <16 x i32> @concat_zext_nneg_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: @concat_zext_nneg_v8i16_v16i32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, 
i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-LABEL: define <16 x i32> @concat_zext_nneg_v8i16_v16i32( +; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[R:%.*]] = zext nneg <16 x i16> [[TMP1]] to <16 x i32> ; CHECK-NEXT: ret <16 x i32> [[R]] ; @@ -29,14 +31,16 @@ define <16 x i32> @concat_zext_nneg_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { } define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32(<8 x i16> %a0, <8 x i16> %a1) { -; SSE-LABEL: @concat_sext_zext_nneg_v8i16_v8i32( -; SSE-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32> -; SSE-NEXT: [[X1:%.*]] = zext nneg <8 x i16> [[A1:%.*]] to <8 x i32> +; SSE-LABEL: define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32( +; SSE-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0]] to <8 x i32> +; SSE-NEXT: [[X1:%.*]] = zext nneg <8 x i16> [[A1]] to <8 x i32> ; SSE-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; SSE-NEXT: ret <16 x i32> [[R]] ; -; AVX-LABEL: @concat_sext_zext_nneg_v8i16_v8i32( -; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; AVX-LABEL: define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32( +; AVX-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; AVX-NEXT: 
[[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32> ; AVX-NEXT: ret <16 x i32> [[R]] ; @@ -47,8 +51,9 @@ define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32(<8 x i16> %a0, <8 x i16> %a } define <16 x i32> @concat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: @concat_sext_v8i16_v16i32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-LABEL: define <16 x i32> @concat_sext_v8i16_v16i32( +; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32> ; CHECK-NEXT: ret <16 x i32> [[R]] ; @@ -59,8 +64,9 @@ define <16 x i32> @concat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { } define <8 x i32> @concat_sext_v4i1_v8i32(<4 x i1> %a0, <4 x i1> %a1) { -; CHECK-LABEL: @concat_sext_v4i1_v8i32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A0:%.*]], <4 x i1> [[A1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-LABEL: define <8 x i32> @concat_sext_v4i1_v8i32( +; CHECK-SAME: <4 x i1> [[A0:%.*]], <4 x i1> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A0]], <4 x i1> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[R:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; @@ -71,8 +77,9 @@ define <8 x i32> @concat_sext_v4i1_v8i32(<4 x i1> %a0, <4 x i1> %a1) { } define <8 x i16> @concat_trunc_v4i32_v8i16(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: @concat_trunc_v4i32_v8i16( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], 
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-LABEL: define <8 x i16> @concat_trunc_v4i32_v8i16( +; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[R:%.*]] = trunc <8 x i32> [[TMP1]] to <8 x i16> ; CHECK-NEXT: ret <8 x i16> [[R]] ; @@ -83,8 +90,9 @@ define <8 x i16> @concat_trunc_v4i32_v8i16(<4 x i32> %a0, <4 x i32> %a1) { } define <8 x ptr> @concat_inttoptr_v4i32_v8iptr(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: @concat_inttoptr_v4i32_v8iptr( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-LABEL: define <8 x ptr> @concat_inttoptr_v4i32_v8iptr( +; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[R:%.*]] = inttoptr <8 x i32> [[TMP1]] to <8 x ptr> ; CHECK-NEXT: ret <8 x ptr> [[R]] ; @@ -95,8 +103,9 @@ define <8 x ptr> @concat_inttoptr_v4i32_v8iptr(<4 x i32> %a0, <4 x i32> %a1) { } define <16 x i64> @concat_ptrtoint_v8i16_v16i32(<8 x ptr> %a0, <8 x ptr> %a1) { -; CHECK-LABEL: @concat_ptrtoint_v8i16_v16i32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[A0:%.*]], <8 x ptr> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-LABEL: define <16 x i64> @concat_ptrtoint_v8i16_v16i32( +; CHECK-SAME: <8 x ptr> [[A0:%.*]], <8 x ptr> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[A0]], <8 x ptr> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 
; CHECK-NEXT: [[R:%.*]] = ptrtoint <16 x ptr> [[TMP1]] to <16 x i64> ; CHECK-NEXT: ret <16 x i64> [[R]] ; @@ -107,14 +116,16 @@ define <16 x i64> @concat_ptrtoint_v8i16_v16i32(<8 x ptr> %a0, <8 x ptr> %a1) { } define <8 x double> @concat_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) { -; SSE-LABEL: @concat_fpext_v4f32_v8f64( -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; SSE-LABEL: define <8 x double> @concat_fpext_v4f32_v8f64( +; SSE-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; SSE-NEXT: [[R:%.*]] = fpext <8 x float> [[TMP1]] to <8 x double> ; SSE-NEXT: ret <8 x double> [[R]] ; -; AVX-LABEL: @concat_fpext_v4f32_v8f64( -; AVX-NEXT: [[X0:%.*]] = fpext <4 x float> [[A0:%.*]] to <4 x double> -; AVX-NEXT: [[X1:%.*]] = fpext <4 x float> [[A1:%.*]] to <4 x double> +; AVX-LABEL: define <8 x double> @concat_fpext_v4f32_v8f64( +; AVX-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[X0:%.*]] = fpext <4 x float> [[A0]] to <4 x double> +; AVX-NEXT: [[X1:%.*]] = fpext <4 x float> [[A1]] to <4 x double> ; AVX-NEXT: [[R:%.*]] = shufflevector <4 x double> [[X0]], <4 x double> [[X1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; AVX-NEXT: ret <8 x double> [[R]] ; @@ -125,9 +136,10 @@ define <8 x double> @concat_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) } define <16 x float> @concat_fptrunc_v8f64_v16f32(<8 x double> %a0, <8 x double> %a1) { -; CHECK-LABEL: @concat_fptrunc_v8f64_v16f32( -; CHECK-NEXT: [[X0:%.*]] = fptrunc <8 x double> [[A0:%.*]] to <8 x float> -; CHECK-NEXT: [[X1:%.*]] = fptrunc <8 x double> [[A1:%.*]] to <8 x float> +; CHECK-LABEL: define <16 x float> @concat_fptrunc_v8f64_v16f32( +; CHECK-SAME: <8 x double> 
[[A0:%.*]], <8 x double> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X0:%.*]] = fptrunc <8 x double> [[A0]] to <8 x float> +; CHECK-NEXT: [[X1:%.*]] = fptrunc <8 x double> [[A1]] to <8 x float> ; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x float> [[X0]], <8 x float> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: ret <16 x float> [[R]] ; @@ -140,8 +152,9 @@ define <16 x float> @concat_fptrunc_v8f64_v16f32(<8 x double> %a0, <8 x double> ; commuted vector concatenation define <16 x i32> @rconcat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: @rconcat_sext_v8i16_v16i32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-LABEL: define <16 x i32> @rconcat_sext_v8i16_v16i32( +; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32> ; CHECK-NEXT: ret <16 x i32> [[R]] ; @@ -154,8 +167,9 @@ define <16 x i32> @rconcat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { ; interleaved shuffle define <8 x double> @interleave_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) { -; CHECK-LABEL: @interleave_fpext_v4f32_v8f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> +; CHECK-LABEL: define <8 x double> @interleave_fpext_v4f32_v8f64( +; CHECK-SAME: <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> [[A1]], <8 
x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> ; CHECK-NEXT: [[R:%.*]] = fpext <8 x float> [[TMP1]] to <8 x double> ; CHECK-NEXT: ret <8 x double> [[R]] ; @@ -168,8 +182,9 @@ define <8 x double> @interleave_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> % ; bitcasts (same element count) define <8 x float> @concat_bitcast_v4i32_v8f32(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: @concat_bitcast_v4i32_v8f32( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-LABEL: define <8 x float> @concat_bitcast_v4i32_v8f32( +; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[R:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float> ; CHECK-NEXT: ret <8 x float> [[R]] ; @@ -182,8 +197,9 @@ define <8 x float> @concat_bitcast_v4i32_v8f32(<4 x i32> %a0, <4 x i32> %a1) { ; bitcasts (lower element count) define <4 x double> @concat_bitcast_v8i16_v4f64(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: @concat_bitcast_v8i16_v4f64( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> +; CHECK-LABEL: define <4 x double> @concat_bitcast_v8i16_v4f64( +; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0]], <8 x i16> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: [[R:%.*]] = bitcast <16 x i16> [[TMP1]] to <4 x double> ; CHECK-NEXT: ret <4 x double> [[R]] ; @@ -196,8 +212,9 @@ define <4 x double> @concat_bitcast_v8i16_v4f64(<8 x i16> %a0, <8 x i16> 
%a1) { ; bitcasts (higher element count) define <16 x i16> @concat_bitcast_v4i32_v16i16(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: @concat_bitcast_v4i32_v16i16( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-LABEL: define <16 x i16> @concat_bitcast_v4i32_v16i16( +; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: [[R:%.*]] = bitcast <8 x i32> [[TMP1]] to <16 x i16> ; CHECK-NEXT: ret <16 x i16> [[R]] ; @@ -210,11 +227,12 @@ define <16 x i16> @concat_bitcast_v4i32_v16i16(<4 x i32> %a0, <4 x i32> %a1) { ; negative - multiuse define <8 x i16> @concat_trunc_v4i32_v8i16_multiuse(<4 x i32> %a0, <4 x i32> %a1, ptr %a2) { -; CHECK-LABEL: @concat_trunc_v4i32_v8i16_multiuse( -; CHECK-NEXT: [[X0:%.*]] = trunc <4 x i32> [[A0:%.*]] to <4 x i16> -; CHECK-NEXT: [[X1:%.*]] = trunc <4 x i32> [[A1:%.*]] to <4 x i16> +; CHECK-LABEL: define <8 x i16> @concat_trunc_v4i32_v8i16_multiuse( +; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], ptr [[A2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X0:%.*]] = trunc <4 x i32> [[A0]] to <4 x i16> +; CHECK-NEXT: [[X1:%.*]] = trunc <4 x i32> [[A1]] to <4 x i16> ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[X0]], <4 x i16> [[X1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: store <4 x i16> [[X0]], ptr [[A2:%.*]], align 8 +; CHECK-NEXT: store <4 x i16> [[X0]], ptr [[A2]], align 8 ; CHECK-NEXT: ret <8 x i16> [[R]] ; %x0 = trunc <4 x i32> %a0 to <4 x i16> @@ -227,9 +245,10 @@ define <8 x i16> @concat_trunc_v4i32_v8i16_multiuse(<4 x i32> %a0, <4 x i32> %a1 ; negative - bitcasts (unscalable higher element count) define <16 x i16> @revpair_bitcast_v4i32_v16i16(<4 x i32> %a0, <4 x i32> %a1) { -; CHECK-LABEL: 
@revpair_bitcast_v4i32_v16i16( -; CHECK-NEXT: [[X0:%.*]] = bitcast <4 x i32> [[A0:%.*]] to <8 x i16> -; CHECK-NEXT: [[X1:%.*]] = bitcast <4 x i32> [[A1:%.*]] to <8 x i16> +; CHECK-LABEL: define <16 x i16> @revpair_bitcast_v4i32_v16i16( +; CHECK-SAME: <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X0:%.*]] = bitcast <4 x i32> [[A0]] to <8 x i16> +; CHECK-NEXT: [[X1:%.*]] = bitcast <4 x i32> [[A1]] to <8 x i16> ; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[X0]], <8 x i16> [[X1]], <16 x i32> <i32 1, i32 0, i32 3, i32 3, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> ; CHECK-NEXT: ret <16 x i16> [[R]] ; @@ -242,9 +261,10 @@ define <16 x i16> @revpair_bitcast_v4i32_v16i16(<4 x i32> %a0, <4 x i32> %a1) { ; negative - bitcasts (unscalable element counts) define <4 x i32> @shuffle_bitcast_v32i40_v4i32(<32 x i40> %a0, <32 x i40> %a1) { -; CHECK-LABEL: @shuffle_bitcast_v32i40_v4i32( -; CHECK-NEXT: [[X0:%.*]] = bitcast <32 x i40> [[A0:%.*]] to <40 x i32> -; CHECK-NEXT: [[X1:%.*]] = bitcast <32 x i40> [[A1:%.*]] to <40 x i32> +; CHECK-LABEL: define <4 x i32> @shuffle_bitcast_v32i40_v4i32( +; CHECK-SAME: <32 x i40> [[A0:%.*]], <32 x i40> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X0:%.*]] = bitcast <32 x i40> [[A0]] to <40 x i32> +; CHECK-NEXT: [[X1:%.*]] = bitcast <32 x i40> [[A1]] to <40 x i32> ; CHECK-NEXT: [[R:%.*]] = shufflevector <40 x i32> [[X0]], <40 x i32> [[X1]], <4 x i32> <i32 0, i32 42, i32 poison, i32 poison> ; CHECK-NEXT: ret <4 x i32> [[R]] ; @@ -257,9 +277,10 @@ define <4 x i32> @shuffle_bitcast_v32i40_v4i32(<32 x i40> %a0, <32 x i40> %a1) { ; negative - src type mismatch define <8 x i32> @concat_sext_v4i8_v4i16_v8i32(<4 x i8> %a0, <4 x i16> %a1) { -; CHECK-LABEL: @concat_sext_v4i8_v4i16_v8i32( -; CHECK-NEXT: [[X0:%.*]] = sext <4 x i8> [[A0:%.*]] to <4 x i32> -; CHECK-NEXT: [[X1:%.*]] = sext <4 x i16> [[A1:%.*]] to <4 x i32> +; CHECK-LABEL: define <8 x i32> @concat_sext_v4i8_v4i16_v8i32( 
+; CHECK-SAME: <4 x i8> [[A0:%.*]], <4 x i16> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X0:%.*]] = sext <4 x i8> [[A0]] to <4 x i32> +; CHECK-NEXT: [[X1:%.*]] = sext <4 x i16> [[A1]] to <4 x i32> ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[X0]], <4 x i32> [[X1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: ret <8 x i32> [[R]] ; @@ -272,9 +293,10 @@ define <8 x i32> @concat_sext_v4i8_v4i16_v8i32(<4 x i8> %a0, <4 x i16> %a1) { ; negative - castop mismatch define <16 x i32> @concat_sext_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: @concat_sext_zext_v8i16_v16i32( -; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32> -; CHECK-NEXT: [[X1:%.*]] = zext <8 x i16> [[A1:%.*]] to <8 x i32> +; CHECK-LABEL: define <16 x i32> @concat_sext_zext_v8i16_v16i32( +; CHECK-SAME: <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0]] to <8 x i32> +; CHECK-NEXT: [[X1:%.*]] = zext <8 x i16> [[A1]] to <8 x i32> ; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; CHECK-NEXT: ret <16 x i32> [[R]] ; diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll new file mode 100644 index 0000000..b5b5bb99 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-shuffles.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s + +; TODO: fold to identity + +define <8 x i32> @concat_extract_subvectors(<8 x i32> %x) { +; CHECK-LABEL: define <8 x i32> @concat_extract_subvectors( +; CHECK-SAME: <8 
x i32> [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[LO:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-NEXT: [[HI:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <4 x i32> [[LO]], <4 x i32> [[HI]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: ret <8 x i32> [[CONCAT]] +; + %lo = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %hi = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %concat = shufflevector <4 x i32> %lo, <4 x i32> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i32> %concat +} + +; negative test - shuffle contains undef + +define <8 x i32> @concat_extract_subvectors_undef(<8 x i32> %x) { +; CHECK-LABEL: define <8 x i32> @concat_extract_subvectors_undef( +; CHECK-SAME: <8 x i32> [[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LO:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 8> +; CHECK-NEXT: [[HI:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 8> +; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <4 x i32> [[LO]], <4 x i32> [[HI]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: ret <8 x i32> [[CONCAT]] +; + %lo = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 8> + %hi = shufflevector <8 x i32> %x, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 8> + %concat = shufflevector <4 x i32> %lo, <4 x i32> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i32> %concat +} + +; negative test - shuffle contains poison + +define <8 x i32> @concat_extract_subvectors_poison(<8 x i32> %x) { +; CHECK-LABEL: define <8 x i32> @concat_extract_subvectors_poison( +; CHECK-SAME: <8 x i32> 
[[X:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[LO:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 8> +; CHECK-NEXT: [[HI:%.*]] = shufflevector <8 x i32> [[X]], <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 8> +; CHECK-NEXT: [[CONCAT:%.*]] = shufflevector <4 x i32> [[LO]], <4 x i32> [[HI]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-NEXT: ret <8 x i32> [[CONCAT]] +; + %lo = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 8> + %hi = shufflevector <8 x i32> %x, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 8> + %concat = shufflevector <4 x i32> %lo, <4 x i32> %hi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + ret <8 x i32> %concat +} diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll index 8337bb3..c8c9aa1 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll @@ -1,17 +1,21 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE ; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX +declare void @use(<4 x i32>) + ; x86 does not have a cheap v16i8 shuffle until SSSE3 (pshufb) define <16 x i8> @bitcast_shuf_narrow_element(<4 x i32> %v) { -; SSE-LABEL: @bitcast_shuf_narrow_element( -; SSE-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; SSE-LABEL: define <16 x i8> @bitcast_shuf_narrow_element( +; SSE-SAME: <4 x i32> [[V:%.*]]) #[[ATTR0:[0-9]+]] { +; SSE-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, 
i32 1, i32 0> ; SSE-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> ; SSE-NEXT: ret <16 x i8> [[R]] ; -; AVX-LABEL: @bitcast_shuf_narrow_element( -; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8> +; AVX-LABEL: define <16 x i8> @bitcast_shuf_narrow_element( +; AVX-SAME: <4 x i32> [[V:%.*]]) #[[ATTR0:[0-9]+]] { +; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V]] to <16 x i8> ; AVX-NEXT: [[R:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> ; AVX-NEXT: ret <16 x i8> [[R]] ; @@ -23,8 +27,9 @@ define <16 x i8> @bitcast_shuf_narrow_element(<4 x i32> %v) { ; v4f32 is the same cost as v4i32, so this always works define <4 x float> @bitcast_shuf_same_size(<4 x i32> %v) { -; CHECK-LABEL: @bitcast_shuf_same_size( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <4 x float> +; CHECK-LABEL: define <4 x float> @bitcast_shuf_same_size( +; CHECK-SAME: <4 x i32> [[V:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[V]] to <4 x float> ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: ret <4 x float> [[R]] ; @@ -36,13 +41,15 @@ define <4 x float> @bitcast_shuf_same_size(<4 x i32> %v) { ; Length-changing shuffles define <16 x i8> @bitcast_shuf_narrow_element_subvector(<2 x i32> %v) { -; SSE-LABEL: @bitcast_shuf_narrow_element_subvector( -; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0> +; SSE-LABEL: define <16 x i8> @bitcast_shuf_narrow_element_subvector( +; SSE-SAME: <2 x i32> [[V:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i32> [[V]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0> ; SSE-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> ; SSE-NEXT: ret <16 x i8> [[R]] ; -; AVX-LABEL: 
@bitcast_shuf_narrow_element_subvector( -; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V:%.*]] to <8 x i8> +; AVX-LABEL: define <16 x i8> @bitcast_shuf_narrow_element_subvector( +; AVX-SAME: <2 x i32> [[V:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[V]] to <8 x i8> ; AVX-NEXT: [[R:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> ; AVX-NEXT: ret <16 x i8> [[R]] ; @@ -52,13 +59,15 @@ define <16 x i8> @bitcast_shuf_narrow_element_subvector(<2 x i32> %v) { } define <16 x i16> @bitcast_shuf_narrow_element_concat_subvectors(<2 x i64> %v) { -; SSE-LABEL: @bitcast_shuf_narrow_element_concat_subvectors( -; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[V:%.*]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1> +; SSE-LABEL: define <16 x i16> @bitcast_shuf_narrow_element_concat_subvectors( +; SSE-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[V]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1> ; SSE-NEXT: [[R:%.*]] = bitcast <4 x i64> [[SHUF]] to <16 x i16> ; SSE-NEXT: ret <16 x i16> [[R]] ; -; AVX-LABEL: @bitcast_shuf_narrow_element_concat_subvectors( -; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16> +; AVX-LABEL: define <16 x i16> @bitcast_shuf_narrow_element_concat_subvectors( +; AVX-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V]] to <8 x i16> ; AVX-NEXT: [[R:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; AVX-NEXT: ret <16 x i16> [[R]] ; @@ -68,8 +77,9 @@ define <16 x i16> @bitcast_shuf_narrow_element_concat_subvectors(<2 x i64> %v) { } define <16 x i8> @bitcast_shuf_extract_subvector(<8 x i32> %v) { -; CHECK-LABEL: @bitcast_shuf_extract_subvector( -; 
CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i32> [[V:%.*]] to <32 x i8> +; CHECK-LABEL: define <16 x i8> @bitcast_shuf_extract_subvector( +; CHECK-SAME: <8 x i32> [[V:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i32> [[V]] to <32 x i8> ; CHECK-NEXT: [[R:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> poison, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> ; CHECK-NEXT: ret <16 x i8> [[R]] ; @@ -81,8 +91,9 @@ define <16 x i8> @bitcast_shuf_extract_subvector(<8 x i32> %v) { ; Negative test - must cast to vector type define i128 @bitcast_shuf_narrow_element_wrong_type(<4 x i32> %v) { -; CHECK-LABEL: @bitcast_shuf_narrow_element_wrong_type( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-LABEL: define i128 @bitcast_shuf_narrow_element_wrong_type( +; CHECK-SAME: <4 x i32> [[V:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to i128 ; CHECK-NEXT: ret i128 [[R]] ; @@ -94,8 +105,9 @@ define i128 @bitcast_shuf_narrow_element_wrong_type(<4 x i32> %v) { ; Widen shuffle elements define <4 x i32> @bitcast_shuf_wide_element(<8 x i16> %v) { -; CHECK-LABEL: @bitcast_shuf_wide_element( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V:%.*]] to <4 x i32> +; CHECK-LABEL: define <4 x i32> @bitcast_shuf_wide_element( +; CHECK-SAME: <8 x i16> [[V:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[V]] to <4 x i32> ; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1> ; CHECK-NEXT: ret <4 x i32> [[R]] ; @@ -104,13 +116,12 @@ define <4 x i32> @bitcast_shuf_wide_element(<8 x i16> %v) { ret <4 x i32> %r } -declare void @use(<4 x i32>) - ; Negative test - don't create an extra 
shuffle define <16 x i8> @bitcast_shuf_uses(<4 x i32> %v) { -; CHECK-LABEL: @bitcast_shuf_uses( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> +; CHECK-LABEL: define <16 x i8> @bitcast_shuf_uses( +; CHECK-SAME: <4 x i32> [[V:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; CHECK-NEXT: call void @use(<4 x i32> [[SHUF]]) ; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8> ; CHECK-NEXT: ret <16 x i8> [[R]] @@ -125,8 +136,9 @@ define <16 x i8> @bitcast_shuf_uses(<4 x i32> %v) { ; TODO - can we remove the empty bitcast(bitcast()) ? define <4 x i64> @bitcast_shuf_remove_bitcasts(<2 x i64> %a0, <2 x i64> %a1) { -; CHECK-LABEL: @bitcast_shuf_remove_bitcasts( -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-LABEL: define <4 x i64> @bitcast_shuf_remove_bitcasts( +; CHECK-SAME: <2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i64> [[A0]], <2 x i64> [[A1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[SHUF:%.*]] = bitcast <4 x i64> [[R]] to <8 x i32> ; CHECK-NEXT: [[R1:%.*]] = bitcast <8 x i32> [[SHUF]] to <4 x i64> ; CHECK-NEXT: ret <4 x i64> [[R1]] @@ -141,9 +153,10 @@ define <4 x i64> @bitcast_shuf_remove_bitcasts(<2 x i64> %a0, <2 x i64> %a1) { ; shuffle of 2 operands must reduce bitcasts define <8 x i32> @bitcast_shuf_one_bitcast(<4 x i32> %a0, <2 x i64> %a1) { -; CHECK-LABEL: @bitcast_shuf_one_bitcast( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A1:%.*]] to <4 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +; CHECK-LABEL: define <8 x i32> @bitcast_shuf_one_bitcast( +; CHECK-SAME: <4 x i32> [[A0:%.*]], <2 x i64> [[A1:%.*]]) #[[ATTR0]] { 
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[A1]] to <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[A0]], <4 x i32> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %bc0 = bitcast <4 x i32> %a0 to <2 x i64> @@ -155,8 +168,9 @@ define <8 x i32> @bitcast_shuf_one_bitcast(<4 x i32> %a0, <2 x i64> %a1) { ; Negative test - shuffle of 2 operands must not increase bitcasts define <8 x i32> @bitcast_shuf_too_many_bitcasts(<2 x i64> %a0, <2 x i64> %a1) { -; CHECK-LABEL: @bitcast_shuf_too_many_bitcasts( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> +; CHECK-LABEL: define <8 x i32> @bitcast_shuf_too_many_bitcasts( +; CHECK-SAME: <2 x i64> [[A0:%.*]], <2 x i64> [[A1:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i64> [[A0]], <2 x i64> [[A1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; CHECK-NEXT: [[R:%.*]] = bitcast <4 x i64> [[SHUF]] to <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; @@ -166,8 +180,9 @@ define <8 x i32> @bitcast_shuf_too_many_bitcasts(<2 x i64> %a0, <2 x i64> %a1) { } define <2 x i64> @PR35454_1(<2 x i64> %v) { -; SSE-LABEL: @PR35454_1( -; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> +; SSE-LABEL: define <2 x i64> @PR35454_1( +; SSE-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V]] to <4 x i32> ; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; SSE-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <16 x i8> ; SSE-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> @@ -176,8 +191,9 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { ; SSE-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> ; SSE-NEXT: ret <2 x i64> [[BC3]] ; -; AVX-LABEL: @PR35454_1( -; 
AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8> +; AVX-LABEL: define <2 x i64> @PR35454_1( +; AVX-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V]] to <16 x i8> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3> ; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> ; AVX-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32> @@ -196,8 +212,9 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { } define <2 x i64> @PR35454_2(<2 x i64> %v) { -; SSE-LABEL: @PR35454_2( -; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> +; SSE-LABEL: define <2 x i64> @PR35454_2( +; SSE-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { +; SSE-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V]] to <4 x i32> ; SSE-NEXT: [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0> ; SSE-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <8 x i16> ; SSE-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> @@ -206,8 +223,9 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) { ; SSE-NEXT: [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64> ; SSE-NEXT: ret <2 x i64> [[BC3]] ; -; AVX-LABEL: @PR35454_2( -; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16> +; AVX-LABEL: define <2 x i64> @PR35454_2( +; AVX-SAME: <2 x i64> [[V:%.*]]) #[[ATTR0]] { +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V]] to <8 x i16> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1> ; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> ; AVX-NEXT: [[BC2:%.*]] = bitcast <8 x 
i16> [[ADD]] to <4 x i32> @@ -224,187 +242,3 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) { %bc3 = bitcast <4 x i32> %permil1 to <2 x i64> ret <2 x i64> %bc3 } - -; Shuffle is much cheaper than fdiv. FMF are intersected. - -define <4 x float> @shuf_fdiv_v4f32_yy(<4 x float> %x, <4 x float> %y, <4 x float> %z) { -; CHECK-LABEL: @shuf_fdiv_v4f32_yy( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Z:%.*]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 3, i32 1, i32 3> -; CHECK-NEXT: [[R:%.*]] = fdiv arcp <4 x float> [[TMP1]], [[TMP2]] -; CHECK-NEXT: ret <4 x float> [[R]] -; - %b0 = fdiv fast <4 x float> %x, %y - %b1 = fdiv arcp <4 x float> %z, %y - %r = shufflevector <4 x float> %b0, <4 x float> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7> - ret <4 x float> %r -} - -; Common operand is op0 of the binops. - -define <4 x i32> @shuf_add_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { -; CHECK-LABEL: @shuf_add_v4i32_xx( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 0> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 0> -; CHECK-NEXT: [[R:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: ret <4 x i32> [[R]] -; - %b0 = add <4 x i32> %x, %y - %b1 = add <4 x i32> %x, %z - %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 poison, i32 poison, i32 6, i32 0> - ret <4 x i32> %r -} - -; For commutative instructions, common operand may be swapped. 
- -define <4 x float> @shuf_fmul_v4f32_xx_swap(<4 x float> %x, <4 x float> %y, <4 x float> %z) { -; CHECK-LABEL: @shuf_fmul_v4f32_xx_swap( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[Z:%.*]], <4 x i32> <i32 0, i32 3, i32 4, i32 7> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 0, i32 3> -; CHECK-NEXT: [[R:%.*]] = fmul <4 x float> [[TMP1]], [[TMP2]] -; CHECK-NEXT: ret <4 x float> [[R]] -; - %b0 = fmul <4 x float> %x, %y - %b1 = fmul <4 x float> %z, %x - %r = shufflevector <4 x float> %b0, <4 x float> %b1, <4 x i32> <i32 0, i32 3, i32 4, i32 7> - ret <4 x float> %r -} - -; For commutative instructions, common operand may be swapped. - -define <2 x i64> @shuf_and_v2i64_yy_swap(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { -; CHECK-LABEL: @shuf_and_v2i64_yy_swap( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i64> [[Y:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 0> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i64> [[X:%.*]], <2 x i64> [[Z:%.*]], <2 x i32> <i32 3, i32 0> -; CHECK-NEXT: [[R:%.*]] = and <2 x i64> [[TMP1]], [[TMP2]] -; CHECK-NEXT: ret <2 x i64> [[R]] -; - %b0 = and <2 x i64> %x, %y - %b1 = and <2 x i64> %y, %z - %r = shufflevector <2 x i64> %b0, <2 x i64> %b1, <2 x i32> <i32 3, i32 0> - ret <2 x i64> %r -} - -; non-commutative binop, but common op0 - -define <4 x i32> @shuf_shl_v4i32_xx(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { -; CHECK-LABEL: @shuf_shl_v4i32_xx( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 2> -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[Z:%.*]], <4 x i32> <i32 3, i32 1, i32 1, i32 6> -; CHECK-NEXT: [[R:%.*]] = shl <4 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: ret <4 x i32> [[R]] -; - %b0 = shl <4 x i32> %x, %y - %b1 = shl <4 x i32> %x, %z - %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 3, i32 1, i32 1, i32 
6> - ret <4 x i32> %r -} - -; negative test - common operand, but not commutable - -define <4 x i32> @shuf_shl_v4i32_xx_swap(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { -; CHECK-LABEL: @shuf_shl_v4i32_xx_swap( -; CHECK-NEXT: [[B0:%.*]] = shl <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B1:%.*]] = shl <4 x i32> [[Z:%.*]], [[X]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 3, i32 2, i32 2, i32 5> -; CHECK-NEXT: ret <4 x i32> [[R]] -; - %b0 = shl <4 x i32> %x, %y - %b1 = shl <4 x i32> %z, %x - %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 3, i32 2, i32 2, i32 5> - ret <4 x i32> %r -} - -; negative test - mismatched opcodes - -define <2 x i64> @shuf_sub_add_v2i64_yy(<2 x i64> %x, <2 x i64> %y, <2 x i64> %z) { -; CHECK-LABEL: @shuf_sub_add_v2i64_yy( -; CHECK-NEXT: [[B0:%.*]] = sub <2 x i64> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B1:%.*]] = add <2 x i64> [[Z:%.*]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i64> [[B0]], <2 x i64> [[B1]], <2 x i32> <i32 3, i32 0> -; CHECK-NEXT: ret <2 x i64> [[R]] -; - %b0 = sub <2 x i64> %x, %y - %b1 = add <2 x i64> %z, %y - %r = shufflevector <2 x i64> %b0, <2 x i64> %b1, <2 x i32> <i32 3, i32 0> - ret <2 x i64> %r -} - -; negative test - type change via shuffle - -define <8 x float> @shuf_fmul_v4f32_xx_type(<4 x float> %x, <4 x float> %y, <4 x float> %z) { -; CHECK-LABEL: @shuf_fmul_v4f32_xx_type( -; CHECK-NEXT: [[B0:%.*]] = fmul <4 x float> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B1:%.*]] = fmul <4 x float> [[Z:%.*]], [[X]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <8 x i32> <i32 0, i32 3, i32 4, i32 7, i32 0, i32 1, i32 1, i32 6> -; CHECK-NEXT: ret <8 x float> [[R]] -; - %b0 = fmul <4 x float> %x, %y - %b1 = fmul <4 x float> %z, %x - %r = shufflevector <4 x float> %b0, <4 x float> %b1, <8 x i32> <i32 0, i32 3, i32 4, i32 7, i32 0, i32 1, i32 1, i32 6> - ret <8 x float> %r -} - -; negative test - uses - -define <4 x i32> 
@shuf_lshr_v4i32_yy_use1(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { -; CHECK-LABEL: @shuf_lshr_v4i32_yy_use1( -; CHECK-NEXT: [[B0:%.*]] = lshr <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: call void @use(<4 x i32> [[B0]]) -; CHECK-NEXT: [[B1:%.*]] = lshr <4 x i32> [[Z:%.*]], [[Y]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 0, i32 2, i32 4, i32 6> -; CHECK-NEXT: ret <4 x i32> [[R]] -; - %b0 = lshr <4 x i32> %x, %y - call void @use(<4 x i32> %b0) - %b1 = lshr <4 x i32> %z, %y - %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 0, i32 2, i32 4, i32 6> - ret <4 x i32> %r -} - -; negative test - uses - -define <4 x i32> @shuf_mul_v4i32_yy_use2(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { -; CHECK-LABEL: @shuf_mul_v4i32_yy_use2( -; CHECK-NEXT: [[B0:%.*]] = mul <4 x i32> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B1:%.*]] = mul <4 x i32> [[Z:%.*]], [[Y]] -; CHECK-NEXT: call void @use(<4 x i32> [[B1]]) -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[B0]], <4 x i32> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> -; CHECK-NEXT: ret <4 x i32> [[R]] -; - %b0 = mul <4 x i32> %x, %y - %b1 = mul <4 x i32> %z, %y - call void @use(<4 x i32> %b1) - %r = shufflevector <4 x i32> %b0, <4 x i32> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7> - ret <4 x i32> %r -} - -; negative test - must have matching operand - -define <4 x float> @shuf_fadd_v4f32_no_common_op(<4 x float> %x, <4 x float> %y, <4 x float> %z, <4 x float> %w) { -; CHECK-LABEL: @shuf_fadd_v4f32_no_common_op( -; CHECK-NEXT: [[B0:%.*]] = fadd <4 x float> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B1:%.*]] = fadd <4 x float> [[Z:%.*]], [[W:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[B0]], <4 x float> [[B1]], <4 x i32> <i32 1, i32 3, i32 5, i32 7> -; CHECK-NEXT: ret <4 x float> [[R]] -; - %b0 = fadd <4 x float> %x, %y - %b1 = fadd <4 x float> %z, %w - %r = shufflevector <4 x float> %b0, <4 x float> %b1, <4 x i32> <i32 1, i32 3, i32 5, i32 7> - ret <4 x 
float> %r -} - -; negative test - binops may be relatively cheap - -define <16 x i16> @shuf_and_v16i16_yy_expensive_shuf(<16 x i16> %x, <16 x i16> %y, <16 x i16> %z) { -; CHECK-LABEL: @shuf_and_v16i16_yy_expensive_shuf( -; CHECK-NEXT: [[B0:%.*]] = and <16 x i16> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[B1:%.*]] = and <16 x i16> [[Y]], [[Z:%.*]] -; CHECK-NEXT: [[R:%.*]] = shufflevector <16 x i16> [[B0]], <16 x i16> [[B1]], <16 x i32> <i32 15, i32 22, i32 25, i32 13, i32 28, i32 0, i32 poison, i32 3, i32 0, i32 30, i32 3, i32 7, i32 9, i32 19, i32 2, i32 22> -; CHECK-NEXT: ret <16 x i16> [[R]] -; - %b0 = and <16 x i16> %x, %y - %b1 = and <16 x i16> %y, %z - %r = shufflevector <16 x i16> %b0, <16 x i16> %b1, <16 x i32> <i32 15, i32 22, i32 25, i32 13, i32 28, i32 0, i32 poison, i32 3, i32 0, i32 30, i32 3, i32 7, i32 9, i32 19, i32 2, i32 22> - ret <16 x i16> %r -} diff --git a/llvm/test/Transforms/WholeProgramDevirt/Inputs/export.yaml b/llvm/test/Transforms/WholeProgramDevirt/Inputs/export.yaml index 71cf38b..dd0c90d 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/Inputs/export.yaml +++ b/llvm/test/Transforms/WholeProgramDevirt/Inputs/export.yaml @@ -5,14 +5,22 @@ GlobalValueMap: TypeTestAssumeVCalls: - GUID: 14276520915468743435 # typeid1 Offset: 0 + - GUID: 271751036925422857 # typeid1_rv + Offset: 0 TypeCheckedLoadVCalls: - GUID: 15427464259790519041 # typeid2 Offset: 0 + - GUID: 1146149264729288256 # typeid2_rv + Offset: 0 TypeTestAssumeConstVCalls: - VFunc: GUID: 3515965990081467659 # typeid3 Offset: 0 Args: [12, 24] + - VFunc: + GUID: 2777626534618191571 # typeid3_rv + Offset: 0 + Args: [12, 24] TypeCheckedLoadConstVCalls: - VFunc: GUID: 17525413373118030901 # typeid4 diff --git a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll index b55713f..0b1023e 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll @@ -8,6 
+8,45 @@ ; RUN: FileCheck --check-prefix=SUMMARY %s < %t ; SUMMARY: TypeIdMap: +; SUMMARY-NEXT: typeid1_rv: +; SUMMARY-NEXT: TTRes: +; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: SizeM1BitWidth: 0 +; SUMMARY-NEXT: AlignLog2: 0 +; SUMMARY-NEXT: SizeM1: 0 +; SUMMARY-NEXT: BitMask: 0 +; SUMMARY-NEXT: InlineBits: 0 +; SUMMARY-NEXT: WPDRes: +; SUMMARY-NEXT: 0: +; SUMMARY-NEXT: Kind: BranchFunnel +; SUMMARY-NEXT: SingleImplName: '' +; SUMMARY-NEXT: ResByArg: +; SUMMARY-NEXT: typeid2_rv: +; SUMMARY-NEXT: TTRes: +; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: SizeM1BitWidth: 0 +; SUMMARY-NEXT: AlignLog2: 0 +; SUMMARY-NEXT: SizeM1: 0 +; SUMMARY-NEXT: BitMask: 0 +; SUMMARY-NEXT: InlineBits: 0 +; SUMMARY-NEXT: WPDRes: +; SUMMARY-NEXT: 0: +; SUMMARY-NEXT: Kind: Indir +; SUMMARY-NEXT: SingleImplName: '' +; SUMMARY-NEXT: ResByArg: +; SUMMARY-NEXT: typeid3_rv: +; SUMMARY-NEXT: TTRes: +; SUMMARY-NEXT: Kind: Unknown +; SUMMARY-NEXT: SizeM1BitWidth: 0 +; SUMMARY-NEXT: AlignLog2: 0 +; SUMMARY-NEXT: SizeM1: 0 +; SUMMARY-NEXT: BitMask: 0 +; SUMMARY-NEXT: InlineBits: 0 +; SUMMARY-NEXT: WPDRes: +; SUMMARY-NEXT: 0: +; SUMMARY-NEXT: Kind: BranchFunnel +; SUMMARY-NEXT: SingleImplName: '' +; SUMMARY-NEXT: ResByArg: ; SUMMARY-NEXT: typeid3: ; SUMMARY-NEXT: TTRes: ; SUMMARY-NEXT: Kind: Unknown @@ -93,6 +132,29 @@ declare i32 @vf3_2(ptr %this, i32 %arg) declare i32 @vf4_1(ptr %this, i32 %arg) declare i32 @vf4_2(ptr %this, i32 %arg) +declare ptr @llvm.load.relative.i32(ptr, i32) + +;; These are relative vtables equivalent to the ones above. 
+@vt1_1_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1_1 to i64), i64 ptrtoint (ptr @vt1_1_rv to i64)) to i32)], !type !5 +@vt1_2_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1_2 to i64), i64 ptrtoint (ptr @vt1_2_rv to i64)) to i32)], !type !5 + +@vt2_1_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_1 to i64), i64 ptrtoint (ptr @vt2_1_rv to i64)) to i32)], !type !6 +@vt2_2_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_2 to i64), i64 ptrtoint (ptr @vt2_2_rv to i64)) to i32)], !type !6 +@vt2_3_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_3 to i64), i64 ptrtoint (ptr @vt2_3_rv to i64)) to i32)], !type !6 +@vt2_4_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_4 to i64), i64 ptrtoint (ptr @vt2_4_rv to i64)) to i32)], !type !6 +@vt2_5_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_5 to i64), i64 ptrtoint (ptr @vt2_5_rv to i64)) to i32)], !type !6 +@vt2_6_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_6 to i64), i64 ptrtoint (ptr @vt2_6_rv to i64)) to i32)], !type !6 +@vt2_7_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_7 to i64), i64 ptrtoint (ptr @vt2_7_rv to i64)) to i32)], !type !6 +@vt2_8_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_8 to i64), i64 ptrtoint (ptr @vt2_8_rv to i64)) to i32)], !type !6 +@vt2_9_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_9 to i64), i64 ptrtoint (ptr @vt2_9_rv to i64)) to i32)], !type !6 +@vt2_10_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_10 to i64), i64 ptrtoint (ptr @vt2_10_rv to i64)) to i32)], !type !6 +@vt2_11_rv = constant [1 
x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2_11 to i64), i64 ptrtoint (ptr @vt2_11_rv to i64)) to i32)], !type !6 + +@vt3_1_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf3_1 to i64), i64 ptrtoint (ptr @vt3_1_rv to i64)) to i32)], !type !7 +@vt3_2_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf3_2 to i64), i64 ptrtoint (ptr @vt3_2_rv to i64)) to i32)], !type !7 + +@vt4_1_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf4_1 to i64), i64 ptrtoint (ptr @vt4_1_rv to i64)) to i32)], !type !8 +@vt4_2_rv = constant [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf4_2 to i64), i64 ptrtoint (ptr @vt4_2_rv to i64)) to i32)], !type !8 ; CHECK-LABEL: define i32 @fn1 @@ -108,6 +170,19 @@ define i32 @fn1(ptr %obj) #0 { ret i32 %result } +; CHECK-LABEL: define i32 @fn1_rv +; CHECK-NOT: call void (...) @llvm.icall.branch.funnel +define i32 @fn1_rv(ptr %obj) #0 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid1_rv") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; RETP: call i32 @__typeid_typeid1_rv_0_branch_funnel(ptr nest %vtable, ptr %obj, i32 1) + %result = call i32 %fptr(ptr %obj, i32 1) + ; NORETP: call i32 % + ret i32 %result +} + ; CHECK-LABEL: define i32 @fn2 ; CHECK-NOT: call void (...) @llvm.icall.branch.funnel define i32 @fn2(ptr %obj) #0 { @@ -120,6 +195,18 @@ define i32 @fn2(ptr %obj) #0 { ret i32 %result } +; CHECK-LABEL: define i32 @fn2_rv +; CHECK-NOT: call void (...) 
@llvm.icall.branch.funnel +define i32 @fn2_rv(ptr %obj) #0 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2_rv") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: call i32 % + %result = call i32 %fptr(ptr %obj, i32 1) + ret i32 %result +} + ; CHECK-LABEL: define i32 @fn3 ; CHECK-NOT: call void (...) @llvm.icall.branch.funnel define i32 @fn3(ptr %obj) #0 { @@ -133,10 +220,75 @@ define i32 @fn3(ptr %obj) #0 { ret i32 %result } +; CHECK-LABEL: define i32 @fn3_rv +; CHECK-NOT: call void (...) @llvm.icall.branch.funnel +define i32 @fn3_rv(ptr %obj) #0 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !9) + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; RETP: call i32 @branch_funnel.1(ptr + ; NORETP: call i32 % + %result = call i32 %fptr(ptr %obj, i32 1) + ret i32 %result +} + +; CHECK-LABEL: define i32 @fn4 +; CHECK-NOT: call void (...) @llvm.icall.branch.funnel +define i32 @fn4(ptr %obj) #0 { + %p = call i1 @llvm.type.test(ptr @vt1_1, metadata !"typeid1") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr @vt1_1 + ; RETP: call i32 @__typeid_typeid1_0_branch_funnel(ptr nest @vt1_1, ptr %obj, i32 1) + %result = call i32 %fptr(ptr %obj, i32 1) + ; NORETP: call i32 % + ret i32 %result +} + +; CHECK-LABEL: define i32 @fn4_cpy +; CHECK-NOT: call void (...) @llvm.icall.branch.funnel +define i32 @fn4_cpy(ptr %obj) #0 { + %p = call i1 @llvm.type.test(ptr @vt1_1, metadata !"typeid1") + call void @llvm.assume(i1 %p) + %fptr = load ptr, ptr @vt1_1 + ; RETP: call i32 @__typeid_typeid1_0_branch_funnel(ptr nest @vt1_1, ptr %obj, i32 1) + %result = call i32 %fptr(ptr %obj, i32 1) + ; NORETP: call i32 % + ret i32 %result +} + +; CHECK-LABEL: define i32 @fn4_rv +; CHECK-NOT: call void (...) 
@llvm.icall.branch.funnel +define i32 @fn4_rv(ptr %obj) #0 { + %p = call i1 @llvm.type.test(ptr @vt1_1_rv, metadata !"typeid1_rv") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr @vt1_1_rv, i32 0) + ; RETP: call i32 @__typeid_typeid1_rv_0_branch_funnel(ptr nest @vt1_1_rv, ptr %obj, i32 1) + %result = call i32 %fptr(ptr %obj, i32 1) + ; NORETP: call i32 % + ret i32 %result +} + +; CHECK-LABEL: define i32 @fn4_rv_cpy +; CHECK-NOT: call void (...) @llvm.icall.branch.funnel +define i32 @fn4_rv_cpy(ptr %obj) #0 { + %p = call i1 @llvm.type.test(ptr @vt1_1_rv, metadata !"typeid1_rv") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr @vt1_1_rv, i32 0) + ; RETP: call i32 @__typeid_typeid1_rv_0_branch_funnel(ptr nest @vt1_1_rv, ptr %obj, i32 1) + %result = call i32 %fptr(ptr %obj, i32 1) + ; NORETP: call i32 % + ret i32 %result +} + ; CHECK-LABEL: define hidden void @__typeid_typeid1_0_branch_funnel(ptr nest %0, ...) ; CHECK-NEXT: musttail call void (...) @llvm.icall.branch.funnel(ptr %0, ptr {{(nonnull )?}}@vt1_1, ptr {{(nonnull )?}}@vf1_1, ptr {{(nonnull )?}}@vt1_2, ptr {{(nonnull )?}}@vf1_2, ...) +; CHECK-LABEL: define hidden void @__typeid_typeid1_rv_0_branch_funnel(ptr nest %0, ...) +; CHECK-NEXT: musttail call void (...) @llvm.icall.branch.funnel(ptr %0, ptr {{(nonnull )?}}@vt1_1_rv, ptr {{(nonnull )?}}@vf1_1, ptr {{(nonnull )?}}@vt1_2_rv, ptr {{(nonnull )?}}@vf1_2, ...) 
+ ; CHECK: define internal void @branch_funnel(ptr +; CHECK: define internal void @branch_funnel.1(ptr declare i1 @llvm.type.test(ptr, metadata) declare void @llvm.assume(i1) @@ -146,5 +298,10 @@ declare void @llvm.assume(i1) !2 = !{i32 0, !"typeid3"} !3 = !{i32 0, !4} !4 = distinct !{} +!5 = !{i32 0, !"typeid1_rv"} +!6 = !{i32 0, !"typeid2_rv"} +!7 = !{i32 0, !"typeid3_rv"} +!8 = !{i32 0, !9} +!9 = distinct !{} attributes #0 = { "target-features"="+retpoline" } diff --git a/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll b/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll index 91bae4a..e685a1a 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/constant-arg.ll @@ -7,6 +7,10 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK: private constant { [8 x i8], [1 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\02", [1 x ptr] [ptr @vf2], [0 x i8] zeroinitializer }, !type [[T8]] ; CHECK: private constant { [8 x i8], [1 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\01", [1 x ptr] [ptr @vf4], [0 x i8] zeroinitializer }, !type [[T8]] ; CHECK: private constant { [8 x i8], [1 x ptr], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\02", [1 x ptr] [ptr @vf8], [0 x i8] zeroinitializer }, !type [[T8]] +; CHECK: private constant { [4 x i8], [1 x i32], [0 x i8] } { [4 x i8] c"\00\00\00\01", [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1 to i64), i64 ptrtoint (ptr @vt1_rv to i64)) to i32)], [0 x i8] zeroinitializer }, align 4, !type [[T4:![0-9]+]] +; CHECK: private constant { [4 x i8], [1 x i32], [0 x i8] } { [4 x i8] c"\00\00\00\02", [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2 to i64), i64 ptrtoint (ptr @vt2_rv to i64)) to i32)], [0 x i8] zeroinitializer }, align 4, !type [[T4]] +; CHECK: private constant { [4 x i8], [1 x i32], [0 x i8] } { [4 x i8] c"\00\00\00\01", [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent 
@vf4 to i64), i64 ptrtoint (ptr @vt4_rv to i64)) to i32)], [0 x i8] zeroinitializer }, align 4, !type [[T4]] +; CHECK: private constant { [4 x i8], [1 x i32], [0 x i8] } { [4 x i8] c"\00\00\00\02", [1 x i32] [i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf8 to i64), i64 ptrtoint (ptr @vt8_rv to i64)) to i32)], [0 x i8] zeroinitializer }, align 4, !type [[T4]] @vt1 = constant [1 x ptr] [ptr @vf1], !type !0 @vt2 = constant [1 x ptr] [ptr @vf2], !type !0 @@ -61,8 +65,49 @@ define i1 @call2(ptr %obj) { ret i1 %result } +declare ptr @llvm.load.relative.i32(ptr, i32) + +@vt1_rv = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf1 to i64), i64 ptrtoint (ptr @vt1_rv to i64)) to i32) +], align 4, !type !1 +@vt2_rv = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf2 to i64), i64 ptrtoint (ptr @vt2_rv to i64)) to i32) +], align 4, !type !1 +@vt4_rv = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf4 to i64), i64 ptrtoint (ptr @vt4_rv to i64)) to i32) +], align 4, !type !1 +@vt8_rv = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf8 to i64), i64 ptrtoint (ptr @vt8_rv to i64)) to i32) +], align 4, !type !1 + +; CHECK: define i1 @call3 +define i1 @call3(ptr %obj) { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: getelementptr {{.*}} -1 + ; CHECK: and {{.*}}, 1 + %result = call i1 %fptr(ptr %obj, i32 5) + ret i1 %result +} + +; CHECK: define i1 @call4 +define i1 @call4(ptr %obj) { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: 
getelementptr {{.*}} -1 + ; CHECK: and {{.*}}, 2 + %result = call i1 %fptr(ptr %obj, i32 10) + ret i1 %result +} + declare i1 @llvm.type.test(ptr, metadata) declare void @llvm.assume(i1) ; CHECK: [[T8]] = !{i32 8, !"typeid"} +; CHECK: [[T4]] = !{i32 4, !"typeid2"} !0 = !{i32 0, !"typeid"} +!1 = !{i32 0, !"typeid2"} diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll index dc7b202..5a9e6c1 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll @@ -4,6 +4,7 @@ target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" ; CHECK: remark: <unknown>:0:0: single-impl: devirtualized a call to vf +; CHECK: remark: <unknown>:0:0: single-impl: devirtualized a call to vf ; CHECK: remark: <unknown>:0:0: devirtualized vf ; CHECK-NOT: devirtualized @@ -33,7 +34,31 @@ trap: unreachable } +@vt3 = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 to i64)) to i32) +], align 4, !type !1 + +; CHECK: define void @call2 +define void @call2(ptr %obj) { + %vtable = load ptr, ptr %obj + %pair = call {ptr, i1} @llvm.type.checked.load(ptr %vtable, i32 0, metadata !"typeid2") + %fptr = extractvalue {ptr, i1} %pair, 0 + %p = extractvalue {ptr, i1} %pair, 1 + ; CHECK: br i1 true, + br i1 %p, label %cont, label %trap + +cont: + ; CHECK: call void @vf( + call void %fptr(ptr %obj) + ret void + +trap: + call void @llvm.trap() + unreachable +} + declare {ptr, i1} @llvm.type.checked.load(ptr, i32, metadata) declare void @llvm.trap() !0 = !{i32 0, !"typeid"} +!1 = !{i32 0, !"typeid2"} diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-multiple-assumes.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-multiple-assumes.ll index ed144c2..bfbbeaf 100644 --- 
a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-multiple-assumes.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl-multiple-assumes.ll @@ -23,7 +23,27 @@ define void @call(ptr %obj) { ret void } +declare ptr @llvm.load.relative.i32(ptr, i32) + +@vt3 = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 to i64)) to i32) +], align 4, !type !1 + +; CHECK: define void @call2 +define void @call2(ptr %obj) { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p) + %p2 = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p2) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: call void @vf( + call void %fptr(ptr %obj) + ret void +} + declare i1 @llvm.type.test(ptr, metadata) declare void @llvm.assume(i1) !0 = !{i32 0, !"typeid"} +!1 = !{i32 0, !"typeid2"} diff --git a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll index 1ce96f7..d7f33df 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll @@ -7,6 +7,8 @@ target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" ; CHECK: remark: devirt-single.cc:30:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:41:32: single-impl: devirtualized a call to vf +; CHECK: remark: devirt-single.cc:51:32: single-impl: devirtualized a call to vf ; CHECK: remark: devirt-single.cc:13:0: devirtualized vf ; CHECK-NOT: devirtualized @@ -28,6 +30,41 @@ define void @call(ptr %obj) #1 !dbg !5 { ret void } +declare ptr @llvm.load.relative.i32(ptr, i32) + +@vt3 = private unnamed_addr constant [1 x i32] [ + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt3 
to i64)) to i32) +], align 4, !type !11 + +; CHECK: define void @call2 +define void @call2(ptr %obj) #1 !dbg !9 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: call void @vf( + call void %fptr(ptr %obj), !dbg !10 + ret void +} + +@_ZTV1A.local = private unnamed_addr constant { [3 x i32] } { [3 x i32] [ + i32 0, ; offset to top + i32 0, ; rtti + i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr getelementptr inbounds ({ [3 x i32] }, ptr @_ZTV1A.local, i32 0, i32 0, i32 2) to i64)) to i32) ; vfunc offset +] }, align 4, !type !14 + +; CHECK: define void @call3 +define void @call3(ptr %obj) #1 !dbg !12 { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid3") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 8) + ; CHECK: call void @vf( + call void %fptr(ptr %obj), !dbg !13 + ret void +} + + declare i1 @llvm.type.test(ptr, metadata) declare void @llvm.assume(i1) @@ -45,5 +82,13 @@ declare void @llvm.assume(i1) !7 = distinct !DISubprogram(name: "vf", linkageName: "_ZN3vt12vfEv", scope: !1, file: !1, line: 13, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0) !8 = !{i32 0, !"typeid"} +!9 = distinct !DISubprogram(name: "call2", linkageName: "_Z5call2Pv", scope: !1, file: !1, line: 40, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!10 = !DILocation(line: 41, column: 32, scope: !9) +!11 = !{i32 0, !"typeid2"} + +!12 = distinct !DISubprogram(name: "call3", linkageName: "_Z5call3Pv", scope: !1, file: !1, line: 50, isLocal: false, isDefinition: true, scopeLine: 9, flags: DIFlagPrototyped, isOptimized: false, unit: !0) +!13 = !DILocation(line: 51, column: 32, scope: !12) +!14 = !{i32 0, 
!"typeid3"} + ; CHECK: 1 wholeprogramdevirt - Number of whole program devirtualization targets -; CHECK: 1 wholeprogramdevirt - Number of single implementation devirtualizations +; CHECK: 3 wholeprogramdevirt - Number of single implementation devirtualizations diff --git a/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll index 3c5d9b8..062cef9 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/pointer-vtable.ll @@ -20,7 +20,23 @@ define void @call(ptr %obj) { ret void } +@vt2 = constant i32 trunc (i64 sub (i64 ptrtoint (ptr dso_local_equivalent @vf to i64), i64 ptrtoint (ptr @vt2 to i64)) to i32), !type !1 + +declare ptr @llvm.load.relative.i32(ptr, i32) + +; CHECK: define void @call2 +define void @call2(ptr %obj) { + %vtable = load ptr, ptr %obj + %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr %vtable, i32 0) + ; CHECK: call void @vf( + call void %fptr(ptr %obj) + ret void +} + declare i1 @llvm.type.test(ptr, metadata) declare void @llvm.assume(i1) !0 = !{i32 0, !"typeid"} +!1 = !{i32 0, !"typeid2"} diff --git a/llvm/test/Verifier/tbaa-struct.ll b/llvm/test/Verifier/tbaa-struct.ll index b8ddc7c..14c19a1 100644 --- a/llvm/test/Verifier/tbaa-struct.ll +++ b/llvm/test/Verifier/tbaa-struct.ll @@ -1,28 +1,36 @@ -; RUN: llvm-as < %s 2>&1 - -; FIXME: The verifer should reject the invalid !tbaa.struct nodes below. 
+; RUN: not llvm-as < %s 2>&1 | FileCheck %s define void @test_overlapping_regions(ptr %a1) { +; CHECK: Overlapping tbaa.struct regions +; CHECK-NEXT: %ld = load i8, ptr %a1, align 1, !tbaa.struct !0 %ld = load i8, ptr %a1, align 1, !tbaa.struct !0 ret void } define void @test_size_not_integer(ptr %a1) { +; CHECK: Size must be a constant integer +; CHECK-NEXT: store i8 1, ptr %a1, align 1, !tbaa.struct !5 store i8 1, ptr %a1, align 1, !tbaa.struct !5 ret void } define void @test_offset_not_integer(ptr %a1, ptr %a2) { +; CHECK: Offset must be a constant integer +; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !6 tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !6 ret void } define void @test_tbaa_missing(ptr %a1, ptr %a2) { +; CHECK: TBAA tag missing +; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !7 tail call void @llvm.memcpy.p0.p0.i64(ptr align 8 %a1, ptr align 8 %a2, i64 16, i1 false), !tbaa.struct !7 ret void } define void @test_tbaa_invalid(ptr %a1) { +; CHECK: Old-style TBAA is no longer allowed, use struct-path TBAA instead +; CHECK-NEXT: store i8 1, ptr %a1, align 1, !tbaa.struct !8 store i8 1, ptr %a1, align 1, !tbaa.struct !8 ret void } diff --git a/llvm/test/Verifier/variadic.ll b/llvm/test/Verifier/variadic.ll new file mode 100644 index 0000000..55e4a4d --- /dev/null +++ b/llvm/test/Verifier/variadic.ll @@ -0,0 +1,8 @@ +; RUN: not opt -S -passes=verify 2>&1 < %s | FileCheck %s + +; CHECK: va_start called in a non-varargs function +declare void @llvm.va_start(ptr) +define void @not_vararg(ptr %p) nounwind { + call void @llvm.va_start(ptr %p) + ret void +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll new file mode 100644 index 0000000..4eb05b9 --- 
/dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll @@ -0,0 +1,39 @@ +; RUN: opt < %s -S | FileCheck %s + +define i32 @phi_after_label(i1 %cc) { +entry: + br i1 %cc, label %then, label %end + +then: + br label %end + +end: + %r = phi i32 [ 0, %entry ], [ 1, %then ] + ret i32 %r +} + +define void @phi_before_label(i32 %bound) { +entry: + br label %loop + +loop: + %ctr = phi i32 [ 0, %entry ], [ %ctr.next, %loop ] + %ctr.next = add i32 %ctr, 1 + %cc = icmp ult i32 %ctr.next, %bound + br i1 %cc, label %loop, label %end + +end: + ret void +} + +define i32 @phi_after_label_unnamed(i1 %cc) { +0: + br i1 %cc, label %1, label %2 + +1: + br label %2 + +2: + %r = phi i32 [ 0, %0 ], [ 1, %1 ] + ret i32 %r +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll.expected new file mode 100644 index 0000000..1d21ebe --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/phi-labels.ll.expected @@ -0,0 +1,71 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt < %s -S | FileCheck %s + +define i32 @phi_after_label(i1 %cc) { +; CHECK-LABEL: define i32 @phi_after_label( +; CHECK-SAME: i1 [[CC:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[CC]], label [[THEN:%.*]], label [[END:%.*]] +; CHECK: then: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[R:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[THEN]] ] +; CHECK-NEXT: ret i32 [[R]] +; +entry: + br i1 %cc, label %then, label %end + +then: + br label %end + +end: + %r = phi i32 [ 0, %entry ], [ 1, %then ] + ret i32 %r +} + +define void @phi_before_label(i32 %bound) { +; CHECK-LABEL: define void @phi_before_label( +; CHECK-SAME: i32 [[BOUND:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[CTR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 
[[CTR_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[CTR_NEXT]] = add i32 [[CTR]], 1 +; CHECK-NEXT: [[CC:%.*]] = icmp ult i32 [[CTR_NEXT]], [[BOUND]] +; CHECK-NEXT: br i1 [[CC]], label [[LOOP]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %ctr = phi i32 [ 0, %entry ], [ %ctr.next, %loop ] + %ctr.next = add i32 %ctr, 1 + %cc = icmp ult i32 %ctr.next, %bound + br i1 %cc, label %loop, label %end + +end: + ret void +} + +define i32 @phi_after_label_unnamed(i1 %cc) { +; CHECK-LABEL: define i32 @phi_after_label_unnamed( +; CHECK-SAME: i1 [[CC:%.*]]) { +; CHECK-NEXT: br i1 [[CC]], label [[TMP1:%.*]], label [[TMP2:%.*]] +; CHECK: 1: +; CHECK-NEXT: br label [[TMP2]] +; CHECK: 2: +; CHECK-NEXT: [[R:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ 1, [[TMP1]] ] +; CHECK-NEXT: ret i32 [[R]] +; +0: + br i1 %cc, label %1, label %2 + +1: + br label %2 + +2: + %r = phi i32 [ 0, %0 ], [ 1, %1 ] + ret i32 %r +} diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values_funcs.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values_funcs.ll new file mode 100644 index 0000000..b4fd23a --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values_funcs.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 +; RUN: opt < %s -S | FileCheck %s + +; The assumption underlying this test is that there are pre-existing check lines +; but something has changed, and we would like to avoid needless changes of +; meta variable names so that diffs end up being easier to read, e.g. avoid +; changing X_I33 into X_I34 or renumbering the various TMP variables. 
+ +define i32 @func({i32, i32} %x, i32 %y) { + %x.i34 = extractvalue {i32, i32} %x, 0 + %1 = add i32 %y, 1 + %2 = add i32 %x.i34, %1 + %3 = mul i32 %2, 3 + ret i32 %3 +} + +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: { i32, i32 } [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[X_I33:%.*]] = extractvalue { i32, i32 } [[X]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X_I33]], [[Y]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 3 +; CHECK-NEXT: ret i32 [[TMP2]] +; diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values_funcs.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values_funcs.ll.expected new file mode 100644 index 0000000..86f929f --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/stable_ir_values_funcs.ll.expected @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 3 +; RUN: opt < %s -S | FileCheck %s + +; The assumption underlying this test is that there are pre-existing check lines +; but something has changed, and we would like to avoid needless changes of +; meta variable names so that diffs end up being easier to read, e.g. avoid +; changing X_I33 into X_I34 or renumbering the various TMP variables. 
+ +define i32 @func({i32, i32} %x, i32 %y) { + %x.i34 = extractvalue {i32, i32} %x, 0 + %1 = add i32 %y, 1 + %2 = add i32 %x.i34, %1 + %3 = mul i32 %2, 3 + ret i32 %3 +} + +; CHECK-LABEL: define i32 @func( +; CHECK-SAME: { i32, i32 } [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: [[X_I33:%.*]] = extractvalue { i32, i32 } [[X]], 0 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[Y]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X_I33]], [[TMP3]] +; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 3 +; CHECK-NEXT: ret i32 [[TMP2]] +; diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll new file mode 100644 index 0000000..9a9cc0a --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll @@ -0,0 +1,168 @@ +; Just run it through opt, no passes needed. +; RUN: opt < %s -S --write-experimental-debuginfo=true | FileCheck %s + +; ModuleID = 'various_ir_values.c' +source_filename = "various_ir_values.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define dso_local void @foo(ptr %A) #0 !dbg !7 { +entry: + %A.addr = alloca ptr, align 8, !DIAssignID !16 + %i = alloca i32, align 4 + #dbg_assign(i1 undef, !13, !DIExpression(), !16, ptr %A.addr, !DIExpression(), !17) + store ptr %A, ptr %A.addr, align 8, !tbaa !18 + #dbg_declare(ptr %A.addr, !13, !DIExpression(), !17) + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2, !dbg !22 + #dbg_declare(ptr %i, !14, !DIExpression(), !23) + store i32 0, ptr %i, align 4, !dbg !23, !tbaa !24 + br label %for.cond, !dbg !22 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4, !dbg !26, !tbaa !24 + %1 = load ptr, ptr %A.addr, align 8, !dbg !28, !tbaa !18 + %2 = load i32, ptr %1, align 4, !dbg !29, !tbaa !24 + %cmp = icmp 
slt i32 %0, %2, !dbg !30 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !31, !prof !32 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2, !dbg !33 + br label %for.end + +for.body: ; preds = %for.cond + %3 = load ptr, ptr %A.addr, align 8, !dbg !34, !tbaa !18 + %4 = load i32, ptr %i, align 4, !dbg !35, !tbaa !24 + %idxprom = sext i32 %4 to i64, !dbg !34 + %arrayidx = getelementptr inbounds i32, ptr %3, i64 %idxprom, !dbg !34 + store i32 0, ptr %arrayidx, align 4, !dbg !36, !tbaa !24 + br label %for.inc, !dbg !34 + +for.inc: ; preds = %for.body + %5 = load i32, ptr %i, align 4, !dbg !37, !tbaa !24 + %inc = add nsw i32 %5, 1, !dbg !37 + store i32 %inc, ptr %i, align 4, !dbg !37, !tbaa !24 + br label %for.cond, !dbg !33, !llvm.loop !38 + +for.end: ; preds = %for.cond.cleanup + ret void, !dbg !40 +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nounwind uwtable +define dso_local void @bar(ptr %A) #0 !dbg !41 { +entry: + %A.addr = alloca ptr, align 8 + %i = alloca i32, align 4 + store ptr %A, ptr %A.addr, align 8, !tbaa !18 + #dbg_declare(ptr %A.addr, !43, !DIExpression(), !46) + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2, !dbg !47 + #dbg_declare(ptr %i, !44, !DIExpression(), !48) + store i32 0, ptr %i, align 4, !dbg !48, !tbaa !24 + br label %for.cond, !dbg !47 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4, !dbg !49, !tbaa !24 + %1 = load ptr, ptr %A.addr, align 8, !dbg !51, !tbaa !18 + %2 = load i32, ptr %1, align 4, !dbg !52, !tbaa !24 + %cmp = icmp slt i32 %0, %2, !dbg !53 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !54 + +for.cond.cleanup: ; preds = %for.cond + call void 
@llvm.lifetime.end.p0(i64 4, ptr %i) #2, !dbg !55 + br label %for.end + +for.body: ; preds = %for.cond + %3 = load ptr, ptr %A.addr, align 8, !dbg !56, !tbaa !18 + %4 = load i32, ptr %i, align 4, !dbg !57, !tbaa !24 + %idxprom = sext i32 %4 to i64, !dbg !56 + %arrayidx = getelementptr inbounds i32, ptr %3, i64 %idxprom, !dbg !56 + store i32 0, ptr %arrayidx, align 4, !dbg !58, !tbaa !24 + br label %for.inc, !dbg !56 + +for.inc: ; preds = %for.body + %5 = load i32, ptr %i, align 4, !dbg !59, !tbaa !24 + %inc = add nsw i32 %5, 1, !dbg !59 + store i32 %inc, ptr %i, align 4, !dbg !59, !tbaa !24 + br label %for.cond, !dbg !55, !llvm.loop !60 + +for.end: ; preds = %for.cond.cleanup + ret void, !dbg !62 +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0 (git@github.com:llvm/llvm-project.git 1d5da8cd30fce1c0a2c2fa6ba656dbfaa36192c8)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "various_ir_values.c", directory: "/data/build/llvm-project") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = 
!{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 1d5da8cd30fce1c0a2c2fa6ba656dbfaa36192c8)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10} +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !{!13, !14} +!13 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10) +!14 = !DILocalVariable(name: "i", scope: !15, file: !1, line: 3, type: !11) +!15 = distinct !DILexicalBlock(scope: !7, file: !1, line: 3, column: 3) +!16 = distinct !DIAssignID() +!17 = !DILocation(line: 1, column: 15, scope: !7) +!18 = !{!19, !19, i64 0} +!19 = !{!"any pointer", !20, i64 0} +!20 = !{!"omnipotent char", !21, i64 0} +!21 = !{!"Simple C/C++ TBAA"} +!22 = !DILocation(line: 3, column: 8, scope: !15) +!23 = !DILocation(line: 3, column: 12, scope: !15) +!24 = !{!25, !25, i64 0} +!25 = !{!"int", !20, i64 0} +!26 = !DILocation(line: 3, column: 19, scope: !27) +!27 = distinct !DILexicalBlock(scope: !15, file: !1, line: 3, column: 3) +!28 = !DILocation(line: 3, column: 24, scope: !27) +!29 = !DILocation(line: 3, column: 23, scope: !27) +!30 = !DILocation(line: 3, column: 21, scope: !27) +!31 = !DILocation(line: 3, column: 3, scope: !15) +!32 = !{!"branch_weights", i32 1, i32 1048575} +!33 = !DILocation(line: 3, column: 3, scope: !27) +!34 = !DILocation(line: 4, column: 5, scope: !27) +!35 = !DILocation(line: 4, column: 7, scope: !27) +!36 = !DILocation(line: 4, column: 10, scope: !27) +!37 = !DILocation(line: 3, column: 27, scope: !27) +!38 = distinct !{!38, !31, !39} +!39 = !DILocation(line: 4, column: 12, scope: !15) +!40 = !DILocation(line: 5, column: 1, scope: !7) +!41 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 
7, type: !8, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !42) +!42 = !{!43, !44} +!43 = !DILocalVariable(name: "A", arg: 1, scope: !41, file: !1, line: 7, type: !10) +!44 = !DILocalVariable(name: "i", scope: !45, file: !1, line: 9, type: !11) +!45 = distinct !DILexicalBlock(scope: !41, file: !1, line: 9, column: 3) +!46 = !DILocation(line: 7, column: 15, scope: !41) +!47 = !DILocation(line: 9, column: 8, scope: !45) +!48 = !DILocation(line: 9, column: 12, scope: !45) +!49 = !DILocation(line: 9, column: 19, scope: !50) +!50 = distinct !DILexicalBlock(scope: !45, file: !1, line: 9, column: 3) +!51 = !DILocation(line: 9, column: 24, scope: !50) +!52 = !DILocation(line: 9, column: 23, scope: !50) +!53 = !DILocation(line: 9, column: 21, scope: !50) +!54 = !DILocation(line: 9, column: 3, scope: !45) +!55 = !DILocation(line: 9, column: 3, scope: !50) +!56 = !DILocation(line: 10, column: 5, scope: !50) +!57 = !DILocation(line: 10, column: 7, scope: !50) +!58 = !DILocation(line: 10, column: 10, scope: !50) +!59 = !DILocation(line: 9, column: 27, scope: !50) +!60 = distinct !{!60, !54, !61} +!61 = !DILocation(line: 10, column: 12, scope: !45) +!62 = !DILocation(line: 11, column: 1, scope: !41) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll.expected new file mode 100644 index 0000000..1f9c37c --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll.expected @@ -0,0 +1,238 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; Just run it through opt, no passes needed. 
+; RUN: opt < %s -S --write-experimental-debuginfo=true | FileCheck %s + +; ModuleID = 'various_ir_values.c' +source_filename = "various_ir_values.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define dso_local void @foo(ptr %A) #0 !dbg !7 { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, !DIAssignID [[DIASSIGNID16:![0-9]+]] +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: #dbg_assign(i1 undef, [[META13:![0-9]+]], !DIExpression(), [[DIASSIGNID16]], ptr [[A_ADDR]], !DIExpression(), [[META17:![0-9]+]]) +; CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA18:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META13]], !DIExpression(), [[META17]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR2:[0-9]+]], !dbg [[DBG22:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[I]], [[META14:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) +; CHECK-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META23]], !tbaa [[TBAA24:![0-9]+]] +; CHECK-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG22]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG26:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG28:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG29:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP2]], !dbg [[DBG30:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]], !dbg [[DBG31:![0-9]+]], !prof [[PROF32:![0-9]+]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR2]], !dbg [[DBG33:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP3:%.*]] = load 
ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG34:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG35:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64, !dbg [[DBG34]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]], !dbg [[DBG34]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !dbg [[DBG36:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG34]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG37:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG37]] +; CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4, !dbg [[DBG37]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_COND]], !dbg [[DBG33]], !llvm.loop [[LOOP38:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void, !dbg [[DBG40:![0-9]+]] +; +entry: + %A.addr = alloca ptr, align 8, !DIAssignID !16 + %i = alloca i32, align 4 + #dbg_assign(i1 undef, !13, !DIExpression(), !16, ptr %A.addr, !DIExpression(), !17) + store ptr %A, ptr %A.addr, align 8, !tbaa !18 + #dbg_declare(ptr %A.addr, !13, !DIExpression(), !17) + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2, !dbg !22 + #dbg_declare(ptr %i, !14, !DIExpression(), !23) + store i32 0, ptr %i, align 4, !dbg !23, !tbaa !24 + br label %for.cond, !dbg !22 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4, !dbg !26, !tbaa !24 + %1 = load ptr, ptr %A.addr, align 8, !dbg !28, !tbaa !18 + %2 = load i32, ptr %1, align 4, !dbg !29, !tbaa !24 + %cmp = icmp slt i32 %0, %2, !dbg !30 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !31, !prof !32 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2, !dbg !33 + br label %for.end + +for.body: ; preds = %for.cond + %3 = load ptr, ptr %A.addr, align 8, !dbg !34, !tbaa !18 + %4 = load i32, ptr %i, align 4, !dbg !35, !tbaa !24 + 
%idxprom = sext i32 %4 to i64, !dbg !34 + %arrayidx = getelementptr inbounds i32, ptr %3, i64 %idxprom, !dbg !34 + store i32 0, ptr %arrayidx, align 4, !dbg !36, !tbaa !24 + br label %for.inc, !dbg !34 + +for.inc: ; preds = %for.body + %5 = load i32, ptr %i, align 4, !dbg !37, !tbaa !24 + %inc = add nsw i32 %5, 1, !dbg !37 + store i32 %inc, ptr %i, align 4, !dbg !37, !tbaa !24 + br label %for.cond, !dbg !33, !llvm.loop !38 + +for.end: ; preds = %for.cond.cleanup + ret void, !dbg !40 +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nounwind uwtable +define dso_local void @bar(ptr %A) #0 !dbg !41 { +; CHECK-LABEL: @bar( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA18]] +; CHECK-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META43:![0-9]+]], !DIExpression(), [[META46:![0-9]+]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR2]], !dbg [[DBG47:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[I]], [[META44:![0-9]+]], !DIExpression(), [[META48:![0-9]+]]) +; CHECK-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META48]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG47]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG49:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG51:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG52:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP2]], !dbg [[DBG53:![0-9]+]] +; CHECK-NEXT: br i1 
[[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]], !dbg [[DBG54:![0-9]+]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR2]], !dbg [[DBG55:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG56:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG57:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64, !dbg [[DBG56]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]], !dbg [[DBG56]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !dbg [[DBG58:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG56]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG59:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG59]] +; CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4, !dbg [[DBG59]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_COND]], !dbg [[DBG55]], !llvm.loop [[LOOP60:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void, !dbg [[DBG62:![0-9]+]] +; +entry: + %A.addr = alloca ptr, align 8 + %i = alloca i32, align 4 + store ptr %A, ptr %A.addr, align 8, !tbaa !18 + #dbg_declare(ptr %A.addr, !43, !DIExpression(), !46) + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2, !dbg !47 + #dbg_declare(ptr %i, !44, !DIExpression(), !48) + store i32 0, ptr %i, align 4, !dbg !48, !tbaa !24 + br label %for.cond, !dbg !47 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4, !dbg !49, !tbaa !24 + %1 = load ptr, ptr %A.addr, align 8, !dbg !51, !tbaa !18 + %2 = load i32, ptr %1, align 4, !dbg !52, !tbaa !24 + %cmp = icmp slt i32 %0, %2, !dbg !53 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !54 + +for.cond.cleanup: ; preds = %for.cond + call void 
@llvm.lifetime.end.p0(i64 4, ptr %i) #2, !dbg !55 + br label %for.end + +for.body: ; preds = %for.cond + %3 = load ptr, ptr %A.addr, align 8, !dbg !56, !tbaa !18 + %4 = load i32, ptr %i, align 4, !dbg !57, !tbaa !24 + %idxprom = sext i32 %4 to i64, !dbg !56 + %arrayidx = getelementptr inbounds i32, ptr %3, i64 %idxprom, !dbg !56 + store i32 0, ptr %arrayidx, align 4, !dbg !58, !tbaa !24 + br label %for.inc, !dbg !56 + +for.inc: ; preds = %for.body + %5 = load i32, ptr %i, align 4, !dbg !59, !tbaa !24 + %inc = add nsw i32 %5, 1, !dbg !59 + store i32 %inc, ptr %i, align 4, !dbg !59, !tbaa !24 + br label %for.cond, !dbg !55, !llvm.loop !60 + +for.end: ; preds = %for.cond.cleanup + ret void, !dbg !62 +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0 (git@github.com:llvm/llvm-project.git 1d5da8cd30fce1c0a2c2fa6ba656dbfaa36192c8)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "various_ir_values.c", directory: "/data/build/llvm-project") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = 
!{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 1d5da8cd30fce1c0a2c2fa6ba656dbfaa36192c8)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10} +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !{!13, !14} +!13 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10) +!14 = !DILocalVariable(name: "i", scope: !15, file: !1, line: 3, type: !11) +!15 = distinct !DILexicalBlock(scope: !7, file: !1, line: 3, column: 3) +!16 = distinct !DIAssignID() +!17 = !DILocation(line: 1, column: 15, scope: !7) +!18 = !{!19, !19, i64 0} +!19 = !{!"any pointer", !20, i64 0} +!20 = !{!"omnipotent char", !21, i64 0} +!21 = !{!"Simple C/C++ TBAA"} +!22 = !DILocation(line: 3, column: 8, scope: !15) +!23 = !DILocation(line: 3, column: 12, scope: !15) +!24 = !{!25, !25, i64 0} +!25 = !{!"int", !20, i64 0} +!26 = !DILocation(line: 3, column: 19, scope: !27) +!27 = distinct !DILexicalBlock(scope: !15, file: !1, line: 3, column: 3) +!28 = !DILocation(line: 3, column: 24, scope: !27) +!29 = !DILocation(line: 3, column: 23, scope: !27) +!30 = !DILocation(line: 3, column: 21, scope: !27) +!31 = !DILocation(line: 3, column: 3, scope: !15) +!32 = !{!"branch_weights", i32 1, i32 1048575} +!33 = !DILocation(line: 3, column: 3, scope: !27) +!34 = !DILocation(line: 4, column: 5, scope: !27) +!35 = !DILocation(line: 4, column: 7, scope: !27) +!36 = !DILocation(line: 4, column: 10, scope: !27) +!37 = !DILocation(line: 3, column: 27, scope: !27) +!38 = distinct !{!38, !31, !39} +!39 = !DILocation(line: 4, column: 12, scope: !15) +!40 = !DILocation(line: 5, column: 1, scope: !7) +!41 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 
7, type: !8, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !42) +!42 = !{!43, !44} +!43 = !DILocalVariable(name: "A", arg: 1, scope: !41, file: !1, line: 7, type: !10) +!44 = !DILocalVariable(name: "i", scope: !45, file: !1, line: 9, type: !11) +!45 = distinct !DILexicalBlock(scope: !41, file: !1, line: 9, column: 3) +!46 = !DILocation(line: 7, column: 15, scope: !41) +!47 = !DILocation(line: 9, column: 8, scope: !45) +!48 = !DILocation(line: 9, column: 12, scope: !45) +!49 = !DILocation(line: 9, column: 19, scope: !50) +!50 = distinct !DILexicalBlock(scope: !45, file: !1, line: 9, column: 3) +!51 = !DILocation(line: 9, column: 24, scope: !50) +!52 = !DILocation(line: 9, column: 23, scope: !50) +!53 = !DILocation(line: 9, column: 21, scope: !50) +!54 = !DILocation(line: 9, column: 3, scope: !45) +!55 = !DILocation(line: 9, column: 3, scope: !50) +!56 = !DILocation(line: 10, column: 5, scope: !50) +!57 = !DILocation(line: 10, column: 7, scope: !50) +!58 = !DILocation(line: 10, column: 10, scope: !50) +!59 = !DILocation(line: 9, column: 27, scope: !50) +!60 = distinct !{!60, !54, !61} +!61 = !DILocation(line: 10, column: 12, scope: !45) +!62 = !DILocation(line: 11, column: 1, scope: !41) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll.funcsig.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll.funcsig.expected new file mode 100644 index 0000000..5905e44 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll.funcsig.expected @@ -0,0 +1,240 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature +; Just run it through opt, no passes needed. 
+; RUN: opt < %s -S --write-experimental-debuginfo=true | FileCheck %s + +; ModuleID = 'various_ir_values.c' +source_filename = "various_ir_values.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define dso_local void @foo(ptr %A) #0 !dbg !7 { +; CHECK-LABEL: define {{[^@]+}}@foo +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG7:![0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, !DIAssignID [[DIASSIGNID16:![0-9]+]] +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: #dbg_assign(i1 undef, [[META13:![0-9]+]], !DIExpression(), [[DIASSIGNID16]], ptr [[A_ADDR]], !DIExpression(), [[META17:![0-9]+]]) +; CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA18:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META13]], !DIExpression(), [[META17]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR2:[0-9]+]], !dbg [[DBG22:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[I]], [[META14:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) +; CHECK-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META23]], !tbaa [[TBAA24:![0-9]+]] +; CHECK-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG22]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG26:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG28:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG29:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP2]], !dbg [[DBG30:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]], !dbg [[DBG31:![0-9]+]], !prof [[PROF32:![0-9]+]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR2]], !dbg [[DBG33:![0-9]+]] +; 
CHECK-NEXT: br label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG34:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG35:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64, !dbg [[DBG34]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]], !dbg [[DBG34]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !dbg [[DBG36:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG34]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG37:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG37]] +; CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4, !dbg [[DBG37]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_COND]], !dbg [[DBG33]], !llvm.loop [[LOOP38:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void, !dbg [[DBG40:![0-9]+]] +; +entry: + %A.addr = alloca ptr, align 8, !DIAssignID !16 + %i = alloca i32, align 4 + #dbg_assign(i1 undef, !13, !DIExpression(), !16, ptr %A.addr, !DIExpression(), !17) + store ptr %A, ptr %A.addr, align 8, !tbaa !18 + #dbg_declare(ptr %A.addr, !13, !DIExpression(), !17) + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2, !dbg !22 + #dbg_declare(ptr %i, !14, !DIExpression(), !23) + store i32 0, ptr %i, align 4, !dbg !23, !tbaa !24 + br label %for.cond, !dbg !22 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4, !dbg !26, !tbaa !24 + %1 = load ptr, ptr %A.addr, align 8, !dbg !28, !tbaa !18 + %2 = load i32, ptr %1, align 4, !dbg !29, !tbaa !24 + %cmp = icmp slt i32 %0, %2, !dbg !30 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !31, !prof !32 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2, !dbg !33 + br label %for.end + +for.body: ; preds = %for.cond + %3 = load ptr, ptr 
%A.addr, align 8, !dbg !34, !tbaa !18 + %4 = load i32, ptr %i, align 4, !dbg !35, !tbaa !24 + %idxprom = sext i32 %4 to i64, !dbg !34 + %arrayidx = getelementptr inbounds i32, ptr %3, i64 %idxprom, !dbg !34 + store i32 0, ptr %arrayidx, align 4, !dbg !36, !tbaa !24 + br label %for.inc, !dbg !34 + +for.inc: ; preds = %for.body + %5 = load i32, ptr %i, align 4, !dbg !37, !tbaa !24 + %inc = add nsw i32 %5, 1, !dbg !37 + store i32 %inc, ptr %i, align 4, !dbg !37, !tbaa !24 + br label %for.cond, !dbg !33, !llvm.loop !38 + +for.end: ; preds = %for.cond.cleanup + ret void, !dbg !40 +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nounwind uwtable +define dso_local void @bar(ptr %A) #0 !dbg !41 { +; CHECK-LABEL: define {{[^@]+}}@bar +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR0]] !dbg [[DBG41:![0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA18]] +; CHECK-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META43:![0-9]+]], !DIExpression(), [[META46:![0-9]+]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR2]], !dbg [[DBG47:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[I]], [[META44:![0-9]+]], !DIExpression(), [[META48:![0-9]+]]) +; CHECK-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META48]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG47]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG49:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG51:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[TMP1]], align 4, !dbg [[DBG52:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP2]], !dbg [[DBG53:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]], !dbg [[DBG54:![0-9]+]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR2]], !dbg [[DBG55:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG56:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG57:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64, !dbg [[DBG56]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]], !dbg [[DBG56]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !dbg [[DBG58:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG56]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG59:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG59]] +; CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4, !dbg [[DBG59]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_COND]], !dbg [[DBG55]], !llvm.loop [[LOOP60:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void, !dbg [[DBG62:![0-9]+]] +; +entry: + %A.addr = alloca ptr, align 8 + %i = alloca i32, align 4 + store ptr %A, ptr %A.addr, align 8, !tbaa !18 + #dbg_declare(ptr %A.addr, !43, !DIExpression(), !46) + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2, !dbg !47 + #dbg_declare(ptr %i, !44, !DIExpression(), !48) + store i32 0, ptr %i, align 4, !dbg !48, !tbaa !24 + br label %for.cond, !dbg !47 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4, !dbg !49, !tbaa !24 + %1 = load ptr, ptr %A.addr, align 8, !dbg !51, !tbaa !18 + %2 = load i32, ptr %1, align 4, !dbg !52, !tbaa !24 + %cmp = 
icmp slt i32 %0, %2, !dbg !53 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !54 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2, !dbg !55 + br label %for.end + +for.body: ; preds = %for.cond + %3 = load ptr, ptr %A.addr, align 8, !dbg !56, !tbaa !18 + %4 = load i32, ptr %i, align 4, !dbg !57, !tbaa !24 + %idxprom = sext i32 %4 to i64, !dbg !56 + %arrayidx = getelementptr inbounds i32, ptr %3, i64 %idxprom, !dbg !56 + store i32 0, ptr %arrayidx, align 4, !dbg !58, !tbaa !24 + br label %for.inc, !dbg !56 + +for.inc: ; preds = %for.body + %5 = load i32, ptr %i, align 4, !dbg !59, !tbaa !24 + %inc = add nsw i32 %5, 1, !dbg !59 + store i32 %inc, ptr %i, align 4, !dbg !59, !tbaa !24 + br label %for.cond, !dbg !55, !llvm.loop !60 + +for.end: ; preds = %for.cond.cleanup + ret void, !dbg !62 +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0 (git@github.com:llvm/llvm-project.git 1d5da8cd30fce1c0a2c2fa6ba656dbfaa36192c8)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "various_ir_values.c", directory: 
"/data/build/llvm-project") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 1d5da8cd30fce1c0a2c2fa6ba656dbfaa36192c8)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10} +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !{!13, !14} +!13 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10) +!14 = !DILocalVariable(name: "i", scope: !15, file: !1, line: 3, type: !11) +!15 = distinct !DILexicalBlock(scope: !7, file: !1, line: 3, column: 3) +!16 = distinct !DIAssignID() +!17 = !DILocation(line: 1, column: 15, scope: !7) +!18 = !{!19, !19, i64 0} +!19 = !{!"any pointer", !20, i64 0} +!20 = !{!"omnipotent char", !21, i64 0} +!21 = !{!"Simple C/C++ TBAA"} +!22 = !DILocation(line: 3, column: 8, scope: !15) +!23 = !DILocation(line: 3, column: 12, scope: !15) +!24 = !{!25, !25, i64 0} +!25 = !{!"int", !20, i64 0} +!26 = !DILocation(line: 3, column: 19, scope: !27) +!27 = distinct !DILexicalBlock(scope: !15, file: !1, line: 3, column: 3) +!28 = !DILocation(line: 3, column: 24, scope: !27) +!29 = !DILocation(line: 3, column: 23, scope: !27) +!30 = !DILocation(line: 3, column: 21, scope: !27) +!31 = !DILocation(line: 3, column: 3, scope: !15) +!32 = !{!"branch_weights", i32 1, i32 1048575} +!33 = !DILocation(line: 3, column: 3, scope: !27) +!34 = !DILocation(line: 4, column: 5, scope: !27) +!35 = !DILocation(line: 4, column: 7, scope: !27) +!36 = !DILocation(line: 4, column: 10, scope: !27) +!37 = !DILocation(line: 3, column: 27, scope: !27) +!38 = distinct !{!38, !31, !39} +!39 = 
!DILocation(line: 4, column: 12, scope: !15) +!40 = !DILocation(line: 5, column: 1, scope: !7) +!41 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 7, type: !8, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !42) +!42 = !{!43, !44} +!43 = !DILocalVariable(name: "A", arg: 1, scope: !41, file: !1, line: 7, type: !10) +!44 = !DILocalVariable(name: "i", scope: !45, file: !1, line: 9, type: !11) +!45 = distinct !DILexicalBlock(scope: !41, file: !1, line: 9, column: 3) +!46 = !DILocation(line: 7, column: 15, scope: !41) +!47 = !DILocation(line: 9, column: 8, scope: !45) +!48 = !DILocation(line: 9, column: 12, scope: !45) +!49 = !DILocation(line: 9, column: 19, scope: !50) +!50 = distinct !DILexicalBlock(scope: !45, file: !1, line: 9, column: 3) +!51 = !DILocation(line: 9, column: 24, scope: !50) +!52 = !DILocation(line: 9, column: 23, scope: !50) +!53 = !DILocation(line: 9, column: 21, scope: !50) +!54 = !DILocation(line: 9, column: 3, scope: !45) +!55 = !DILocation(line: 9, column: 3, scope: !50) +!56 = !DILocation(line: 10, column: 5, scope: !50) +!57 = !DILocation(line: 10, column: 7, scope: !50) +!58 = !DILocation(line: 10, column: 10, scope: !50) +!59 = !DILocation(line: 9, column: 27, scope: !50) +!60 = distinct !{!60, !54, !61} +!61 = !DILocation(line: 10, column: 12, scope: !45) +!62 = !DILocation(line: 11, column: 1, scope: !41) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll.funcsig.globals.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll.funcsig.globals.expected new file mode 100644 index 0000000..579d6a4 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll.funcsig.globals.expected @@ -0,0 +1,309 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: 
--function-signature --check-globals +; Just run it through opt, no passes needed. +; RUN: opt < %s -S --write-experimental-debuginfo=true | FileCheck %s + +; ModuleID = 'various_ir_values.c' +source_filename = "various_ir_values.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define dso_local void @foo(ptr %A) #0 !dbg !7 { +; CHECK-LABEL: define {{[^@]+}}@foo +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG7:![0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, !DIAssignID [[DIASSIGNID16:![0-9]+]] +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: #dbg_assign(i1 undef, [[META13:![0-9]+]], !DIExpression(), [[DIASSIGNID16]], ptr [[A_ADDR]], !DIExpression(), [[META17:![0-9]+]]) +; CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA18:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META13]], !DIExpression(), [[META17]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR2:[0-9]+]], !dbg [[DBG22:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[I]], [[META14:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) +; CHECK-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META23]], !tbaa [[TBAA24:![0-9]+]] +; CHECK-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG22]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG26:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG28:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG29:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP2]], !dbg [[DBG30:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]], !dbg [[DBG31:![0-9]+]], !prof [[PROF32:![0-9]+]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: call void 
@llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR2]], !dbg [[DBG33:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG34:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG35:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64, !dbg [[DBG34]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]], !dbg [[DBG34]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !dbg [[DBG36:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG34]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG37:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG37]] +; CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4, !dbg [[DBG37]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_COND]], !dbg [[DBG33]], !llvm.loop [[LOOP38:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void, !dbg [[DBG40:![0-9]+]] +; +entry: + %A.addr = alloca ptr, align 8, !DIAssignID !16 + %i = alloca i32, align 4 + #dbg_assign(i1 undef, !13, !DIExpression(), !16, ptr %A.addr, !DIExpression(), !17) + store ptr %A, ptr %A.addr, align 8, !tbaa !18 + #dbg_declare(ptr %A.addr, !13, !DIExpression(), !17) + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2, !dbg !22 + #dbg_declare(ptr %i, !14, !DIExpression(), !23) + store i32 0, ptr %i, align 4, !dbg !23, !tbaa !24 + br label %for.cond, !dbg !22 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4, !dbg !26, !tbaa !24 + %1 = load ptr, ptr %A.addr, align 8, !dbg !28, !tbaa !18 + %2 = load i32, ptr %1, align 4, !dbg !29, !tbaa !24 + %cmp = icmp slt i32 %0, %2, !dbg !30 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !31, !prof !32 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2, !dbg 
!33 + br label %for.end + +for.body: ; preds = %for.cond + %3 = load ptr, ptr %A.addr, align 8, !dbg !34, !tbaa !18 + %4 = load i32, ptr %i, align 4, !dbg !35, !tbaa !24 + %idxprom = sext i32 %4 to i64, !dbg !34 + %arrayidx = getelementptr inbounds i32, ptr %3, i64 %idxprom, !dbg !34 + store i32 0, ptr %arrayidx, align 4, !dbg !36, !tbaa !24 + br label %for.inc, !dbg !34 + +for.inc: ; preds = %for.body + %5 = load i32, ptr %i, align 4, !dbg !37, !tbaa !24 + %inc = add nsw i32 %5, 1, !dbg !37 + store i32 %inc, ptr %i, align 4, !dbg !37, !tbaa !24 + br label %for.cond, !dbg !33, !llvm.loop !38 + +for.end: ; preds = %for.cond.cleanup + ret void, !dbg !40 +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nounwind uwtable +define dso_local void @bar(ptr %A) #0 !dbg !41 { +; CHECK-LABEL: define {{[^@]+}}@bar +; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR0]] !dbg [[DBG41:![0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA18]] +; CHECK-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META43:![0-9]+]], !DIExpression(), [[META46:![0-9]+]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR2]], !dbg [[DBG47:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[I]], [[META44:![0-9]+]], !DIExpression(), [[META48:![0-9]+]]) +; CHECK-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META48]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG47]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG49:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg 
[[DBG51:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG52:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP2]], !dbg [[DBG53:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]], !dbg [[DBG54:![0-9]+]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR2]], !dbg [[DBG55:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG56:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG57:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64, !dbg [[DBG56]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]], !dbg [[DBG56]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !dbg [[DBG58:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG56]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG59:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG59]] +; CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4, !dbg [[DBG59]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_COND]], !dbg [[DBG55]], !llvm.loop [[LOOP60:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void, !dbg [[DBG62:![0-9]+]] +; +entry: + %A.addr = alloca ptr, align 8 + %i = alloca i32, align 4 + store ptr %A, ptr %A.addr, align 8, !tbaa !18 + #dbg_declare(ptr %A.addr, !43, !DIExpression(), !46) + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2, !dbg !47 + #dbg_declare(ptr %i, !44, !DIExpression(), !48) + store i32 0, ptr %i, align 4, !dbg !48, !tbaa !24 + br label %for.cond, !dbg !47 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4, !dbg !49, !tbaa !24 + %1 = load ptr, ptr %A.addr, align 8, !dbg 
!51, !tbaa !18 + %2 = load i32, ptr %1, align 4, !dbg !52, !tbaa !24 + %cmp = icmp slt i32 %0, %2, !dbg !53 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !54 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2, !dbg !55 + br label %for.end + +for.body: ; preds = %for.cond + %3 = load ptr, ptr %A.addr, align 8, !dbg !56, !tbaa !18 + %4 = load i32, ptr %i, align 4, !dbg !57, !tbaa !24 + %idxprom = sext i32 %4 to i64, !dbg !56 + %arrayidx = getelementptr inbounds i32, ptr %3, i64 %idxprom, !dbg !56 + store i32 0, ptr %arrayidx, align 4, !dbg !58, !tbaa !24 + br label %for.inc, !dbg !56 + +for.inc: ; preds = %for.body + %5 = load i32, ptr %i, align 4, !dbg !59, !tbaa !24 + %inc = add nsw i32 %5, 1, !dbg !59 + store i32 %inc, ptr %i, align 4, !dbg !59, !tbaa !24 + br label %for.cond, !dbg !55, !llvm.loop !60 + +for.end: ; preds = %for.cond.cleanup + ret void, !dbg !62 +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0 (git@github.com:llvm/llvm-project.git 1d5da8cd30fce1c0a2c2fa6ba656dbfaa36192c8)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) 
+!1 = !DIFile(filename: "various_ir_values.c", directory: "/data/build/llvm-project") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 1d5da8cd30fce1c0a2c2fa6ba656dbfaa36192c8)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10} +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !{!13, !14} +!13 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10) +!14 = !DILocalVariable(name: "i", scope: !15, file: !1, line: 3, type: !11) +!15 = distinct !DILexicalBlock(scope: !7, file: !1, line: 3, column: 3) +!16 = distinct !DIAssignID() +!17 = !DILocation(line: 1, column: 15, scope: !7) +!18 = !{!19, !19, i64 0} +!19 = !{!"any pointer", !20, i64 0} +!20 = !{!"omnipotent char", !21, i64 0} +!21 = !{!"Simple C/C++ TBAA"} +!22 = !DILocation(line: 3, column: 8, scope: !15) +!23 = !DILocation(line: 3, column: 12, scope: !15) +!24 = !{!25, !25, i64 0} +!25 = !{!"int", !20, i64 0} +!26 = !DILocation(line: 3, column: 19, scope: !27) +!27 = distinct !DILexicalBlock(scope: !15, file: !1, line: 3, column: 3) +!28 = !DILocation(line: 3, column: 24, scope: !27) +!29 = !DILocation(line: 3, column: 23, scope: !27) +!30 = !DILocation(line: 3, column: 21, scope: !27) +!31 = !DILocation(line: 3, column: 3, scope: !15) +!32 = !{!"branch_weights", i32 1, i32 1048575} +!33 = !DILocation(line: 3, column: 3, scope: !27) +!34 = !DILocation(line: 4, column: 5, scope: !27) +!35 = !DILocation(line: 4, column: 7, scope: !27) +!36 = !DILocation(line: 4, column: 10, scope: !27) +!37 = !DILocation(line: 3, 
column: 27, scope: !27) +!38 = distinct !{!38, !31, !39} +!39 = !DILocation(line: 4, column: 12, scope: !15) +!40 = !DILocation(line: 5, column: 1, scope: !7) +!41 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 7, type: !8, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !42) +!42 = !{!43, !44} +!43 = !DILocalVariable(name: "A", arg: 1, scope: !41, file: !1, line: 7, type: !10) +!44 = !DILocalVariable(name: "i", scope: !45, file: !1, line: 9, type: !11) +!45 = distinct !DILexicalBlock(scope: !41, file: !1, line: 9, column: 3) +!46 = !DILocation(line: 7, column: 15, scope: !41) +!47 = !DILocation(line: 9, column: 8, scope: !45) +!48 = !DILocation(line: 9, column: 12, scope: !45) +!49 = !DILocation(line: 9, column: 19, scope: !50) +!50 = distinct !DILexicalBlock(scope: !45, file: !1, line: 9, column: 3) +!51 = !DILocation(line: 9, column: 24, scope: !50) +!52 = !DILocation(line: 9, column: 23, scope: !50) +!53 = !DILocation(line: 9, column: 21, scope: !50) +!54 = !DILocation(line: 9, column: 3, scope: !45) +!55 = !DILocation(line: 9, column: 3, scope: !50) +!56 = !DILocation(line: 10, column: 5, scope: !50) +!57 = !DILocation(line: 10, column: 7, scope: !50) +!58 = !DILocation(line: 10, column: 10, scope: !50) +!59 = !DILocation(line: 9, column: 27, scope: !50) +!60 = distinct !{!60, !54, !61} +!61 = !DILocation(line: 10, column: 12, scope: !45) +!62 = !DILocation(line: 11, column: 1, scope: !41) +;. 
+; CHECK: attributes #[[ATTR0]] = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR2]] = { nounwind } +;. +; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None) +; CHECK: [[META1]] = !DIFile(filename: "various_ir_values.c", directory: {{.*}}) +; CHECK: [[META2]] = !{} +; CHECK: [[META3:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 4} +; CHECK: [[META4:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} +; CHECK: [[META5:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} +; CHECK: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; CHECK: [[DBG7]] = distinct !DISubprogram(name: "foo", scope: [[META1]], file: [[META1]], line: 1, type: [[META8:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META12:![0-9]+]]) +; CHECK: [[META8]] = !DISubroutineType(types: [[META9:![0-9]+]]) +; CHECK: [[META9]] = !{null, [[META10:![0-9]+]]} +; CHECK: [[META10]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META11:![0-9]+]], size: 64) +; CHECK: [[META11]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +; CHECK: 
[[META12]] = !{[[META13]], [[META14]]} +; CHECK: [[META13]] = !DILocalVariable(name: "A", arg: 1, scope: [[DBG7]], file: [[META1]], line: 1, type: [[META10]]) +; CHECK: [[META14]] = !DILocalVariable(name: "i", scope: [[META15:![0-9]+]], file: [[META1]], line: 3, type: [[META11]]) +; CHECK: [[META15]] = distinct !DILexicalBlock(scope: [[DBG7]], file: [[META1]], line: 3, column: 3) +; CHECK: [[DIASSIGNID16]] = distinct !DIAssignID() +; CHECK: [[META17]] = !DILocation(line: 1, column: 15, scope: [[DBG7]]) +; CHECK: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} +; CHECK: [[META19]] = !{!"any pointer", [[META20:![0-9]+]], i64 0} +; CHECK: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0} +; CHECK: [[META21]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[DBG22]] = !DILocation(line: 3, column: 8, scope: [[META15]]) +; CHECK: [[META23]] = !DILocation(line: 3, column: 12, scope: [[META15]]) +; CHECK: [[TBAA24]] = !{[[META25:![0-9]+]], [[META25]], i64 0} +; CHECK: [[META25]] = !{!"int", [[META20]], i64 0} +; CHECK: [[DBG26]] = !DILocation(line: 3, column: 19, scope: [[META27:![0-9]+]]) +; CHECK: [[META27]] = distinct !DILexicalBlock(scope: [[META15]], file: [[META1]], line: 3, column: 3) +; CHECK: [[DBG28]] = !DILocation(line: 3, column: 24, scope: [[META27]]) +; CHECK: [[DBG29]] = !DILocation(line: 3, column: 23, scope: [[META27]]) +; CHECK: [[DBG30]] = !DILocation(line: 3, column: 21, scope: [[META27]]) +; CHECK: [[DBG31]] = !DILocation(line: 3, column: 3, scope: [[META15]]) +; CHECK: [[PROF32]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[DBG33]] = !DILocation(line: 3, column: 3, scope: [[META27]]) +; CHECK: [[DBG34]] = !DILocation(line: 4, column: 5, scope: [[META27]]) +; CHECK: [[DBG35]] = !DILocation(line: 4, column: 7, scope: [[META27]]) +; CHECK: [[DBG36]] = !DILocation(line: 4, column: 10, scope: [[META27]]) +; CHECK: [[DBG37]] = !DILocation(line: 3, column: 27, scope: [[META27]]) +; CHECK: [[LOOP38]] = distinct !{[[LOOP38]], [[DBG31]], 
[[META39:![0-9]+]]} +; CHECK: [[META39]] = !DILocation(line: 4, column: 12, scope: [[META15]]) +; CHECK: [[DBG40]] = !DILocation(line: 5, column: 1, scope: [[DBG7]]) +; CHECK: [[DBG41]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 7, type: [[META8]], scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META42:![0-9]+]]) +; CHECK: [[META42]] = !{[[META43]], [[META44]]} +; CHECK: [[META43]] = !DILocalVariable(name: "A", arg: 1, scope: [[DBG41]], file: [[META1]], line: 7, type: [[META10]]) +; CHECK: [[META44]] = !DILocalVariable(name: "i", scope: [[META45:![0-9]+]], file: [[META1]], line: 9, type: [[META11]]) +; CHECK: [[META45]] = distinct !DILexicalBlock(scope: [[DBG41]], file: [[META1]], line: 9, column: 3) +; CHECK: [[META46]] = !DILocation(line: 7, column: 15, scope: [[DBG41]]) +; CHECK: [[DBG47]] = !DILocation(line: 9, column: 8, scope: [[META45]]) +; CHECK: [[META48]] = !DILocation(line: 9, column: 12, scope: [[META45]]) +; CHECK: [[DBG49]] = !DILocation(line: 9, column: 19, scope: [[META50:![0-9]+]]) +; CHECK: [[META50]] = distinct !DILexicalBlock(scope: [[META45]], file: [[META1]], line: 9, column: 3) +; CHECK: [[DBG51]] = !DILocation(line: 9, column: 24, scope: [[META50]]) +; CHECK: [[DBG52]] = !DILocation(line: 9, column: 23, scope: [[META50]]) +; CHECK: [[DBG53]] = !DILocation(line: 9, column: 21, scope: [[META50]]) +; CHECK: [[DBG54]] = !DILocation(line: 9, column: 3, scope: [[META45]]) +; CHECK: [[DBG55]] = !DILocation(line: 9, column: 3, scope: [[META50]]) +; CHECK: [[DBG56]] = !DILocation(line: 10, column: 5, scope: [[META50]]) +; CHECK: [[DBG57]] = !DILocation(line: 10, column: 7, scope: [[META50]]) +; CHECK: [[DBG58]] = !DILocation(line: 10, column: 10, scope: [[META50]]) +; CHECK: [[DBG59]] = !DILocation(line: 9, column: 27, scope: [[META50]]) +; CHECK: [[LOOP60]] = distinct !{[[LOOP60]], [[DBG54]], [[META61:![0-9]+]]} +; 
CHECK: [[META61]] = !DILocation(line: 10, column: 12, scope: [[META45]]) +; CHECK: [[DBG62]] = !DILocation(line: 11, column: 1, scope: [[DBG41]]) +;. diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll.funcsig.noglobals.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll.funcsig.noglobals.expected new file mode 100644 index 0000000..1f9c37c --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll.funcsig.noglobals.expected @@ -0,0 +1,238 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; Just run it through opt, no passes needed. +; RUN: opt < %s -S --write-experimental-debuginfo=true | FileCheck %s + +; ModuleID = 'various_ir_values.c' +source_filename = "various_ir_values.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define dso_local void @foo(ptr %A) #0 !dbg !7 { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, !DIAssignID [[DIASSIGNID16:![0-9]+]] +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: #dbg_assign(i1 undef, [[META13:![0-9]+]], !DIExpression(), [[DIASSIGNID16]], ptr [[A_ADDR]], !DIExpression(), [[META17:![0-9]+]]) +; CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA18:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META13]], !DIExpression(), [[META17]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR2:[0-9]+]], !dbg [[DBG22:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[I]], [[META14:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) +; CHECK-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META23]], !tbaa [[TBAA24:![0-9]+]] +; CHECK-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG22]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = 
load i32, ptr [[I]], align 4, !dbg [[DBG26:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG28:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG29:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP2]], !dbg [[DBG30:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]], !dbg [[DBG31:![0-9]+]], !prof [[PROF32:![0-9]+]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR2]], !dbg [[DBG33:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG34:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG35:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64, !dbg [[DBG34]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]], !dbg [[DBG34]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !dbg [[DBG36:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG34]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG37:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG37]] +; CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4, !dbg [[DBG37]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_COND]], !dbg [[DBG33]], !llvm.loop [[LOOP38:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void, !dbg [[DBG40:![0-9]+]] +; +entry: + %A.addr = alloca ptr, align 8, !DIAssignID !16 + %i = alloca i32, align 4 + #dbg_assign(i1 undef, !13, !DIExpression(), !16, ptr %A.addr, !DIExpression(), !17) + store ptr %A, ptr %A.addr, align 8, !tbaa !18 + #dbg_declare(ptr %A.addr, !13, !DIExpression(), !17) + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2, !dbg !22 + 
#dbg_declare(ptr %i, !14, !DIExpression(), !23) + store i32 0, ptr %i, align 4, !dbg !23, !tbaa !24 + br label %for.cond, !dbg !22 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4, !dbg !26, !tbaa !24 + %1 = load ptr, ptr %A.addr, align 8, !dbg !28, !tbaa !18 + %2 = load i32, ptr %1, align 4, !dbg !29, !tbaa !24 + %cmp = icmp slt i32 %0, %2, !dbg !30 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !31, !prof !32 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2, !dbg !33 + br label %for.end + +for.body: ; preds = %for.cond + %3 = load ptr, ptr %A.addr, align 8, !dbg !34, !tbaa !18 + %4 = load i32, ptr %i, align 4, !dbg !35, !tbaa !24 + %idxprom = sext i32 %4 to i64, !dbg !34 + %arrayidx = getelementptr inbounds i32, ptr %3, i64 %idxprom, !dbg !34 + store i32 0, ptr %arrayidx, align 4, !dbg !36, !tbaa !24 + br label %for.inc, !dbg !34 + +for.inc: ; preds = %for.body + %5 = load i32, ptr %i, align 4, !dbg !37, !tbaa !24 + %inc = add nsw i32 %5, 1, !dbg !37 + store i32 %inc, ptr %i, align 4, !dbg !37, !tbaa !24 + br label %for.cond, !dbg !33, !llvm.loop !38 + +for.end: ; preds = %for.cond.cleanup + ret void, !dbg !40 +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nounwind uwtable +define dso_local void @bar(ptr %A) #0 !dbg !41 { +; CHECK-LABEL: @bar( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA18]] +; CHECK-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META43:![0-9]+]], !DIExpression(), [[META46:![0-9]+]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, 
ptr [[I]]) #[[ATTR2]], !dbg [[DBG47:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[I]], [[META44:![0-9]+]], !DIExpression(), [[META48:![0-9]+]]) +; CHECK-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META48]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG47]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG49:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG51:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG52:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP2]], !dbg [[DBG53:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]], !dbg [[DBG54:![0-9]+]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR2]], !dbg [[DBG55:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG56:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG57:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64, !dbg [[DBG56]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]], !dbg [[DBG56]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !dbg [[DBG58:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG56]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG59:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG59]] +; CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4, !dbg [[DBG59]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_COND]], !dbg [[DBG55]], !llvm.loop [[LOOP60:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void, !dbg [[DBG62:![0-9]+]] +; +entry: + %A.addr = alloca ptr, 
align 8 + %i = alloca i32, align 4 + store ptr %A, ptr %A.addr, align 8, !tbaa !18 + #dbg_declare(ptr %A.addr, !43, !DIExpression(), !46) + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2, !dbg !47 + #dbg_declare(ptr %i, !44, !DIExpression(), !48) + store i32 0, ptr %i, align 4, !dbg !48, !tbaa !24 + br label %for.cond, !dbg !47 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4, !dbg !49, !tbaa !24 + %1 = load ptr, ptr %A.addr, align 8, !dbg !51, !tbaa !18 + %2 = load i32, ptr %1, align 4, !dbg !52, !tbaa !24 + %cmp = icmp slt i32 %0, %2, !dbg !53 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !54 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2, !dbg !55 + br label %for.end + +for.body: ; preds = %for.cond + %3 = load ptr, ptr %A.addr, align 8, !dbg !56, !tbaa !18 + %4 = load i32, ptr %i, align 4, !dbg !57, !tbaa !24 + %idxprom = sext i32 %4 to i64, !dbg !56 + %arrayidx = getelementptr inbounds i32, ptr %3, i64 %idxprom, !dbg !56 + store i32 0, ptr %arrayidx, align 4, !dbg !58, !tbaa !24 + br label %for.inc, !dbg !56 + +for.inc: ; preds = %for.body + %5 = load i32, ptr %i, align 4, !dbg !59, !tbaa !24 + %inc = add nsw i32 %5, 1, !dbg !59 + store i32 %inc, ptr %i, align 4, !dbg !59, !tbaa !24 + br label %for.cond, !dbg !55, !llvm.loop !60 + +for.end: ; preds = %for.cond.cleanup + ret void, !dbg !62 +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nocallback 
nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0 (git@github.com:llvm/llvm-project.git 1d5da8cd30fce1c0a2c2fa6ba656dbfaa36192c8)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "various_ir_values.c", directory: "/data/build/llvm-project") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 1d5da8cd30fce1c0a2c2fa6ba656dbfaa36192c8)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10} +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !{!13, !14} +!13 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10) +!14 = !DILocalVariable(name: "i", scope: !15, file: !1, line: 3, type: !11) +!15 = distinct !DILexicalBlock(scope: !7, file: !1, line: 3, column: 3) +!16 = distinct !DIAssignID() +!17 = !DILocation(line: 1, column: 15, scope: !7) +!18 = !{!19, !19, i64 0} +!19 = !{!"any pointer", !20, i64 0} +!20 = !{!"omnipotent char", !21, i64 0} +!21 = !{!"Simple C/C++ TBAA"} +!22 = !DILocation(line: 3, column: 8, scope: !15) +!23 = !DILocation(line: 3, column: 12, scope: !15) +!24 = !{!25, !25, i64 0} +!25 = !{!"int", !20, i64 0} +!26 = !DILocation(line: 3, column: 19, scope: !27) +!27 = distinct !DILexicalBlock(scope: !15, file: !1, line: 3, column: 3) +!28 = 
!DILocation(line: 3, column: 24, scope: !27) +!29 = !DILocation(line: 3, column: 23, scope: !27) +!30 = !DILocation(line: 3, column: 21, scope: !27) +!31 = !DILocation(line: 3, column: 3, scope: !15) +!32 = !{!"branch_weights", i32 1, i32 1048575} +!33 = !DILocation(line: 3, column: 3, scope: !27) +!34 = !DILocation(line: 4, column: 5, scope: !27) +!35 = !DILocation(line: 4, column: 7, scope: !27) +!36 = !DILocation(line: 4, column: 10, scope: !27) +!37 = !DILocation(line: 3, column: 27, scope: !27) +!38 = distinct !{!38, !31, !39} +!39 = !DILocation(line: 4, column: 12, scope: !15) +!40 = !DILocation(line: 5, column: 1, scope: !7) +!41 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 7, type: !8, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !42) +!42 = !{!43, !44} +!43 = !DILocalVariable(name: "A", arg: 1, scope: !41, file: !1, line: 7, type: !10) +!44 = !DILocalVariable(name: "i", scope: !45, file: !1, line: 9, type: !11) +!45 = distinct !DILexicalBlock(scope: !41, file: !1, line: 9, column: 3) +!46 = !DILocation(line: 7, column: 15, scope: !41) +!47 = !DILocation(line: 9, column: 8, scope: !45) +!48 = !DILocation(line: 9, column: 12, scope: !45) +!49 = !DILocation(line: 9, column: 19, scope: !50) +!50 = distinct !DILexicalBlock(scope: !45, file: !1, line: 9, column: 3) +!51 = !DILocation(line: 9, column: 24, scope: !50) +!52 = !DILocation(line: 9, column: 23, scope: !50) +!53 = !DILocation(line: 9, column: 21, scope: !50) +!54 = !DILocation(line: 9, column: 3, scope: !45) +!55 = !DILocation(line: 9, column: 3, scope: !50) +!56 = !DILocation(line: 10, column: 5, scope: !50) +!57 = !DILocation(line: 10, column: 7, scope: !50) +!58 = !DILocation(line: 10, column: 10, scope: !50) +!59 = !DILocation(line: 9, column: 27, scope: !50) +!60 = distinct !{!60, !54, !61} +!61 = !DILocation(line: 10, column: 12, scope: !45) +!62 = !DILocation(line: 11, column: 1, 
scope: !41) diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll.funcsig.transitiveglobals.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll.funcsig.transitiveglobals.expected new file mode 100644 index 0000000..e2c4260 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values_dbgrecords.ll.funcsig.transitiveglobals.expected @@ -0,0 +1,299 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart +; Just run it through opt, no passes needed. +; RUN: opt < %s -S --write-experimental-debuginfo=true | FileCheck %s + +; ModuleID = 'various_ir_values.c' +source_filename = "various_ir_values.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define dso_local void @foo(ptr %A) #0 !dbg !7 { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, !DIAssignID [[DIASSIGNID16:![0-9]+]] +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: #dbg_assign(i1 undef, [[META13:![0-9]+]], !DIExpression(), [[DIASSIGNID16]], ptr [[A_ADDR]], !DIExpression(), [[META17:![0-9]+]]) +; CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA18:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META13]], !DIExpression(), [[META17]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR2:[0-9]+]], !dbg [[DBG22:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[I]], [[META14:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) +; CHECK-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META23]], !tbaa [[TBAA24:![0-9]+]] +; CHECK-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG22]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG26:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG28:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG29:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP2]], !dbg [[DBG30:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]], !dbg [[DBG31:![0-9]+]], !prof [[PROF32:![0-9]+]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR2]], !dbg [[DBG33:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG34:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG35:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64, !dbg [[DBG34]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]], !dbg [[DBG34]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !dbg [[DBG36:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG34]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG37:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG37]] +; CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4, !dbg [[DBG37]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_COND]], !dbg [[DBG33]], !llvm.loop [[LOOP38:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void, !dbg [[DBG40:![0-9]+]] +; +entry: + %A.addr = alloca ptr, align 8, !DIAssignID !16 + %i = alloca i32, align 4 + #dbg_assign(i1 undef, !13, !DIExpression(), !16, ptr %A.addr, !DIExpression(), !17) + store ptr %A, ptr %A.addr, align 8, !tbaa !18 + #dbg_declare(ptr %A.addr, !13, !DIExpression(), !17) + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2, !dbg !22 + #dbg_declare(ptr %i, !14, !DIExpression(), !23) + store i32 0, ptr %i, align 4, !dbg 
!23, !tbaa !24 + br label %for.cond, !dbg !22 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4, !dbg !26, !tbaa !24 + %1 = load ptr, ptr %A.addr, align 8, !dbg !28, !tbaa !18 + %2 = load i32, ptr %1, align 4, !dbg !29, !tbaa !24 + %cmp = icmp slt i32 %0, %2, !dbg !30 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !31, !prof !32 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2, !dbg !33 + br label %for.end + +for.body: ; preds = %for.cond + %3 = load ptr, ptr %A.addr, align 8, !dbg !34, !tbaa !18 + %4 = load i32, ptr %i, align 4, !dbg !35, !tbaa !24 + %idxprom = sext i32 %4 to i64, !dbg !34 + %arrayidx = getelementptr inbounds i32, ptr %3, i64 %idxprom, !dbg !34 + store i32 0, ptr %arrayidx, align 4, !dbg !36, !tbaa !24 + br label %for.inc, !dbg !34 + +for.inc: ; preds = %for.body + %5 = load i32, ptr %i, align 4, !dbg !37, !tbaa !24 + %inc = add nsw i32 %5, 1, !dbg !37 + store i32 %inc, ptr %i, align 4, !dbg !37, !tbaa !24 + br label %for.cond, !dbg !33, !llvm.loop !38 + +for.end: ; preds = %for.cond.cleanup + ret void, !dbg !40 +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nounwind uwtable +define dso_local void @bar(ptr %A) #0 !dbg !41 { +; CHECK-LABEL: @bar( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +; CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8, !tbaa [[TBAA18]] +; CHECK-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META43:![0-9]+]], !DIExpression(), [[META46:![0-9]+]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[I]]) #[[ATTR2]], !dbg [[DBG47:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[I]], 
[[META44:![0-9]+]], !DIExpression(), [[META48:![0-9]+]]) +; CHECK-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META48]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG47]] +; CHECK: for.cond: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG49:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG51:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG52:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], [[TMP2]], !dbg [[DBG53:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]], !dbg [[DBG54:![0-9]+]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[I]]) #[[ATTR2]], !dbg [[DBG55:![0-9]+]] +; CHECK-NEXT: br label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG56:![0-9]+]], !tbaa [[TBAA18]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG57:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64, !dbg [[DBG56]] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]], !dbg [[DBG56]] +; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX]], align 4, !dbg [[DBG58:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG56]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG59:![0-9]+]], !tbaa [[TBAA24]] +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1, !dbg [[DBG59]] +; CHECK-NEXT: store i32 [[INC]], ptr [[I]], align 4, !dbg [[DBG59]], !tbaa [[TBAA24]] +; CHECK-NEXT: br label [[FOR_COND]], !dbg [[DBG55]], !llvm.loop [[LOOP60:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void, !dbg [[DBG62:![0-9]+]] +; +entry: + %A.addr = alloca ptr, align 8 + %i = alloca i32, align 4 + store ptr %A, ptr %A.addr, align 8, !tbaa !18 + 
#dbg_declare(ptr %A.addr, !43, !DIExpression(), !46) + call void @llvm.lifetime.start.p0(i64 4, ptr %i) #2, !dbg !47 + #dbg_declare(ptr %i, !44, !DIExpression(), !48) + store i32 0, ptr %i, align 4, !dbg !48, !tbaa !24 + br label %for.cond, !dbg !47 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, ptr %i, align 4, !dbg !49, !tbaa !24 + %1 = load ptr, ptr %A.addr, align 8, !dbg !51, !tbaa !18 + %2 = load i32, ptr %1, align 4, !dbg !52, !tbaa !24 + %cmp = icmp slt i32 %0, %2, !dbg !53 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !54 + +for.cond.cleanup: ; preds = %for.cond + call void @llvm.lifetime.end.p0(i64 4, ptr %i) #2, !dbg !55 + br label %for.end + +for.body: ; preds = %for.cond + %3 = load ptr, ptr %A.addr, align 8, !dbg !56, !tbaa !18 + %4 = load i32, ptr %i, align 4, !dbg !57, !tbaa !24 + %idxprom = sext i32 %4 to i64, !dbg !56 + %arrayidx = getelementptr inbounds i32, ptr %3, i64 %idxprom, !dbg !56 + store i32 0, ptr %arrayidx, align 4, !dbg !58, !tbaa !24 + br label %for.inc, !dbg !56 + +for.inc: ; preds = %for.body + %5 = load i32, ptr %i, align 4, !dbg !59, !tbaa !24 + %inc = add nsw i32 %5, 1, !dbg !59 + store i32 %inc, ptr %i, align 4, !dbg !59, !tbaa !24 + br label %for.cond, !dbg !55, !llvm.loop !60 + +for.end: ; preds = %for.cond.cleanup + ret void, !dbg !62 +} + +attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { 
nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 11.0.0 (git@github.com:llvm/llvm-project.git 1d5da8cd30fce1c0a2c2fa6ba656dbfaa36192c8)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "various_ir_values.c", directory: "/data/build/llvm-project") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{!"clang version 11.0.0 (git@github.com:llvm/llvm-project.git 1d5da8cd30fce1c0a2c2fa6ba656dbfaa36192c8)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12) +!8 = !DISubroutineType(types: !9) +!9 = !{null, !10} +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !11, size: 64) +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !{!13, !14} +!13 = !DILocalVariable(name: "A", arg: 1, scope: !7, file: !1, line: 1, type: !10) +!14 = !DILocalVariable(name: "i", scope: !15, file: !1, line: 3, type: !11) +!15 = distinct !DILexicalBlock(scope: !7, file: !1, line: 3, column: 3) +!16 = distinct !DIAssignID() +!17 = !DILocation(line: 1, column: 15, scope: !7) +!18 = !{!19, !19, i64 0} +!19 = !{!"any pointer", !20, i64 0} +!20 = !{!"omnipotent char", !21, i64 0} +!21 = !{!"Simple C/C++ TBAA"} +!22 = !DILocation(line: 3, column: 8, scope: !15) +!23 = !DILocation(line: 3, column: 12, scope: !15) +!24 = !{!25, !25, i64 0} +!25 = !{!"int", !20, i64 0} +!26 = !DILocation(line: 3, column: 19, scope: !27) +!27 = distinct !DILexicalBlock(scope: !15, file: !1, line: 3, column: 3) +!28 = !DILocation(line: 3, column: 24, scope: !27) +!29 = !DILocation(line: 3, column: 23, scope: 
!27) +!30 = !DILocation(line: 3, column: 21, scope: !27) +!31 = !DILocation(line: 3, column: 3, scope: !15) +!32 = !{!"branch_weights", i32 1, i32 1048575} +!33 = !DILocation(line: 3, column: 3, scope: !27) +!34 = !DILocation(line: 4, column: 5, scope: !27) +!35 = !DILocation(line: 4, column: 7, scope: !27) +!36 = !DILocation(line: 4, column: 10, scope: !27) +!37 = !DILocation(line: 3, column: 27, scope: !27) +!38 = distinct !{!38, !31, !39} +!39 = !DILocation(line: 4, column: 12, scope: !15) +!40 = !DILocation(line: 5, column: 1, scope: !7) +!41 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 7, type: !8, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !42) +!42 = !{!43, !44} +!43 = !DILocalVariable(name: "A", arg: 1, scope: !41, file: !1, line: 7, type: !10) +!44 = !DILocalVariable(name: "i", scope: !45, file: !1, line: 9, type: !11) +!45 = distinct !DILexicalBlock(scope: !41, file: !1, line: 9, column: 3) +!46 = !DILocation(line: 7, column: 15, scope: !41) +!47 = !DILocation(line: 9, column: 8, scope: !45) +!48 = !DILocation(line: 9, column: 12, scope: !45) +!49 = !DILocation(line: 9, column: 19, scope: !50) +!50 = distinct !DILexicalBlock(scope: !45, file: !1, line: 9, column: 3) +!51 = !DILocation(line: 9, column: 24, scope: !50) +!52 = !DILocation(line: 9, column: 23, scope: !50) +!53 = !DILocation(line: 9, column: 21, scope: !50) +!54 = !DILocation(line: 9, column: 3, scope: !45) +!55 = !DILocation(line: 9, column: 3, scope: !50) +!56 = !DILocation(line: 10, column: 5, scope: !50) +!57 = !DILocation(line: 10, column: 7, scope: !50) +!58 = !DILocation(line: 10, column: 10, scope: !50) +!59 = !DILocation(line: 9, column: 27, scope: !50) +!60 = distinct !{!60, !54, !61} +!61 = !DILocation(line: 10, column: 12, scope: !45) +!62 = !DILocation(line: 11, column: 1, scope: !41) +;. 
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None) +; CHECK: [[META1]] = !DIFile(filename: "various_ir_values.c", directory: {{.*}}) +; CHECK: [[META2]] = !{} +; CHECK: [[META7:![0-9]+]] = distinct !DISubprogram(name: "foo", scope: [[META1]], file: [[META1]], line: 1, type: [[META8:![0-9]+]], scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META12:![0-9]+]]) +; CHECK: [[META8]] = !DISubroutineType(types: [[META9:![0-9]+]]) +; CHECK: [[META9]] = !{null, [[META10:![0-9]+]]} +; CHECK: [[META10]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[META11:![0-9]+]], size: 64) +; CHECK: [[META11]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +; CHECK: [[META12]] = !{[[META13]], [[META14]]} +; CHECK: [[META13]] = !DILocalVariable(name: "A", arg: 1, scope: [[META7]], file: [[META1]], line: 1, type: [[META10]]) +; CHECK: [[META14]] = !DILocalVariable(name: "i", scope: [[META15:![0-9]+]], file: [[META1]], line: 3, type: [[META11]]) +; CHECK: [[META15]] = distinct !DILexicalBlock(scope: [[META7]], file: [[META1]], line: 3, column: 3) +; CHECK: [[DIASSIGNID16]] = distinct !DIAssignID() +; CHECK: [[META17]] = !DILocation(line: 1, column: 15, scope: [[META7]]) +; CHECK: [[TBAA18]] = !{[[META19:![0-9]+]], [[META19]], i64 0} +; CHECK: [[META19]] = !{!"any pointer", [[META20:![0-9]+]], i64 0} +; CHECK: [[META20]] = !{!"omnipotent char", [[META21:![0-9]+]], i64 0} +; CHECK: [[META21]] = !{!"Simple C/C++ TBAA"} +; CHECK: [[DBG22]] = !DILocation(line: 3, column: 8, scope: [[META15]]) +; CHECK: [[META23]] = !DILocation(line: 3, column: 12, scope: [[META15]]) +; CHECK: [[TBAA24]] = !{[[META25:![0-9]+]], [[META25]], i64 0} +; CHECK: [[META25]] = 
!{!"int", [[META20]], i64 0} +; CHECK: [[DBG26]] = !DILocation(line: 3, column: 19, scope: [[META27:![0-9]+]]) +; CHECK: [[META27]] = distinct !DILexicalBlock(scope: [[META15]], file: [[META1]], line: 3, column: 3) +; CHECK: [[DBG28]] = !DILocation(line: 3, column: 24, scope: [[META27]]) +; CHECK: [[DBG29]] = !DILocation(line: 3, column: 23, scope: [[META27]]) +; CHECK: [[DBG30]] = !DILocation(line: 3, column: 21, scope: [[META27]]) +; CHECK: [[DBG31]] = !DILocation(line: 3, column: 3, scope: [[META15]]) +; CHECK: [[PROF32]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[DBG33]] = !DILocation(line: 3, column: 3, scope: [[META27]]) +; CHECK: [[DBG34]] = !DILocation(line: 4, column: 5, scope: [[META27]]) +; CHECK: [[DBG35]] = !DILocation(line: 4, column: 7, scope: [[META27]]) +; CHECK: [[DBG36]] = !DILocation(line: 4, column: 10, scope: [[META27]]) +; CHECK: [[DBG37]] = !DILocation(line: 3, column: 27, scope: [[META27]]) +; CHECK: [[LOOP38]] = distinct !{[[LOOP38]], [[DBG31]], [[META39:![0-9]+]]} +; CHECK: [[META39]] = !DILocation(line: 4, column: 12, scope: [[META15]]) +; CHECK: [[DBG40]] = !DILocation(line: 5, column: 1, scope: [[META7]]) +; CHECK: [[META41:![0-9]+]] = distinct !DISubprogram(name: "bar", scope: [[META1]], file: [[META1]], line: 7, type: [[META8]], scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: [[META0]], retainedNodes: [[META42:![0-9]+]]) +; CHECK: [[META42]] = !{[[META43]], [[META44]]} +; CHECK: [[META43]] = !DILocalVariable(name: "A", arg: 1, scope: [[META41]], file: [[META1]], line: 7, type: [[META10]]) +; CHECK: [[META44]] = !DILocalVariable(name: "i", scope: [[META45:![0-9]+]], file: [[META1]], line: 9, type: [[META11]]) +; CHECK: [[META45]] = distinct !DILexicalBlock(scope: [[META41]], file: [[META1]], line: 9, column: 3) +; CHECK: [[META46]] = !DILocation(line: 7, column: 15, scope: [[META41]]) +; CHECK: [[DBG47]] = !DILocation(line: 9, column: 8, scope: 
[[META45]]) +; CHECK: [[META48]] = !DILocation(line: 9, column: 12, scope: [[META45]]) +; CHECK: [[DBG49]] = !DILocation(line: 9, column: 19, scope: [[META50:![0-9]+]]) +; CHECK: [[META50]] = distinct !DILexicalBlock(scope: [[META45]], file: [[META1]], line: 9, column: 3) +; CHECK: [[DBG51]] = !DILocation(line: 9, column: 24, scope: [[META50]]) +; CHECK: [[DBG52]] = !DILocation(line: 9, column: 23, scope: [[META50]]) +; CHECK: [[DBG53]] = !DILocation(line: 9, column: 21, scope: [[META50]]) +; CHECK: [[DBG54]] = !DILocation(line: 9, column: 3, scope: [[META45]]) +; CHECK: [[DBG55]] = !DILocation(line: 9, column: 3, scope: [[META50]]) +; CHECK: [[DBG56]] = !DILocation(line: 10, column: 5, scope: [[META50]]) +; CHECK: [[DBG57]] = !DILocation(line: 10, column: 7, scope: [[META50]]) +; CHECK: [[DBG58]] = !DILocation(line: 10, column: 10, scope: [[META50]]) +; CHECK: [[DBG59]] = !DILocation(line: 9, column: 27, scope: [[META50]]) +; CHECK: [[LOOP60]] = distinct !{[[LOOP60]], [[DBG54]], [[META61:![0-9]+]]} +; CHECK: [[META61]] = !DILocation(line: 10, column: 12, scope: [[META45]]) +; CHECK: [[DBG62]] = !DILocation(line: 11, column: 1, scope: [[META41]]) +;. 
diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/phi-labels.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/phi-labels.test new file mode 100644 index 0000000..411c84d --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/phi-labels.test @@ -0,0 +1,5 @@ +# RUN: cp -f %S/Inputs/phi-labels.ll %t.ll && %update_test_checks --version 4 %t.ll +# RUN: diff -u %t.ll %S/Inputs/phi-labels.ll.expected +## Check that running the script again does not change the result: +# RUN: %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/phi-labels.ll.expected diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values_funcs.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values_funcs.test new file mode 100644 index 0000000..5132fb9 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/stable_ir_values_funcs.test @@ -0,0 +1,2 @@ +# RUN: cp -f %S/Inputs/stable_ir_values_funcs.ll %t.ll && %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/stable_ir_values_funcs.ll.expected diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/various_ir_values_dbgrecords.test b/llvm/test/tools/UpdateTestChecks/update_test_checks/various_ir_values_dbgrecords.test new file mode 100644 index 0000000..9cc77d8 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/various_ir_values_dbgrecords.test @@ -0,0 +1,24 @@ +## Basic test checking that update_test_checks.py works correctly on various "IR value" kinds +# RUN: cp -f %S/Inputs/various_ir_values_dbgrecords.ll %t.ll && %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/various_ir_values_dbgrecords.ll.expected +## Check that running the script again does not change the result: +# RUN: %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/various_ir_values_dbgrecords.ll.expected +## Also try the --function-signature flag +# RUN: %update_test_checks %t.ll --function-signature +# RUN: diff -u %t.ll 
%S/Inputs/various_ir_values_dbgrecords.ll.funcsig.expected +## Verify that running without the --function-signature flag does not removes +## the -SAME: lines since the generated file will have --function-signature in +## an UTC_ARGS: comment in the first line (from the invocation above) which is +## added to the update invocation below. +# RUN: %update_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/various_ir_values_dbgrecords.ll.funcsig.expected +## Also try the --check-globals flag +# RUN: %update_test_checks %t.ll --check-globals +# RUN: diff -u %t.ll %S/Inputs/various_ir_values_dbgrecords.ll.funcsig.globals.expected +# RUN: cp -f %S/Inputs/various_ir_values_dbgrecords.ll %t.ll && %update_test_checks %t.ll --function-signature --check-globals all +# RUN: diff -u %t.ll %S/Inputs/various_ir_values_dbgrecords.ll.funcsig.globals.expected +# RUN: cp -f %S/Inputs/various_ir_values_dbgrecords.ll %t.ll && %update_test_checks %t.ll --check-globals none +# RUN: diff -u %t.ll %S/Inputs/various_ir_values_dbgrecords.ll.funcsig.noglobals.expected +# RUN: cp -f %S/Inputs/various_ir_values_dbgrecords.ll %t.ll && %update_test_checks %t.ll --check-globals smart +# RUN: diff -u %t.ll %S/Inputs/various_ir_values_dbgrecords.ll.funcsig.transitiveglobals.expected diff --git a/llvm/test/tools/llvm-objcopy/ELF/compress-sections-within-segment.s b/llvm/test/tools/llvm-objcopy/ELF/compress-sections-within-segment.s new file mode 100644 index 0000000..064ffca --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/ELF/compress-sections-within-segment.s @@ -0,0 +1,38 @@ +## Disallow (de)compression for sections within a segment as they are +## effectively immutable. 
+# RUN: rm -rf %t && mkdir %t && cd %t +# RUN: yaml2obj %s -o a +# RUN: not llvm-objcopy a /dev/null --compress-sections .text=zlib 2>&1 | FileCheck %s --implicit-check-not=error: + +# CHECK: error: 'a': section '.text' within a segment cannot be (de)compressed + +# RUN: not llvm-objcopy a /dev/null --compress-sections foo=none 2>&1 | FileCheck %s --check-prefix=CHECK2 --implicit-check-not=error: + +# CHECK2: error: 'a': section 'foo' within a segment cannot be (de)compressed + +## There is an error even if 'foo' is already compressed with zlib. +# RUN: not llvm-objcopy a /dev/null --compress-sections foo=zlib 2>&1 | FileCheck %s --check-prefix=CHECK3 --implicit-check-not=error: + +# CHECK3: error: 'a': section 'foo' within a segment cannot be (de)compressed + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +ProgramHeaders: + - Type: PT_LOAD + FirstSec: .text + LastSec: foo + Align: 0x1000 + Offset: 0x1000 +Sections: + - Name: .text + Type: SHT_PROGBITS + Offset: 0x1000 + Content: C3 + - Name: foo + Type: SHT_PROGBITS + Flags: [ SHF_COMPRESSED ] + Content: 010000000000000040000000000000000100000000000000789cd36280002d3269002f800151 diff --git a/llvm/test/tools/llvm-objcopy/ELF/compress-sections.s b/llvm/test/tools/llvm-objcopy/ELF/compress-sections.s new file mode 100644 index 0000000..e6fa860 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/ELF/compress-sections.s @@ -0,0 +1,128 @@ +# REQUIRES: x86-registered-target, zlib, zstd + +# RUN: rm -rf %t && mkdir %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o a.o +## '*0=none' wins because it is the last. '*0' sections are decompressed (if originally compressed) or kept unchanged (if uncompressed). +## No section is named 'nomatch'. The third option is a no-op. 
+# RUN: llvm-objcopy a.o out --compress-sections='*0=zlib' --compress-sections '*0=none' --compress-sections 'nomatch=none' 2>&1 | count 0 +# RUN: llvm-readelf -S out | FileCheck %s --check-prefix=CHECK1 + +# CHECK1: Name Type Address Off Size ES Flg Lk Inf Al +# CHECK1: .text PROGBITS [[#%x,TEXT:]] [[#%x,]] [[#%x,]] 00 AX 0 0 4 +# CHECK1: foo0 PROGBITS [[#%x,FOO0:]] [[#%x,]] [[#%x,]] 00 A 0 0 8 +# CHECK1-NEXT: .relafoo0 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 I 11 3 8 +# CHECK1-NEXT: foo1 PROGBITS [[#%x,FOO1:]] [[#%x,]] [[#%x,]] 00 A 0 0 8 +# CHECK1-NEXT: .relafoo1 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 I 11 5 8 +# CHECK1: nonalloc0 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 8 +# CHECK1-NEXT: .relanonalloc0 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 I 11 7 8 +# CHECK1-NEXT: nonalloc1 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 8 +# CHECK1-NEXT: .debug_str PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 01 MS 0 0 1 + +## Mixing zlib and zstd. +# RUN: llvm-objcopy a.o out2 --compress-sections '*c0=zlib' --compress-sections .debug_str=zstd +# RUN: llvm-readelf -Sr -x nonalloc0 -x .debug_str out2 2>&1 | FileCheck %s --check-prefix=CHECK2 +# RUN: llvm-readelf -z -x nonalloc0 -x .debug_str out2 | FileCheck %s --check-prefix=CHECK2DE + +# CHECK2: Name Type Address Off Size ES Flg Lk Inf Al +# CHECK2: .text PROGBITS [[#%x,TEXT:]] [[#%x,]] [[#%x,]] 00 AX 0 0 4 +# CHECK2: foo0 PROGBITS [[#%x,FOO0:]] [[#%x,]] [[#%x,]] 00 A 0 0 8 +# CHECK2-NEXT: .relafoo0 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 I 11 3 8 +# CHECK2-NEXT: foo1 PROGBITS [[#%x,FOO1:]] [[#%x,]] [[#%x,]] 00 A 0 0 8 +# CHECK2-NEXT: .relafoo1 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 I 11 5 8 +# CHECK2: nonalloc0 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 C 0 0 8 +# CHECK2-NEXT: .relanonalloc0 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 IC 11 7 8 +# CHECK2-NEXT: nonalloc1 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 8 +# CHECK2-NEXT: .debug_str PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 01 MSC 0 0 8 + +## llvm-readelf -r doesn't 
support SHF_COMPRESSED SHT_RELA. +# CHECK2: warning: {{.*}}: unable to read relocations from SHT_RELA section with index 8: section [index 8] has an invalid sh_size ([[#]]) which is not a multiple of its sh_entsize (24) + +# CHECK2: Hex dump of section 'nonalloc0': +## zlib with ch_size=0x10 +# CHECK2-NEXT: 01000000 00000000 10000000 00000000 +# CHECK2-NEXT: 08000000 00000000 {{.*}} +# CHECK2: Hex dump of section '.debug_str': +## zstd with ch_size=0x38 +# CHECK2-NEXT: 02000000 00000000 38000000 00000000 +# CHECK2-NEXT: 01000000 00000000 {{.*}} + +# CHECK2DE: Hex dump of section 'nonalloc0': +# CHECK2DE-NEXT: 0x00000000 00000000 00000000 00000000 00000000 ................ +# CHECK2DE-EMPTY: +# CHECK2DE-NEXT: Hex dump of section '.debug_str': +# CHECK2DE-NEXT: 0x00000000 41414141 41414141 41414141 41414141 AAAAAAAAAAAAAAAA + +## --decompress-debug-sections takes precedence, even if it is before --compress-sections. +# RUN: llvm-objcopy a.o out3 --decompress-debug-sections --compress-sections .debug_str=zstd +# RUN: llvm-readelf -S out3 | FileCheck %s --check-prefix=CHECK3 + +# CHECK3: .debug_str PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 01 MS 0 0 1 + +# RUN: llvm-objcopy a.o out4 --compress-sections '*0=zlib' +# RUN: llvm-readelf -S out4 | FileCheck %s --check-prefix=CHECK4 + +# CHECK4: Name Type Address Off Size ES Flg Lk Inf Al +# CHECK4: .text PROGBITS [[#%x,TEXT:]] [[#%x,]] [[#%x,]] 00 AX 0 0 4 +# CHECK4: foo0 PROGBITS [[#%x,FOO0:]] [[#%x,]] [[#%x,]] 00 AC 0 0 8 +# CHECK4-NEXT: .relafoo0 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 IC 11 3 8 +# CHECK4-NEXT: foo1 PROGBITS [[#%x,FOO1:]] [[#%x,]] [[#%x,]] 00 A 0 0 8 +# CHECK4-NEXT: .relafoo1 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 I 11 5 8 +# CHECK4: nonalloc0 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 C 0 0 8 +# CHECK4-NEXT: .relanonalloc0 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 IC 11 7 8 +# CHECK4-NEXT: nonalloc1 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 8 +# CHECK4-NEXT: .debug_str PROGBITS 0000000000000000 [[#%x,]] 
[[#%x,]] 01 MS 0 0 1 + +## If a section is already compressed, compression request for another format is ignored. +# RUN: llvm-objcopy a.o out5 --compress-sections 'nonalloc0=zlib' +# RUN: llvm-readelf -x nonalloc0 out5 | FileCheck %s --check-prefix=CHECK5 +# RUN: llvm-objcopy out5 out5a --compress-sections 'nonalloc0=zstd' +# RUN: cmp out5 out5a + +# CHECK5: Hex dump of section 'nonalloc0': +## zlib with ch_size=0x10 +# CHECK5-NEXT: 01000000 00000000 10000000 00000000 +# CHECK5-NEXT: 08000000 00000000 {{.*}} + +# RUN: not llvm-objcopy --compress-sections=foo a.o out 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERR1 --implicit-check-not=error: +# ERR1: error: --compress-sections: parse error, not 'section-glob=[none|zlib|zstd]' + +# RUN: llvm-objcopy --compress-sections 'a[=zlib' a.o out 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERR2 --implicit-check-not=error: +# ERR2: warning: invalid glob pattern, unmatched '[' + +# RUN: not llvm-objcopy a.o out --compress-sections='.debug*=zlib-gabi' --compress-sections='.debug*=' 2>&1 | \ +# RUN: FileCheck -check-prefix=ERR3 %s +# ERR3: error: invalid or unsupported --compress-sections format: .debug*=zlib-gabi + +# RUN: not llvm-objcopy a.o out --compress-sections='!.debug*=zlib' 2>&1 | \ +# RUN: FileCheck -check-prefix=ERR4 %s +# ERR4: error: --compress-sections: negative pattern is unsupported + +.globl _start +_start: + ret + +.section foo0,"a" +.balign 8 +.quad .text-. +.quad .text-. +.section foo1,"a" +.balign 8 +.quad .text-. +.quad .text-. 
+.section nonalloc0,"" +.balign 8 +.quad .text+1 +.quad .text+2 +sym0: +.section nonalloc1,"" +.balign 8 +.quad 42 +sym1: + +.section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "AAAAAAAAAAAAAAAAAAAAAAAAAAA" +.Linfo_string1: + .asciz "BBBBBBBBBBBBBBBBBBBBBBBBBBB" diff --git a/llvm/test/tools/llvm-objcopy/ELF/decompress-sections.test b/llvm/test/tools/llvm-objcopy/ELF/decompress-sections.test index 4258ddb..d9f4f38 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/decompress-sections.test +++ b/llvm/test/tools/llvm-objcopy/ELF/decompress-sections.test @@ -4,6 +4,8 @@ # RUN: yaml2obj %s -o %t # RUN: llvm-objcopy --decompress-debug-sections %t %t.de # RUN: llvm-readelf -S %t.de | FileCheck %s +# RUN: llvm-objcopy --compress-sections '*nonalloc=none' --compress-sections .debugx=none %t %t.1.de +# RUN: cmp %t.de %t.1.de # CHECK: Name Type Address Off Size ES Flg Lk Inf Al # CHECK: .debug_alloc PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 AC 0 0 0 @@ -11,6 +13,33 @@ # CHECK-NEXT: .debugx PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 1 # CHECK-NEXT: nodebug PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 C 0 0 0 +# RUN: llvm-objcopy --compress-sections '.debug*=none' %t %t2.de +# RUN: llvm-readelf -S -x .debug_alloc -x .debug_nonalloc -x .debugx %t2.de | FileCheck %s --check-prefix=CHECK2 + +# CHECK2: Name Type Address Off Size ES Flg Lk Inf Al +# CHECK2: .debug_alloc PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 A 0 0 1 +# CHECK2-NEXT: .debug_nonalloc PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 1 +# CHECK2-NEXT: .debugx PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 1 +# CHECK2-NEXT: nodebug PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 C 0 0 0 + +# CHECK2: Hex dump of section '.debug_alloc': +# CHECK2-NEXT: 0x00000000 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000010 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000020 2a000000 00000000 2a000000 00000000 *.......*....... 
+# CHECK2-NEXT: 0x00000030 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-EMPTY: +# CHECK2: Hex dump of section '.debug_nonalloc': +# CHECK2-NEXT: 0x00000000 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000010 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000020 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000030 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-EMPTY: +# CHECK2-NEXT: Hex dump of section '.debugx': +# CHECK2-NEXT: 0x00000000 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000010 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000020 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000030 2a000000 00000000 2a000000 00000000 *.......*....... + --- !ELF FileHeader: Class: ELFCLASS64 diff --git a/llvm/test/tools/llvm-readobj/ELF/ARM/attribute-big-endian.test b/llvm/test/tools/llvm-readobj/ELF/ARM/attribute-big-endian.test index 7d20b31..3b94c79 100644 --- a/llvm/test/tools/llvm-readobj/ELF/ARM/attribute-big-endian.test +++ b/llvm/test/tools/llvm-readobj/ELF/ARM/attribute-big-endian.test @@ -1,17 +1,23 @@ -## We only implement attribute section printing for little-endian encoding. - # RUN: yaml2obj %s -o %t.o -# RUN: llvm-readobj -A %t.o 2>&1 | FileCheck %s -DFILE=%t.o +# RUN: llvm-readelf -A %t.o 2>&1 | FileCheck %s -# CHECK: warning: '[[FILE]]': attribute printing not implemented for big-endian ARM objects +# CHECK: BuildAttributes { +# CHECK-NEXT: FormatVersion: 0x41 +# CHECK-NEXT: Section 1 { +# CHECK-NEXT: SectionLength: 22 +# CHECK-NEXT: Vendor: armabi +# CHECK-NEXT: } +# CHECK-NEXT: } --- !ELF FileHeader: Class: ELFCLASS32 -## Test big-endian encoding. 
Data: ELFDATA2MSB Type: ET_REL Machine: EM_ARM Sections: - Name: .ARM.attributes Type: SHT_ARM_ATTRIBUTES + ContentArray: [ 0x41, 0x00, 0x00, 0x00, 0x16, 0x61, 0x72, 0x6D, 0x61, 0x62, + 0x69, 0x00, 0x01, 0x0b, 0x00, 0x00, 0x00, 0x04, 0x01, 0x06, 0x01, 0x08, + 0x01 ] |