Diffstat (limited to 'llvm/test/CodeGen/RISCV')
30 files changed, 6146 insertions, 646 deletions
diff --git a/llvm/test/CodeGen/RISCV/abds.ll b/llvm/test/CodeGen/RISCV/abds.ll index 28a95ef..f11a9c8 100644 --- a/llvm/test/CodeGen/RISCV/abds.ll +++ b/llvm/test/CodeGen/RISCV/abds.ll @@ -2011,50 +2011,50 @@ define i64 @abd_subnsw_i64_undef(i64 %a, i64 %b) nounwind { define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_subnsw_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a4, 0(a2) -; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a3, 0(a2) +; RV32I-NEXT: lw a4, 4(a2) ; RV32I-NEXT: lw a5, 8(a2) -; RV32I-NEXT: lw a6, 12(a2) +; RV32I-NEXT: lw a2, 12(a2) ; RV32I-NEXT: lw a7, 8(a1) ; RV32I-NEXT: lw t0, 12(a1) -; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: lw a6, 0(a1) ; RV32I-NEXT: lw a1, 4(a1) ; RV32I-NEXT: sltu t1, a7, a5 -; RV32I-NEXT: sub t0, t0, a6 -; RV32I-NEXT: sltu a6, a2, a4 +; RV32I-NEXT: sub t0, t0, a2 +; RV32I-NEXT: sltu a2, a6, a3 ; RV32I-NEXT: sub t0, t0, t1 -; RV32I-NEXT: mv t1, a6 -; RV32I-NEXT: beq a1, a3, .LBB31_2 +; RV32I-NEXT: mv t1, a2 +; RV32I-NEXT: beq a1, a4, .LBB31_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a3 +; RV32I-NEXT: sltu t1, a1, a4 ; RV32I-NEXT: .LBB31_2: ; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: sub a3, a1, a3 -; RV32I-NEXT: sltu a1, a5, t1 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sltu a4, a5, t1 ; RV32I-NEXT: sub a5, a5, t1 -; RV32I-NEXT: sub a1, t0, a1 -; RV32I-NEXT: sub a3, a3, a6 -; RV32I-NEXT: sub a2, a2, a4 -; RV32I-NEXT: bgez a1, .LBB31_4 +; RV32I-NEXT: sub a4, t0, a4 +; RV32I-NEXT: sub a2, a1, a2 +; RV32I-NEXT: sub a1, a6, a3 +; RV32I-NEXT: bgez a4, .LBB31_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: snez a4, a3 -; RV32I-NEXT: snez a6, a2 +; RV32I-NEXT: snez a3, a2 +; RV32I-NEXT: snez a6, a1 ; RV32I-NEXT: neg a7, a5 ; RV32I-NEXT: snez a5, a5 +; RV32I-NEXT: or a3, a6, a3 +; RV32I-NEXT: add a4, a4, a5 +; RV32I-NEXT: add a2, a2, a6 +; RV32I-NEXT: sltu a6, a7, a3 +; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: sub a5, a7, a3 ; RV32I-NEXT: neg a2, a2 -; RV32I-NEXT: or a4, a6, a4 -; RV32I-NEXT: add a1, a1, a5 -; RV32I-NEXT: add a3, a3, a6 -; RV32I-NEXT: sltu a6, a7, a4 +; RV32I-NEXT: sub a4, a4, a6 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a5, a7, a4 -; RV32I-NEXT: sub a1, a1, a6 -; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: .LBB31_4: -; RV32I-NEXT: sw a2, 0(a0) -; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: sw a2, 4(a0) ; RV32I-NEXT: sw a5, 8(a0) -; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_subnsw_i128: @@ -2074,50 +2074,50 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_subnsw_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a4, 0(a2) -; RV32ZBB-NEXT: lw a3, 4(a2) +; RV32ZBB-NEXT: lw a3, 0(a2) +; RV32ZBB-NEXT: lw a4, 4(a2) ; RV32ZBB-NEXT: lw a5, 8(a2) -; RV32ZBB-NEXT: lw a6, 12(a2) +; RV32ZBB-NEXT: lw a2, 12(a2) ; RV32ZBB-NEXT: lw a7, 8(a1) ; RV32ZBB-NEXT: lw t0, 12(a1) -; RV32ZBB-NEXT: lw a2, 0(a1) +; RV32ZBB-NEXT: lw a6, 0(a1) ; RV32ZBB-NEXT: lw a1, 4(a1) ; RV32ZBB-NEXT: sltu t1, a7, a5 -; RV32ZBB-NEXT: sub t0, t0, a6 -; RV32ZBB-NEXT: sltu a6, a2, a4 +; RV32ZBB-NEXT: sub t0, t0, a2 +; RV32ZBB-NEXT: sltu a2, a6, a3 ; RV32ZBB-NEXT: sub t0, t0, t1 -; RV32ZBB-NEXT: mv t1, a6 -; RV32ZBB-NEXT: beq a1, a3, .LBB31_2 +; RV32ZBB-NEXT: mv t1, a2 +; RV32ZBB-NEXT: beq a1, a4, .LBB31_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, a3 +; RV32ZBB-NEXT: sltu t1, a1, a4 ; RV32ZBB-NEXT: .LBB31_2: ; RV32ZBB-NEXT: sub a5, a7, a5 -; RV32ZBB-NEXT: sub a3, a1, a3 -; RV32ZBB-NEXT: sltu a1, a5, t1 +; RV32ZBB-NEXT: sub a1, a1, a4 +; RV32ZBB-NEXT: sltu 
a4, a5, t1 ; RV32ZBB-NEXT: sub a5, a5, t1 -; RV32ZBB-NEXT: sub a1, t0, a1 -; RV32ZBB-NEXT: sub a3, a3, a6 -; RV32ZBB-NEXT: sub a2, a2, a4 -; RV32ZBB-NEXT: bgez a1, .LBB31_4 +; RV32ZBB-NEXT: sub a4, t0, a4 +; RV32ZBB-NEXT: sub a2, a1, a2 +; RV32ZBB-NEXT: sub a1, a6, a3 +; RV32ZBB-NEXT: bgez a4, .LBB31_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: snez a4, a3 -; RV32ZBB-NEXT: snez a6, a2 +; RV32ZBB-NEXT: snez a3, a2 +; RV32ZBB-NEXT: snez a6, a1 ; RV32ZBB-NEXT: neg a7, a5 ; RV32ZBB-NEXT: snez a5, a5 +; RV32ZBB-NEXT: or a3, a6, a3 +; RV32ZBB-NEXT: add a4, a4, a5 +; RV32ZBB-NEXT: add a2, a2, a6 +; RV32ZBB-NEXT: sltu a6, a7, a3 +; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: sub a5, a7, a3 ; RV32ZBB-NEXT: neg a2, a2 -; RV32ZBB-NEXT: or a4, a6, a4 -; RV32ZBB-NEXT: add a1, a1, a5 -; RV32ZBB-NEXT: add a3, a3, a6 -; RV32ZBB-NEXT: sltu a6, a7, a4 +; RV32ZBB-NEXT: sub a4, a4, a6 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a5, a7, a4 -; RV32ZBB-NEXT: sub a1, a1, a6 -; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: .LBB31_4: -; RV32ZBB-NEXT: sw a2, 0(a0) -; RV32ZBB-NEXT: sw a3, 4(a0) +; RV32ZBB-NEXT: sw a1, 0(a0) +; RV32ZBB-NEXT: sw a2, 4(a0) ; RV32ZBB-NEXT: sw a5, 8(a0) -; RV32ZBB-NEXT: sw a1, 12(a0) +; RV32ZBB-NEXT: sw a4, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_subnsw_i128: @@ -2142,50 +2142,50 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_subnsw_i128_undef: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a4, 0(a2) -; RV32I-NEXT: lw a3, 4(a2) +; RV32I-NEXT: lw a3, 0(a2) +; RV32I-NEXT: lw a4, 4(a2) ; RV32I-NEXT: lw a5, 8(a2) -; RV32I-NEXT: lw a6, 12(a2) +; RV32I-NEXT: lw a2, 12(a2) ; RV32I-NEXT: lw a7, 8(a1) ; RV32I-NEXT: lw t0, 12(a1) -; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: lw a6, 0(a1) ; RV32I-NEXT: lw a1, 4(a1) ; RV32I-NEXT: sltu t1, a7, a5 -; RV32I-NEXT: sub t0, t0, a6 -; RV32I-NEXT: sltu a6, a2, a4 +; RV32I-NEXT: sub t0, t0, a2 +; RV32I-NEXT: sltu a2, a6, a3 ; RV32I-NEXT: sub t0, t0, t1 -; RV32I-NEXT: mv t1, a6 -; RV32I-NEXT: beq a1, a3, .LBB32_2 +; RV32I-NEXT: mv t1, a2 +; RV32I-NEXT: beq a1, a4, .LBB32_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a3 +; RV32I-NEXT: sltu t1, a1, a4 ; RV32I-NEXT: .LBB32_2: ; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: sub a3, a1, a3 -; RV32I-NEXT: sltu a1, a5, t1 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sltu a4, a5, t1 ; RV32I-NEXT: sub a5, a5, t1 -; RV32I-NEXT: sub a1, t0, a1 -; RV32I-NEXT: sub a3, a3, a6 -; RV32I-NEXT: sub a2, a2, a4 -; RV32I-NEXT: bgez a1, .LBB32_4 +; RV32I-NEXT: sub a4, t0, a4 +; RV32I-NEXT: sub a2, a1, a2 +; RV32I-NEXT: sub a1, a6, a3 +; RV32I-NEXT: bgez a4, .LBB32_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: snez a4, a3 -; RV32I-NEXT: snez a6, a2 +; RV32I-NEXT: snez a3, a2 +; RV32I-NEXT: snez a6, a1 ; RV32I-NEXT: neg a7, a5 ; RV32I-NEXT: snez a5, a5 +; RV32I-NEXT: or a3, a6, a3 +; RV32I-NEXT: add a4, a4, a5 +; RV32I-NEXT: add a2, a2, a6 +; RV32I-NEXT: sltu a6, a7, a3 +; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: sub a5, a7, a3 ; RV32I-NEXT: neg a2, a2 -; RV32I-NEXT: or a4, a6, a4 -; RV32I-NEXT: add a1, a1, a5 -; RV32I-NEXT: add a3, a3, a6 -; RV32I-NEXT: sltu a6, a7, a4 +; RV32I-NEXT: sub a4, a4, a6 ; RV32I-NEXT: neg a1, a1 -; RV32I-NEXT: sub a5, a7, a4 -; RV32I-NEXT: sub a1, a1, a6 -; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: .LBB32_4: -; RV32I-NEXT: sw a2, 0(a0) -; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: sw a2, 4(a0) ; RV32I-NEXT: sw a5, 8(a0) -; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; ; 
RV64I-LABEL: abd_subnsw_i128_undef: @@ -2205,50 +2205,50 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_subnsw_i128_undef: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a4, 0(a2) -; RV32ZBB-NEXT: lw a3, 4(a2) +; RV32ZBB-NEXT: lw a3, 0(a2) +; RV32ZBB-NEXT: lw a4, 4(a2) ; RV32ZBB-NEXT: lw a5, 8(a2) -; RV32ZBB-NEXT: lw a6, 12(a2) +; RV32ZBB-NEXT: lw a2, 12(a2) ; RV32ZBB-NEXT: lw a7, 8(a1) ; RV32ZBB-NEXT: lw t0, 12(a1) -; RV32ZBB-NEXT: lw a2, 0(a1) +; RV32ZBB-NEXT: lw a6, 0(a1) ; RV32ZBB-NEXT: lw a1, 4(a1) ; RV32ZBB-NEXT: sltu t1, a7, a5 -; RV32ZBB-NEXT: sub t0, t0, a6 -; RV32ZBB-NEXT: sltu a6, a2, a4 +; RV32ZBB-NEXT: sub t0, t0, a2 +; RV32ZBB-NEXT: sltu a2, a6, a3 ; RV32ZBB-NEXT: sub t0, t0, t1 -; RV32ZBB-NEXT: mv t1, a6 -; RV32ZBB-NEXT: beq a1, a3, .LBB32_2 +; RV32ZBB-NEXT: mv t1, a2 +; RV32ZBB-NEXT: beq a1, a4, .LBB32_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, a3 +; RV32ZBB-NEXT: sltu t1, a1, a4 ; RV32ZBB-NEXT: .LBB32_2: ; RV32ZBB-NEXT: sub a5, a7, a5 -; RV32ZBB-NEXT: sub a3, a1, a3 -; RV32ZBB-NEXT: sltu a1, a5, t1 +; RV32ZBB-NEXT: sub a1, a1, a4 +; RV32ZBB-NEXT: sltu a4, a5, t1 ; RV32ZBB-NEXT: sub a5, a5, t1 -; RV32ZBB-NEXT: sub a1, t0, a1 -; RV32ZBB-NEXT: sub a3, a3, a6 -; RV32ZBB-NEXT: sub a2, a2, a4 -; RV32ZBB-NEXT: bgez a1, .LBB32_4 +; RV32ZBB-NEXT: sub a4, t0, a4 +; RV32ZBB-NEXT: sub a2, a1, a2 +; RV32ZBB-NEXT: sub a1, a6, a3 +; RV32ZBB-NEXT: bgez a4, .LBB32_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: snez a4, a3 -; RV32ZBB-NEXT: snez a6, a2 +; RV32ZBB-NEXT: snez a3, a2 +; RV32ZBB-NEXT: snez a6, a1 ; RV32ZBB-NEXT: neg a7, a5 ; RV32ZBB-NEXT: snez a5, a5 +; RV32ZBB-NEXT: or a3, a6, a3 +; RV32ZBB-NEXT: add a4, a4, a5 +; RV32ZBB-NEXT: add a2, a2, a6 +; RV32ZBB-NEXT: sltu a6, a7, a3 +; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: sub a5, a7, a3 ; RV32ZBB-NEXT: neg a2, a2 -; RV32ZBB-NEXT: or a4, a6, a4 -; RV32ZBB-NEXT: add a1, a1, a5 -; RV32ZBB-NEXT: add a3, a3, a6 -; RV32ZBB-NEXT: sltu a6, a7, a4 +; RV32ZBB-NEXT: sub a4, a4, a6 ; RV32ZBB-NEXT: neg a1, a1 -; RV32ZBB-NEXT: sub a5, a7, a4 -; RV32ZBB-NEXT: sub a1, a1, a6 -; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: .LBB32_4: -; RV32ZBB-NEXT: sw a2, 0(a0) -; RV32ZBB-NEXT: sw a3, 4(a0) +; RV32ZBB-NEXT: sw a1, 0(a0) +; RV32ZBB-NEXT: sw a2, 4(a0) ; RV32ZBB-NEXT: sw a5, 8(a0) -; RV32ZBB-NEXT: sw a1, 12(a0) +; RV32ZBB-NEXT: sw a4, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_subnsw_i128_undef: diff --git a/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll new file mode 100644 index 0000000..be3de37 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=riscv64 | FileCheck %s + +define i1 @src(i64 %x) { +; CHECK-LABEL: src: +; CHECK: # %bb.0: +; CHECK-NEXT: srai a0, a0, 30 +; CHECK-NEXT: addi a0, a0, 2 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: ret + %a = and i64 %x, -1073741824 + %b = icmp eq i64 %a, -2147483648 + ret i1 %b +} diff --git a/llvm/test/CodeGen/RISCV/combine-storetomstore.ll b/llvm/test/CodeGen/RISCV/combine-storetomstore.ll new file mode 100644 index 0000000..c7d1f76 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/combine-storetomstore.ll @@ -0,0 +1,684 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=riscv64-- -mattr=+m,+v,+f | FileCheck %s -check-prefix=RISCV + +define void @test_masked_store_success_v4i8(<4 x i8> %x, ptr 
%ptr, <4 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v4i8: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; RISCV-NEXT: vse8.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <4 x i8>, ptr %ptr, align 32 + %sel = select <4 x i1> %mask, <4 x i8> %x, <4 x i8> %load + store <4 x i8> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v4i16(<4 x i16> %x, ptr %ptr, <4 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v4i16: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RISCV-NEXT: vse16.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <4 x i16>, ptr %ptr, align 32 + %sel = select <4 x i1> %mask, <4 x i16> %x, <4 x i16> %load + store <4 x i16> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v4i32(<4 x i32> %x, ptr %ptr, <4 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v4i32: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RISCV-NEXT: vse32.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <4 x i32>, ptr %ptr, align 32 + %sel = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %load + store <4 x i32> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v4i64(<4 x i64> %x, ptr %ptr, <4 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v4i64: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RISCV-NEXT: vse64.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <4 x i64>, ptr %ptr, align 32 + %sel = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %load + store <4 x i64> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v4f16(<4 x half> %x, ptr %ptr, <4 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v4f16: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RISCV-NEXT: vmv1r.v v9, v0 +; RISCV-NEXT: vfirst.m a3, v0 +; RISCV-NEXT: mv a2, a0 +; RISCV-NEXT: beqz a3, .LBB4_2 +; RISCV-NEXT: # %bb.1: +; RISCV-NEXT: mv a2, a1 +; RISCV-NEXT: .LBB4_2: +; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; RISCV-NEXT: vmv.v.i v8, 0 +; RISCV-NEXT: vmv1r.v v0, v9 +; RISCV-NEXT: vmerge.vim v8, v8, 1, v0 +; RISCV-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RISCV-NEXT: vslidedown.vi v8, v8, 2 +; RISCV-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RISCV-NEXT: vmsne.vi v8, v8, 0 +; RISCV-NEXT: vmv.v.i v10, 0 +; RISCV-NEXT: vmv1r.v v0, v8 +; RISCV-NEXT: vmerge.vim v11, v10, 1, v0 +; RISCV-NEXT: vslidedown.vi v11, v11, 1 +; RISCV-NEXT: vmv.x.s a3, v11 +; RISCV-NEXT: andi a3, a3, 1 +; RISCV-NEXT: bnez a3, .LBB4_4 +; RISCV-NEXT: # %bb.3: +; RISCV-NEXT: addi a3, a1, 6 +; RISCV-NEXT: j .LBB4_5 +; RISCV-NEXT: .LBB4_4: +; RISCV-NEXT: addi a3, a0, 24 +; RISCV-NEXT: .LBB4_5: +; RISCV-NEXT: vmv1r.v v0, v9 +; RISCV-NEXT: vmerge.vim v9, v10, 1, v0 +; RISCV-NEXT: vslidedown.vi v9, v9, 1 +; RISCV-NEXT: vmv.x.s a4, v9 +; RISCV-NEXT: andi a4, a4, 1 +; RISCV-NEXT: bnez a4, .LBB4_7 +; RISCV-NEXT: # %bb.6: +; RISCV-NEXT: addi a5, a1, 2 +; RISCV-NEXT: j .LBB4_8 +; RISCV-NEXT: .LBB4_7: +; RISCV-NEXT: addi a5, a0, 8 +; RISCV-NEXT: .LBB4_8: +; RISCV-NEXT: lh a4, 0(a2) +; RISCV-NEXT: lh a2, 0(a3) +; RISCV-NEXT: lh a3, 0(a5) +; RISCV-NEXT: vfirst.m a5, v8 +; RISCV-NEXT: beqz a5, .LBB4_10 +; RISCV-NEXT: # %bb.9: +; RISCV-NEXT: addi a0, a1, 4 +; RISCV-NEXT: j .LBB4_11 +; RISCV-NEXT: .LBB4_10: +; RISCV-NEXT: addi a0, a0, 16 +; RISCV-NEXT: .LBB4_11: +; RISCV-NEXT: lh a0, 0(a0) +; RISCV-NEXT: sh a4, 0(a1) +; RISCV-NEXT: sh a3, 2(a1) +; RISCV-NEXT: sh a0, 4(a1) +; RISCV-NEXT: sh a2, 
6(a1) +; RISCV-NEXT: ret + %load = load <4 x half>, ptr %ptr, align 32 + %sel = select <4 x i1> %mask, <4 x half> %x, <4 x half> %load + store <4 x half> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v4f32(<4 x float> %x, ptr %ptr, <4 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v4f32: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RISCV-NEXT: vse32.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <4 x float>, ptr %ptr, align 32 + %sel = select <4 x i1> %mask, <4 x float> %x, <4 x float> %load + store <4 x float> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v4f64(<4 x double> %x, ptr %ptr, <4 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v4f64: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RISCV-NEXT: vse64.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <4 x double>, ptr %ptr, align 32 + %sel = select <4 x i1> %mask, <4 x double> %x, <4 x double> %load + store <4 x double> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v8i8(<8 x i8> %x, ptr %ptr, <8 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v8i8: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RISCV-NEXT: vse8.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <8 x i8>, ptr %ptr, align 32 + %sel = select <8 x i1> %mask, <8 x i8> %x, <8 x i8> %load + store <8 x i8> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v8i16(<8 x i16> %x, ptr %ptr, <8 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v8i16: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RISCV-NEXT: vse16.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <8 x i16>, ptr %ptr, align 32 + %sel = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %load + store <8 x i16> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v8i32(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v8i32: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RISCV-NEXT: vse32.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <8 x i32>, ptr %ptr, align 32 + %sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load + store <8 x i32> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v8i64(<8 x i64> %x, ptr %ptr, <8 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v8i64: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RISCV-NEXT: vse64.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <8 x i64>, ptr %ptr, align 32 + %sel = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %load + store <8 x i64> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v8f16(<8 x half> %x, ptr %ptr, <8 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v8f16: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RISCV-NEXT: vmv1r.v v8, v0 +; RISCV-NEXT: vfirst.m a3, v0 +; RISCV-NEXT: mv a2, a0 +; RISCV-NEXT: beqz a3, .LBB11_2 +; RISCV-NEXT: # %bb.1: +; RISCV-NEXT: mv a2, a1 +; RISCV-NEXT: .LBB11_2: +; RISCV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RISCV-NEXT: vmv.v.i v9, 0 +; RISCV-NEXT: vmv1r.v v0, v8 +; RISCV-NEXT: vmerge.vim v9, v9, 1, v0 +; RISCV-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; RISCV-NEXT: vslidedown.vi v9, v9, 4 +; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; RISCV-NEXT: vmsne.vi v11, v9, 0 +; RISCV-NEXT: vmv.v.i v10, 0 +; RISCV-NEXT: vmv1r.v 
v0, v11 +; RISCV-NEXT: vmerge.vim v9, v10, 1, v0 +; RISCV-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RISCV-NEXT: vslidedown.vi v9, v9, 2 +; RISCV-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RISCV-NEXT: vmsne.vi v9, v9, 0 +; RISCV-NEXT: vmv.v.i v12, 0 +; RISCV-NEXT: vmv1r.v v0, v9 +; RISCV-NEXT: vmerge.vim v13, v12, 1, v0 +; RISCV-NEXT: vslidedown.vi v13, v13, 1 +; RISCV-NEXT: vmv.x.s a3, v13 +; RISCV-NEXT: andi a3, a3, 1 +; RISCV-NEXT: bnez a3, .LBB11_4 +; RISCV-NEXT: # %bb.3: +; RISCV-NEXT: addi a3, a1, 14 +; RISCV-NEXT: j .LBB11_5 +; RISCV-NEXT: .LBB11_4: +; RISCV-NEXT: addi a3, a0, 56 +; RISCV-NEXT: .LBB11_5: +; RISCV-NEXT: vmv1r.v v0, v8 +; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; RISCV-NEXT: vmerge.vim v10, v10, 1, v0 +; RISCV-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RISCV-NEXT: vslidedown.vi v10, v10, 2 +; RISCV-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; RISCV-NEXT: vmsne.vi v10, v10, 0 +; RISCV-NEXT: vmv1r.v v0, v10 +; RISCV-NEXT: vmerge.vim v13, v12, 1, v0 +; RISCV-NEXT: vslidedown.vi v13, v13, 1 +; RISCV-NEXT: vmv.x.s a4, v13 +; RISCV-NEXT: andi a4, a4, 1 +; RISCV-NEXT: bnez a4, .LBB11_8 +; RISCV-NEXT: # %bb.6: +; RISCV-NEXT: addi a4, a1, 6 +; RISCV-NEXT: vfirst.m a5, v11 +; RISCV-NEXT: bnez a5, .LBB11_9 +; RISCV-NEXT: .LBB11_7: +; RISCV-NEXT: addi a5, a0, 32 +; RISCV-NEXT: j .LBB11_10 +; RISCV-NEXT: .LBB11_8: +; RISCV-NEXT: addi a4, a0, 24 +; RISCV-NEXT: vfirst.m a5, v11 +; RISCV-NEXT: beqz a5, .LBB11_7 +; RISCV-NEXT: .LBB11_9: +; RISCV-NEXT: addi a5, a1, 8 +; RISCV-NEXT: .LBB11_10: +; RISCV-NEXT: vmv1r.v v0, v11 +; RISCV-NEXT: vmerge.vim v11, v12, 1, v0 +; RISCV-NEXT: vslidedown.vi v11, v11, 1 +; RISCV-NEXT: vmv.x.s a6, v11 +; RISCV-NEXT: andi a6, a6, 1 +; RISCV-NEXT: bnez a6, .LBB11_14 +; RISCV-NEXT: # %bb.11: +; RISCV-NEXT: addi a6, a1, 10 +; RISCV-NEXT: vfirst.m a7, v9 +; RISCV-NEXT: bnez a7, .LBB11_15 +; RISCV-NEXT: .LBB11_12: +; RISCV-NEXT: addi a7, a0, 48 +; RISCV-NEXT: vfirst.m t0, v10 +; RISCV-NEXT: bnez t0, .LBB11_16 +; RISCV-NEXT: .LBB11_13: +; RISCV-NEXT: addi t1, a0, 16 +; RISCV-NEXT: j .LBB11_17 +; RISCV-NEXT: .LBB11_14: +; RISCV-NEXT: addi a6, a0, 40 +; RISCV-NEXT: vfirst.m a7, v9 +; RISCV-NEXT: beqz a7, .LBB11_12 +; RISCV-NEXT: .LBB11_15: +; RISCV-NEXT: addi a7, a1, 12 +; RISCV-NEXT: vfirst.m t0, v10 +; RISCV-NEXT: beqz t0, .LBB11_13 +; RISCV-NEXT: .LBB11_16: +; RISCV-NEXT: addi t1, a1, 4 +; RISCV-NEXT: .LBB11_17: +; RISCV-NEXT: vmv1r.v v0, v8 +; RISCV-NEXT: lh t0, 0(a2) +; RISCV-NEXT: lh a2, 0(a3) +; RISCV-NEXT: lh a3, 0(a4) +; RISCV-NEXT: lh a4, 0(a5) +; RISCV-NEXT: lh a5, 0(a6) +; RISCV-NEXT: lh a6, 0(a7) +; RISCV-NEXT: lh a7, 0(t1) +; RISCV-NEXT: vmerge.vim v8, v12, 1, v0 +; RISCV-NEXT: vslidedown.vi v8, v8, 1 +; RISCV-NEXT: vmv.x.s t1, v8 +; RISCV-NEXT: andi t1, t1, 1 +; RISCV-NEXT: bnez t1, .LBB11_19 +; RISCV-NEXT: # %bb.18: +; RISCV-NEXT: addi a0, a1, 2 +; RISCV-NEXT: j .LBB11_20 +; RISCV-NEXT: .LBB11_19: +; RISCV-NEXT: addi a0, a0, 8 +; RISCV-NEXT: .LBB11_20: +; RISCV-NEXT: lh a0, 0(a0) +; RISCV-NEXT: sh t0, 0(a1) +; RISCV-NEXT: sh a0, 2(a1) +; RISCV-NEXT: sh a7, 4(a1) +; RISCV-NEXT: sh a3, 6(a1) +; RISCV-NEXT: sh a4, 8(a1) +; RISCV-NEXT: sh a5, 10(a1) +; RISCV-NEXT: sh a6, 12(a1) +; RISCV-NEXT: sh a2, 14(a1) +; RISCV-NEXT: ret + %load = load <8 x half>, ptr %ptr, align 32 + %sel = select <8 x i1> %mask, <8 x half> %x, <8 x half> %load + store <8 x half> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v8f32(<8 x float> %x, ptr %ptr, <8 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v8f32: +; RISCV: # 
%bb.0: +; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RISCV-NEXT: vse32.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <8 x float>, ptr %ptr, align 32 + %sel = select <8 x i1> %mask, <8 x float> %x, <8 x float> %load + store <8 x float> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v8f64(<8 x double> %x, ptr %ptr, <8 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v8f64: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RISCV-NEXT: vse64.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <8 x double>, ptr %ptr, align 32 + %sel = select <8 x i1> %mask, <8 x double> %x, <8 x double> %load + store <8 x double> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v16i8(<16 x i8> %x, ptr %ptr, <16 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v16i8: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RISCV-NEXT: vse8.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <16 x i8>, ptr %ptr, align 32 + %sel = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %load + store <16 x i8> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v16i16(<16 x i16> %x, ptr %ptr, <16 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v16i16: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; RISCV-NEXT: vse16.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <16 x i16>, ptr %ptr, align 32 + %sel = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %load + store <16 x i16> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v16i32(<16 x i32> %x, ptr %ptr, <16 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v16i32: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RISCV-NEXT: vse32.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <16 x i32>, ptr %ptr, align 32 + %sel = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %load + store <16 x i32> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v32i8(<32 x i8> %x, ptr %ptr, <32 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v32i8: +; RISCV: # %bb.0: +; RISCV-NEXT: li a1, 32 +; RISCV-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RISCV-NEXT: vse8.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <32 x i8>, ptr %ptr, align 32 + %sel = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %load + store <32 x i8> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v32i16(<32 x i16> %x, ptr %ptr, <32 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v32i16: +; RISCV: # %bb.0: +; RISCV-NEXT: li a1, 32 +; RISCV-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; RISCV-NEXT: vse16.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <32 x i16>, ptr %ptr, align 32 + %sel = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %load + store <32 x i16> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_v64i8(<64 x i8> %x, ptr %ptr, <64 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_v64i8: +; RISCV: # %bb.0: +; RISCV-NEXT: li a1, 64 +; RISCV-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RISCV-NEXT: vse8.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <64 x i8>, ptr %ptr, align 32 + %sel = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %load + store <64 x i8> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_invert_mask_v4i32(<4 x i32> %x, ptr %ptr, <4 x i1> %mask) { +; RISCV-LABEL: 
test_masked_store_success_invert_mask_v4i32: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; RISCV-NEXT: vmnot.m v0, v0 +; RISCV-NEXT: vse32.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <4 x i32>, ptr %ptr, align 32 + %sel = select <4 x i1> %mask, <4 x i32> %load, <4 x i32> %x + store <4 x i32> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_invert_mask_v8i32(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_invert_mask_v8i32: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RISCV-NEXT: vmnot.m v0, v0 +; RISCV-NEXT: vse32.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <8 x i32>, ptr %ptr, align 32 + %sel = select <8 x i1> %mask, <8 x i32> %load, <8 x i32> %x + store <8 x i32> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_success_invert_mask_v16i32(<16 x i32> %x, ptr %ptr, <16 x i1> %mask) { +; RISCV-LABEL: test_masked_store_success_invert_mask_v16i32: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RISCV-NEXT: vmnot.m v0, v0 +; RISCV-NEXT: vse32.v v8, (a0), v0.t +; RISCV-NEXT: ret + %load = load <16 x i32>, ptr %ptr, align 32 + %sel = select <16 x i1> %mask, <16 x i32> %load, <16 x i32> %x + store <16 x i32> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_zextload(<4 x i64> %x, ptr %ptr, <4 x i1> %mask) { +; RISCV-LABEL: test_masked_store_zextload: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RISCV-NEXT: vle32.v v12, (a0) +; RISCV-NEXT: vzext.vf2 v10, v12 +; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0 +; RISCV-NEXT: vse64.v v8, (a0) +; RISCV-NEXT: ret + %load = load <4 x i32>, ptr %ptr, align 32 + %zext = zext <4 x i32> %load to <4 x i64> + %masked = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %zext + store <4 x i64> %masked, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_volatile_load(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) { +; RISCV-LABEL: test_masked_store_volatile_load: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RISCV-NEXT: vle32.v v10, (a0) +; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0 +; RISCV-NEXT: vse32.v v8, (a0) +; RISCV-NEXT: ret + %load = load volatile <8 x i32>, ptr %ptr, align 32 + %sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load + store <8 x i32> %sel, ptr %ptr, align 32 + ret void +} + +define void @test_masked_store_volatile_store(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) { +; RISCV-LABEL: test_masked_store_volatile_store: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RISCV-NEXT: vle32.v v10, (a0) +; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0 +; RISCV-NEXT: vse32.v v8, (a0) +; RISCV-NEXT: ret + %load = load <8 x i32>, ptr %ptr, align 32 + %sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load + store volatile <8 x i32> %sel, ptr %ptr, align 32 + ret void +} + +declare void @use_vec(<8 x i32>) + +define void @test_masked_store_intervening(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) nounwind { +; RISCV-LABEL: test_masked_store_intervening: +; RISCV: # %bb.0: +; RISCV-NEXT: addi sp, sp, -32 +; RISCV-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RISCV-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RISCV-NEXT: csrr a1, vlenb +; RISCV-NEXT: slli a2, a1, 2 +; RISCV-NEXT: add a1, a2, a1 +; RISCV-NEXT: sub sp, sp, a1 +; RISCV-NEXT: csrr a1, vlenb +; RISCV-NEXT: slli a1, a1, 2 +; RISCV-NEXT: add a1, sp, a1 +; RISCV-NEXT: addi a1, a1, 16 +; RISCV-NEXT: vs1r.v v0, (a1) # vscale x 8-byte Folded 
Spill +; RISCV-NEXT: mv s0, a0 +; RISCV-NEXT: csrr a1, vlenb +; RISCV-NEXT: slli a1, a1, 1 +; RISCV-NEXT: add a1, sp, a1 +; RISCV-NEXT: addi a1, a1, 16 +; RISCV-NEXT: vs2r.v v8, (a1) # vscale x 16-byte Folded Spill +; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RISCV-NEXT: vle32.v v8, (a0) +; RISCV-NEXT: addi a1, sp, 16 +; RISCV-NEXT: vs2r.v v8, (a1) # vscale x 16-byte Folded Spill +; RISCV-NEXT: vmv.v.i v8, 0 +; RISCV-NEXT: vse32.v v8, (a0) +; RISCV-NEXT: call use_vec +; RISCV-NEXT: csrr a0, vlenb +; RISCV-NEXT: slli a0, a0, 2 +; RISCV-NEXT: add a0, sp, a0 +; RISCV-NEXT: addi a0, a0, 16 +; RISCV-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload +; RISCV-NEXT: csrr a0, vlenb +; RISCV-NEXT: slli a0, a0, 1 +; RISCV-NEXT: add a0, sp, a0 +; RISCV-NEXT: addi a0, a0, 16 +; RISCV-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload +; RISCV-NEXT: addi a0, sp, 16 +; RISCV-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload +; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0 +; RISCV-NEXT: vse32.v v8, (s0) +; RISCV-NEXT: csrr a0, vlenb +; RISCV-NEXT: slli a1, a0, 2 +; RISCV-NEXT: add a0, a1, a0 +; RISCV-NEXT: add sp, sp, a0 +; RISCV-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RISCV-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RISCV-NEXT: addi sp, sp, 32 +; RISCV-NEXT: ret + %load = load <8 x i32>, ptr %ptr, align 32 + store <8 x i32> zeroinitializer, ptr %ptr, align 32 + %tmp = load <8 x i32>, ptr %ptr + call void @use_vec(<8 x i32> %tmp) + %sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load + store <8 x i32> %sel, ptr %ptr, align 32 + ret void +} + + +define void @test_masked_store_multiple_v8i32(<8 x i32> %x, <8 x i32> %y, ptr %ptr1, ptr %ptr2, <8 x i1> %mask, <8 x i1> %mask2) { +; RISCV-LABEL: test_masked_store_multiple_v8i32: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RISCV-NEXT: vmv1r.v v13, v0 +; RISCV-NEXT: vle32.v v14, (a1) +; RISCV-NEXT: vmv1r.v v0, v12 +; RISCV-NEXT: vmerge.vvm v10, v14, v10, v0 +; RISCV-NEXT: vmv1r.v v0, v13 +; RISCV-NEXT: vse32.v v8, (a0), v0.t +; RISCV-NEXT: vse32.v v10, (a1) +; RISCV-NEXT: ret + %load = load <8 x i32>, ptr %ptr1, align 32 + %load2 = load <8 x i32>, ptr %ptr2, align 32 + %sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load + %sel2 = select <8 x i1> %mask2, <8 x i32> %y, <8 x i32> %load2 + store <8 x i32> %sel, ptr %ptr1, align 32 + store <8 x i32> %sel2, ptr %ptr2, align 32 + ret void +} + +define void @test_masked_store_multiple_v8i64(<8 x i64> %x, <8 x i64> %y, ptr %ptr1, ptr %ptr2, <8 x i1> %mask, <8 x i1> %mask2) { +; RISCV-LABEL: test_masked_store_multiple_v8i64: +; RISCV: # %bb.0: +; RISCV-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RISCV-NEXT: vmv1r.v v17, v0 +; RISCV-NEXT: vle64.v v20, (a1) +; RISCV-NEXT: vmv1r.v v0, v16 +; RISCV-NEXT: vmerge.vvm v12, v20, v12, v0 +; RISCV-NEXT: vmv1r.v v0, v17 +; RISCV-NEXT: vse64.v v8, (a0), v0.t +; RISCV-NEXT: vse64.v v12, (a1) +; RISCV-NEXT: ret + %load = load <8 x i64>, ptr %ptr1, align 32 + %load2 = load <8 x i64>, ptr %ptr2, align 32 + %sel = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %load + %sel2 = select <8 x i1> %mask2, <8 x i64> %y, <8 x i64> %load2 + store <8 x i64> %sel, ptr %ptr1, align 32 + store <8 x i64> %sel2, ptr %ptr2, align 32 + ret void +} + +define void @test_masked_store_unaligned_v4i32(<4 x i32> %data, ptr %ptr, <4 x i1> %mask) { +; RISCV-LABEL: test_masked_store_unaligned_v4i32: +; RISCV: # %bb.0: +; RISCV-NEXT: addi a0, a0, 1 +; RISCV-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; 
RISCV-NEXT: vle8.v v9, (a0) +; RISCV-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RISCV-NEXT: vmerge.vvm v8, v9, v8, v0 +; RISCV-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RISCV-NEXT: vse8.v v8, (a0) +; RISCV-NEXT: ret + %ptr_i8 = getelementptr i8, ptr %ptr, i32 1 + %ptr_vec = bitcast ptr %ptr_i8 to ptr + %load = load <4 x i32>, ptr %ptr_vec, align 1 + %sel = select <4 x i1> %mask, <4 x i32> %data, <4 x i32> %load + store <4 x i32> %sel, ptr %ptr_vec, align 1 + ret void +} + +define void @test_masked_store_unaligned_v4i64(<4 x i64> %data, ptr %ptr, <4 x i1> %mask) { +; RISCV-LABEL: test_masked_store_unaligned_v4i64: +; RISCV: # %bb.0: +; RISCV-NEXT: addi a0, a0, 1 +; RISCV-NEXT: li a1, 32 +; RISCV-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RISCV-NEXT: vle8.v v10, (a0) +; RISCV-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0 +; RISCV-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RISCV-NEXT: vse8.v v8, (a0) +; RISCV-NEXT: ret + %ptr_i8 = getelementptr i8, ptr %ptr, i64 1 + %ptr_vec = bitcast ptr %ptr_i8 to ptr + %load = load <4 x i64>, ptr %ptr_vec, align 1 + %sel = select <4 x i1> %mask, <4 x i64> %data, <4 x i64> %load + store <4 x i64> %sel, ptr %ptr_vec, align 1 + ret void +} + +define void @test_masked_store_unaligned_v8i32(<8 x i32> %data, ptr %ptr, <8 x i1> %mask) { +; RISCV-LABEL: test_masked_store_unaligned_v8i32: +; RISCV: # %bb.0: +; RISCV-NEXT: addi a0, a0, 1 +; RISCV-NEXT: li a1, 32 +; RISCV-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RISCV-NEXT: vle8.v v10, (a0) +; RISCV-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RISCV-NEXT: vmerge.vvm v8, v10, v8, v0 +; RISCV-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RISCV-NEXT: vse8.v v8, (a0) +; RISCV-NEXT: ret + %ptr_i8 = getelementptr i8, ptr %ptr, i32 1 + %ptr_vec = bitcast ptr %ptr_i8 to ptr + %load = load <8 x i32>, ptr %ptr_vec, align 1 + %sel = select <8 x i1> %mask, <8 x i32> %data, <8 x i32> %load + store <8 x i32> %sel, ptr %ptr_vec, align 1 + ret void +} + +define void @test_masked_store_unaligned_v8i64(<8 x i64> %data, ptr %ptr, <8 x i1> %mask) { +; RISCV-LABEL: test_masked_store_unaligned_v8i64: +; RISCV: # %bb.0: +; RISCV-NEXT: addi a0, a0, 1 +; RISCV-NEXT: li a1, 64 +; RISCV-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RISCV-NEXT: vle8.v v12, (a0) +; RISCV-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RISCV-NEXT: vmerge.vvm v8, v12, v8, v0 +; RISCV-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; RISCV-NEXT: vse8.v v8, (a0) +; RISCV-NEXT: ret + %ptr_i8 = getelementptr i8, ptr %ptr, i64 1 + %ptr_vec = bitcast ptr %ptr_i8 to ptr + %load = load <8 x i64>, ptr %ptr_vec, align 1 + %sel = select <8 x i1> %mask, <8 x i64> %data, <8 x i64> %load + store <8 x i64> %sel, ptr %ptr_vec, align 1 + ret void +} diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index b94665b..fb53921 100644 --- a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -6,13 +6,21 @@ ; CHECK-NEXT: 32bit - Implements RV32. ; CHECK-NEXT: 64bit - Implements RV64. ; CHECK-NEXT: a - 'A' (Atomic Instructions). +; CHECK-NEXT: add-load-fusion - Enable ADD(.UW) + load macrofusion. +; CHECK-NEXT: addi-load-fusion - Enable ADDI + load macrofusion. ; CHECK-NEXT: andes45 - Andes 45-Series processors. ; CHECK-NEXT: auipc-addi-fusion - Enable AUIPC+ADDI macrofusion. +; CHECK-NEXT: auipc-load-fusion - Enable AUIPC + load macrofusion. ; CHECK-NEXT: b - 'B' (the collection of the Zba, Zbb, Zbs extensions). +; CHECK-NEXT: bfext-fusion - Enable SLLI+SRLI (bitfield extract) macrofusion. 
 ; CHECK-NEXT: c - 'C' (Compressed Instructions).
 ; CHECK-NEXT: conditional-cmv-fusion - Enable branch+c.mv fusion.
 ; CHECK-NEXT: d - 'D' (Double-Precision Floating-Point).
 ; CHECK-NEXT: disable-latency-sched-heuristic - Disable latency scheduling heuristic.
+; CHECK-NEXT: disable-misched-load-clustering - Disable load clustering in the machine scheduler.
+; CHECK-NEXT: disable-misched-store-clustering - Disable store clustering in the machine scheduler.
+; CHECK-NEXT: disable-postmisched-load-clustering - Disable PostRA load clustering in the machine scheduler.
+; CHECK-NEXT: disable-postmisched-store-clustering - Disable PostRA store clustering in the machine scheduler.
 ; CHECK-NEXT: dlen-factor-2 - Vector unit DLEN(data path width) is half of VLEN.
 ; CHECK-NEXT: e - 'E' (Embedded Instruction Set with 16 GPRs).
 ; CHECK-NEXT: exact-asm - Enable Exact Assembly (Disables Compression and Relaxation).
@@ -58,6 +66,7 @@
 ; CHECK-NEXT: ld-add-fusion - Enable LD+ADD macrofusion.
 ; CHECK-NEXT: log-vrgather - Has vrgather.vv with LMUL*log2(LMUL) latency
 ; CHECK-NEXT: lui-addi-fusion - Enable LUI+ADDI macro fusion.
+; CHECK-NEXT: lui-load-fusion - Enable LUI + load macrofusion.
 ; CHECK-NEXT: m - 'M' (Integer Multiplication and Division).
 ; CHECK-NEXT: mips-p8700 - MIPS p8700 processor.
 ; CHECK-NEXT: no-default-unroll - Disable default unroll preference..
@@ -130,6 +139,7 @@
 ; CHECK-NEXT: shvsatpa - 'Shvsatpa' (vsatp supports all modes supported by satp).
 ; CHECK-NEXT: shvstvala - 'Shvstvala' (vstval provides all needed values).
 ; CHECK-NEXT: shvstvecd - 'Shvstvecd' (vstvec supports Direct mode).
+; CHECK-NEXT: shxadd-load-fusion - Enable SH(1|2|3)ADD(.UW) + load macrofusion.
 ; CHECK-NEXT: sifive7 - SiFive 7-Series processors.
 ; CHECK-NEXT: smaia - 'Smaia' (Advanced Interrupt Architecture Machine Level).
 ; CHECK-NEXT: smcdeleg - 'Smcdeleg' (Counter Delegation Machine Level).
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index 117e3e4..519f1e8 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -1110,15 +1110,15 @@ define i64 @stest_f64i64(double %x) { ; RV32IF-NEXT: .LBB18_3: # %entry ; RV32IF-NEXT: slti a6, a4, 0 ; RV32IF-NEXT: .LBB18_4: # %entry -; RV32IF-NEXT: addi a7, a6, -1 -; RV32IF-NEXT: neg t0, a6 +; RV32IF-NEXT: neg a7, a6 +; RV32IF-NEXT: addi t0, a6, -1 ; RV32IF-NEXT: bnez a6, .LBB18_6 ; RV32IF-NEXT: # %bb.5: # %entry ; RV32IF-NEXT: mv a1, a5 ; RV32IF-NEXT: .LBB18_6: # %entry -; RV32IF-NEXT: or a3, a7, a3 -; RV32IF-NEXT: and a4, t0, a4 -; RV32IF-NEXT: and a2, t0, a2 +; RV32IF-NEXT: or a3, t0, a3 +; RV32IF-NEXT: and a4, a7, a4 +; RV32IF-NEXT: and a2, a7, a2 ; RV32IF-NEXT: beq a1, a0, .LBB18_8 ; RV32IF-NEXT: # %bb.7: # %entry ; RV32IF-NEXT: sltu a0, a0, a1 @@ -1213,15 +1213,15 @@ define i64 @stest_f64i64(double %x) { ; RV32IFD-NEXT: .LBB18_3: # %entry ; RV32IFD-NEXT: slti a6, a4, 0 ; RV32IFD-NEXT: .LBB18_4: # %entry -; RV32IFD-NEXT: addi a7, a6, -1 -; RV32IFD-NEXT: neg t0, a6 +; RV32IFD-NEXT: neg a7, a6 +; RV32IFD-NEXT: addi t0, a6, -1 ; RV32IFD-NEXT: bnez a6, .LBB18_6 ; RV32IFD-NEXT: # %bb.5: # %entry ; RV32IFD-NEXT: mv a1, a5 ; RV32IFD-NEXT: .LBB18_6: # %entry -; RV32IFD-NEXT: or a3, a7, a3 -; RV32IFD-NEXT: and a4, t0, a4 -; RV32IFD-NEXT: and a2, t0, a2 +; RV32IFD-NEXT: or a3, t0, a3 +; RV32IFD-NEXT: and a4, a7, a4 +; RV32IFD-NEXT: and a2, a7, a2 ; RV32IFD-NEXT: beq a1, a0, .LBB18_8 ; RV32IFD-NEXT: # %bb.7: # %entry ; RV32IFD-NEXT: sltu a0, a0, a1 @@ -1378,8 +1378,8 @@ define i64 @ustest_f64i64(double %x) { ; RV32IF-NEXT: # %bb.4: # %entry ; RV32IF-NEXT: li a0, 1 ; RV32IF-NEXT: .LBB20_5: # %entry -; RV32IF-NEXT: lw a3, 8(sp) -; RV32IF-NEXT: lw a4, 12(sp) +; RV32IF-NEXT: lw a4, 8(sp) +; RV32IF-NEXT: lw a3, 12(sp) ; RV32IF-NEXT: and a5, a2, a1 ; RV32IF-NEXT: beqz a5, .LBB20_7 ; RV32IF-NEXT: # %bb.6: # %entry @@ -1393,12 +1393,12 @@ define i64 @ustest_f64i64(double %x) { ; RV32IF-NEXT: and a2, a2, a3 ; RV32IF-NEXT: bnez a0, .LBB20_10 ; RV32IF-NEXT: # %bb.9: -; RV32IF-NEXT: or a0, a2, a4 +; RV32IF-NEXT: or a0, a4, a2 ; RV32IF-NEXT: snez a1, a0 ; RV32IF-NEXT: .LBB20_10: # %entry ; RV32IF-NEXT: neg a1, a1 -; RV32IF-NEXT: and a0, a1, a2 -; RV32IF-NEXT: and a1, a1, a4 +; RV32IF-NEXT: and a0, a1, a4 +; RV32IF-NEXT: and a1, a1, a2 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: .cfi_restore ra ; RV32IF-NEXT: addi sp, sp, 32 @@ -1461,8 +1461,8 @@ define i64 @ustest_f64i64(double %x) { ; RV32IFD-NEXT: # %bb.4: # %entry ; RV32IFD-NEXT: li a0, 1 ; RV32IFD-NEXT: .LBB20_5: # %entry -; RV32IFD-NEXT: lw a3, 8(sp) -; RV32IFD-NEXT: lw a4, 12(sp) +; RV32IFD-NEXT: lw a4, 8(sp) +; RV32IFD-NEXT: lw a3, 12(sp) ; RV32IFD-NEXT: and a5, a2, a1 ; RV32IFD-NEXT: beqz a5, .LBB20_7 ; RV32IFD-NEXT: # %bb.6: # %entry @@ -1476,12 +1476,12 @@ define i64 @ustest_f64i64(double %x) { ; RV32IFD-NEXT: and a2, a2, a3 ; RV32IFD-NEXT: bnez a0, .LBB20_10 ; RV32IFD-NEXT: # %bb.9: -; RV32IFD-NEXT: or a0, a2, a4 +; RV32IFD-NEXT: or a0, a4, a2 ; RV32IFD-NEXT: snez a1, a0 ; RV32IFD-NEXT: .LBB20_10: # %entry ; RV32IFD-NEXT: neg a1, a1 -; RV32IFD-NEXT: and a0, a1, a2 -; RV32IFD-NEXT: and a1, a1, a4 +; RV32IFD-NEXT: and a0, a1, a4 +; RV32IFD-NEXT: and a1, a1, a2 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: .cfi_restore ra ; RV32IFD-NEXT: addi sp, sp, 32 @@ -1525,15 +1525,15 @@ define i64 @stest_f32i64(float %x) { ; RV32-NEXT: .LBB21_3: # %entry ; RV32-NEXT: slti 
a6, a4, 0 ; RV32-NEXT: .LBB21_4: # %entry -; RV32-NEXT: addi a7, a6, -1 -; RV32-NEXT: neg t0, a6 +; RV32-NEXT: neg a7, a6 +; RV32-NEXT: addi t0, a6, -1 ; RV32-NEXT: bnez a6, .LBB21_6 ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB21_6: # %entry -; RV32-NEXT: or a3, a7, a3 -; RV32-NEXT: and a4, t0, a4 -; RV32-NEXT: and a2, t0, a2 +; RV32-NEXT: or a3, t0, a3 +; RV32-NEXT: and a4, a7, a4 +; RV32-NEXT: and a2, a7, a2 ; RV32-NEXT: beq a1, a0, .LBB21_8 ; RV32-NEXT: # %bb.7: # %entry ; RV32-NEXT: sltu a0, a0, a1 @@ -1658,8 +1658,8 @@ define i64 @ustest_f32i64(float %x) { ; RV32-NEXT: # %bb.4: # %entry ; RV32-NEXT: li a0, 1 ; RV32-NEXT: .LBB23_5: # %entry -; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: lw a4, 12(sp) +; RV32-NEXT: lw a4, 8(sp) +; RV32-NEXT: lw a3, 12(sp) ; RV32-NEXT: and a5, a2, a1 ; RV32-NEXT: beqz a5, .LBB23_7 ; RV32-NEXT: # %bb.6: # %entry @@ -1673,12 +1673,12 @@ define i64 @ustest_f32i64(float %x) { ; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: bnez a0, .LBB23_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: or a0, a2, a4 +; RV32-NEXT: or a0, a4, a2 ; RV32-NEXT: snez a1, a0 ; RV32-NEXT: .LBB23_10: # %entry ; RV32-NEXT: neg a1, a1 -; RV32-NEXT: and a0, a1, a2 -; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: and a0, a1, a4 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 32 @@ -1752,15 +1752,15 @@ define i64 @stest_f16i64(half %x) { ; RV32-NEXT: .LBB24_3: # %entry ; RV32-NEXT: slti a6, a4, 0 ; RV32-NEXT: .LBB24_4: # %entry -; RV32-NEXT: addi a7, a6, -1 -; RV32-NEXT: neg t0, a6 +; RV32-NEXT: neg a7, a6 +; RV32-NEXT: addi t0, a6, -1 ; RV32-NEXT: bnez a6, .LBB24_6 ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB24_6: # %entry -; RV32-NEXT: or a3, a7, a3 -; RV32-NEXT: and a4, t0, a4 -; RV32-NEXT: and a2, t0, a2 +; RV32-NEXT: or a3, t0, a3 +; RV32-NEXT: and a4, a7, a4 +; RV32-NEXT: and a2, a7, a2 ; RV32-NEXT: beq a1, a0, .LBB24_8 ; RV32-NEXT: # %bb.7: # %entry ; RV32-NEXT: sltu a0, a0, a1 @@ -1921,8 +1921,8 @@ define i64 @ustest_f16i64(half %x) { ; RV32-NEXT: # %bb.4: # %entry ; RV32-NEXT: li a0, 1 ; RV32-NEXT: .LBB26_5: # %entry -; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: lw a4, 12(sp) +; RV32-NEXT: lw a4, 8(sp) +; RV32-NEXT: lw a3, 12(sp) ; RV32-NEXT: and a5, a2, a1 ; RV32-NEXT: beqz a5, .LBB26_7 ; RV32-NEXT: # %bb.6: # %entry @@ -1936,12 +1936,12 @@ define i64 @ustest_f16i64(half %x) { ; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: bnez a0, .LBB26_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: or a0, a2, a4 +; RV32-NEXT: or a0, a4, a2 ; RV32-NEXT: snez a1, a0 ; RV32-NEXT: .LBB26_10: # %entry ; RV32-NEXT: neg a1, a1 -; RV32-NEXT: and a0, a1, a2 -; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: and a0, a1, a4 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 32 @@ -3046,15 +3046,15 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IF-NEXT: .LBB45_3: # %entry ; RV32IF-NEXT: slti a6, a4, 0 ; RV32IF-NEXT: .LBB45_4: # %entry -; RV32IF-NEXT: addi a7, a6, -1 -; RV32IF-NEXT: neg t0, a6 +; RV32IF-NEXT: neg a7, a6 +; RV32IF-NEXT: addi t0, a6, -1 ; RV32IF-NEXT: bnez a6, .LBB45_6 ; RV32IF-NEXT: # %bb.5: # %entry ; RV32IF-NEXT: mv a1, a5 ; RV32IF-NEXT: .LBB45_6: # %entry -; RV32IF-NEXT: or a3, a7, a3 -; RV32IF-NEXT: and a4, t0, a4 -; RV32IF-NEXT: and a2, t0, a2 +; RV32IF-NEXT: or a3, t0, a3 +; RV32IF-NEXT: and a4, a7, a4 +; RV32IF-NEXT: and a2, a7, a2 ; RV32IF-NEXT: beq a1, a0, .LBB45_8 ; RV32IF-NEXT: # %bb.7: # %entry ; RV32IF-NEXT: 
sltu a0, a0, a1 @@ -3149,15 +3149,15 @@ define i64 @stest_f64i64_mm(double %x) { ; RV32IFD-NEXT: .LBB45_3: # %entry ; RV32IFD-NEXT: slti a6, a4, 0 ; RV32IFD-NEXT: .LBB45_4: # %entry -; RV32IFD-NEXT: addi a7, a6, -1 -; RV32IFD-NEXT: neg t0, a6 +; RV32IFD-NEXT: neg a7, a6 +; RV32IFD-NEXT: addi t0, a6, -1 ; RV32IFD-NEXT: bnez a6, .LBB45_6 ; RV32IFD-NEXT: # %bb.5: # %entry ; RV32IFD-NEXT: mv a1, a5 ; RV32IFD-NEXT: .LBB45_6: # %entry -; RV32IFD-NEXT: or a3, a7, a3 -; RV32IFD-NEXT: and a4, t0, a4 -; RV32IFD-NEXT: and a2, t0, a2 +; RV32IFD-NEXT: or a3, t0, a3 +; RV32IFD-NEXT: and a4, a7, a4 +; RV32IFD-NEXT: and a2, a7, a2 ; RV32IFD-NEXT: beq a1, a0, .LBB45_8 ; RV32IFD-NEXT: # %bb.7: # %entry ; RV32IFD-NEXT: sltu a0, a0, a1 @@ -3292,30 +3292,30 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti -; RV32IF-NEXT: lw a0, 20(sp) -; RV32IF-NEXT: lw a1, 8(sp) -; RV32IF-NEXT: lw a2, 12(sp) +; RV32IF-NEXT: lw a0, 8(sp) +; RV32IF-NEXT: lw a1, 12(sp) +; RV32IF-NEXT: lw a2, 20(sp) ; RV32IF-NEXT: lw a3, 16(sp) -; RV32IF-NEXT: beqz a0, .LBB47_2 +; RV32IF-NEXT: beqz a2, .LBB47_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: slti a4, a0, 0 +; RV32IF-NEXT: slti a4, a2, 0 ; RV32IF-NEXT: j .LBB47_3 ; RV32IF-NEXT: .LBB47_2: ; RV32IF-NEXT: seqz a4, a3 ; RV32IF-NEXT: .LBB47_3: # %entry ; RV32IF-NEXT: xori a3, a3, 1 -; RV32IF-NEXT: or a3, a3, a0 +; RV32IF-NEXT: or a3, a3, a2 ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a3, a3, a4 ; RV32IF-NEXT: neg a3, a3 -; RV32IF-NEXT: and a2, a3, a2 ; RV32IF-NEXT: and a1, a3, a1 ; RV32IF-NEXT: and a0, a3, a0 -; RV32IF-NEXT: slti a0, a0, 0 -; RV32IF-NEXT: addi a3, a0, -1 -; RV32IF-NEXT: and a0, a3, a1 -; RV32IF-NEXT: and a1, a3, a2 +; RV32IF-NEXT: and a2, a3, a2 +; RV32IF-NEXT: slti a2, a2, 0 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: and a1, a2, a1 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: .cfi_restore ra ; RV32IF-NEXT: addi sp, sp, 32 @@ -3354,30 +3354,30 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti -; RV32IFD-NEXT: lw a0, 20(sp) -; RV32IFD-NEXT: lw a1, 8(sp) -; RV32IFD-NEXT: lw a2, 12(sp) +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: lw a2, 20(sp) ; RV32IFD-NEXT: lw a3, 16(sp) -; RV32IFD-NEXT: beqz a0, .LBB47_2 +; RV32IFD-NEXT: beqz a2, .LBB47_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: slti a4, a0, 0 +; RV32IFD-NEXT: slti a4, a2, 0 ; RV32IFD-NEXT: j .LBB47_3 ; RV32IFD-NEXT: .LBB47_2: ; RV32IFD-NEXT: seqz a4, a3 ; RV32IFD-NEXT: .LBB47_3: # %entry ; RV32IFD-NEXT: xori a3, a3, 1 -; RV32IFD-NEXT: or a3, a3, a0 +; RV32IFD-NEXT: or a3, a3, a2 ; RV32IFD-NEXT: seqz a3, a3 ; RV32IFD-NEXT: addi a3, a3, -1 ; RV32IFD-NEXT: and a3, a3, a4 ; RV32IFD-NEXT: neg a3, a3 -; RV32IFD-NEXT: and a2, a3, a2 ; RV32IFD-NEXT: and a1, a3, a1 ; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: slti a0, a0, 0 -; RV32IFD-NEXT: addi a3, a0, -1 -; RV32IFD-NEXT: and a0, a3, a1 -; RV32IFD-NEXT: and a1, a3, a2 +; RV32IFD-NEXT: and a2, a3, a2 +; RV32IFD-NEXT: slti a2, a2, 0 +; RV32IFD-NEXT: addi a2, a2, -1 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: and a1, a2, a1 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: .cfi_restore ra ; RV32IFD-NEXT: addi sp, sp, 32 @@ -3419,15 +3419,15 @@ define i64 @stest_f32i64_mm(float %x) { ; RV32-NEXT: .LBB48_3: # %entry ; RV32-NEXT: slti a6, a4, 0 ; 
RV32-NEXT: .LBB48_4: # %entry -; RV32-NEXT: addi a7, a6, -1 -; RV32-NEXT: neg t0, a6 +; RV32-NEXT: neg a7, a6 +; RV32-NEXT: addi t0, a6, -1 ; RV32-NEXT: bnez a6, .LBB48_6 ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB48_6: # %entry -; RV32-NEXT: or a3, a7, a3 -; RV32-NEXT: and a4, t0, a4 -; RV32-NEXT: and a2, t0, a2 +; RV32-NEXT: or a3, t0, a3 +; RV32-NEXT: and a4, a7, a4 +; RV32-NEXT: and a2, a7, a2 ; RV32-NEXT: beq a1, a0, .LBB48_8 ; RV32-NEXT: # %bb.7: # %entry ; RV32-NEXT: sltu a0, a0, a1 @@ -3530,30 +3530,30 @@ define i64 @ustest_f32i64_mm(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a1, 8(sp) -; RV32-NEXT: lw a2, 12(sp) +; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: lw a2, 20(sp) ; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: beqz a0, .LBB50_2 +; RV32-NEXT: beqz a2, .LBB50_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a4, a0, 0 +; RV32-NEXT: slti a4, a2, 0 ; RV32-NEXT: j .LBB50_3 ; RV32-NEXT: .LBB50_2: ; RV32-NEXT: seqz a4, a3 ; RV32-NEXT: .LBB50_3: # %entry ; RV32-NEXT: xori a3, a3, 1 -; RV32-NEXT: or a3, a3, a0 +; RV32-NEXT: or a3, a3, a2 ; RV32-NEXT: seqz a3, a3 ; RV32-NEXT: addi a3, a3, -1 ; RV32-NEXT: and a3, a3, a4 ; RV32-NEXT: neg a3, a3 -; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: and a1, a3, a1 ; RV32-NEXT: and a0, a3, a0 -; RV32-NEXT: slti a0, a0, 0 -; RV32-NEXT: addi a3, a0, -1 -; RV32-NEXT: and a0, a3, a1 -; RV32-NEXT: and a1, a3, a2 +; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: slti a2, a2, 0 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a2, a0 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 32 @@ -3620,15 +3620,15 @@ define i64 @stest_f16i64_mm(half %x) { ; RV32-NEXT: .LBB51_3: # %entry ; RV32-NEXT: slti a6, a4, 0 ; RV32-NEXT: .LBB51_4: # %entry -; RV32-NEXT: addi a7, a6, -1 -; RV32-NEXT: neg t0, a6 +; RV32-NEXT: neg a7, a6 +; RV32-NEXT: addi t0, a6, -1 ; RV32-NEXT: bnez a6, .LBB51_6 ; RV32-NEXT: # %bb.5: # %entry ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB51_6: # %entry -; RV32-NEXT: or a3, a7, a3 -; RV32-NEXT: and a4, t0, a4 -; RV32-NEXT: and a2, t0, a2 +; RV32-NEXT: or a3, t0, a3 +; RV32-NEXT: and a4, a7, a4 +; RV32-NEXT: and a2, a7, a2 ; RV32-NEXT: beq a1, a0, .LBB51_8 ; RV32-NEXT: # %bb.7: # %entry ; RV32-NEXT: sltu a0, a0, a1 @@ -3767,30 +3767,30 @@ define i64 @ustest_f16i64_mm(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a1, 8(sp) -; RV32-NEXT: lw a2, 12(sp) +; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: lw a2, 20(sp) ; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: beqz a0, .LBB53_2 +; RV32-NEXT: beqz a2, .LBB53_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a4, a0, 0 +; RV32-NEXT: slti a4, a2, 0 ; RV32-NEXT: j .LBB53_3 ; RV32-NEXT: .LBB53_2: ; RV32-NEXT: seqz a4, a3 ; RV32-NEXT: .LBB53_3: # %entry ; RV32-NEXT: xori a3, a3, 1 -; RV32-NEXT: or a3, a3, a0 +; RV32-NEXT: or a3, a3, a2 ; RV32-NEXT: seqz a3, a3 ; RV32-NEXT: addi a3, a3, -1 ; RV32-NEXT: and a3, a3, a4 ; RV32-NEXT: neg a3, a3 -; RV32-NEXT: and a2, a3, a2 ; RV32-NEXT: and a1, a3, a1 ; RV32-NEXT: and a0, a3, a0 -; RV32-NEXT: slti a0, a0, 0 -; RV32-NEXT: addi a3, a0, -1 -; RV32-NEXT: and a0, a3, a1 -; RV32-NEXT: and a1, a3, a2 +; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: slti a2, a2, 0 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a0, a2, a0 +; RV32-NEXT: and a1, 
a2, a1 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 32 diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll index facb544..0c152e6 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -2262,12 +2262,12 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IZHINX-NEXT: addi a2, a3, -1 ; RV32IZHINX-NEXT: .LBB10_4: # %start ; RV32IZHINX-NEXT: feq.s a3, s0, s0 -; RV32IZHINX-NEXT: neg a4, a1 -; RV32IZHINX-NEXT: neg a1, s1 +; RV32IZHINX-NEXT: neg a4, s1 +; RV32IZHINX-NEXT: neg a5, a1 ; RV32IZHINX-NEXT: neg a3, a3 -; RV32IZHINX-NEXT: and a0, a1, a0 +; RV32IZHINX-NEXT: and a0, a4, a0 ; RV32IZHINX-NEXT: and a1, a3, a2 -; RV32IZHINX-NEXT: or a0, a4, a0 +; RV32IZHINX-NEXT: or a0, a5, a0 ; RV32IZHINX-NEXT: and a0, a3, a0 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2309,12 +2309,12 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; RV32IZDINXZHINX-NEXT: addi a2, a3, -1 ; RV32IZDINXZHINX-NEXT: .LBB10_4: # %start ; RV32IZDINXZHINX-NEXT: feq.s a3, s0, s0 -; RV32IZDINXZHINX-NEXT: neg a4, a1 -; RV32IZDINXZHINX-NEXT: neg a1, s1 +; RV32IZDINXZHINX-NEXT: neg a4, s1 +; RV32IZDINXZHINX-NEXT: neg a5, a1 ; RV32IZDINXZHINX-NEXT: neg a3, a3 -; RV32IZDINXZHINX-NEXT: and a0, a1, a0 +; RV32IZDINXZHINX-NEXT: and a0, a4, a0 ; RV32IZDINXZHINX-NEXT: and a1, a3, a2 -; RV32IZDINXZHINX-NEXT: or a0, a4, a0 +; RV32IZDINXZHINX-NEXT: or a0, a5, a0 ; RV32IZDINXZHINX-NEXT: and a0, a3, a0 ; RV32IZDINXZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZDINXZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2653,12 +2653,12 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; CHECK32-IZHINXMIN-NEXT: addi a2, a3, -1 ; CHECK32-IZHINXMIN-NEXT: .LBB10_4: # %start ; CHECK32-IZHINXMIN-NEXT: feq.s a3, s0, s0 -; CHECK32-IZHINXMIN-NEXT: neg a4, a1 -; CHECK32-IZHINXMIN-NEXT: neg a1, s1 +; CHECK32-IZHINXMIN-NEXT: neg a4, s1 +; CHECK32-IZHINXMIN-NEXT: neg a5, a1 ; CHECK32-IZHINXMIN-NEXT: neg a3, a3 -; CHECK32-IZHINXMIN-NEXT: and a0, a1, a0 +; CHECK32-IZHINXMIN-NEXT: and a0, a4, a0 ; CHECK32-IZHINXMIN-NEXT: and a1, a3, a2 -; CHECK32-IZHINXMIN-NEXT: or a0, a4, a0 +; CHECK32-IZHINXMIN-NEXT: or a0, a5, a0 ; CHECK32-IZHINXMIN-NEXT: and a0, a3, a0 ; CHECK32-IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK32-IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2701,12 +2701,12 @@ define i64 @fcvt_l_h_sat(half %a) nounwind { ; CHECK32-IZDINXZHINXMIN-NEXT: addi a2, a3, -1 ; CHECK32-IZDINXZHINXMIN-NEXT: .LBB10_4: # %start ; CHECK32-IZDINXZHINXMIN-NEXT: feq.s a3, s0, s0 -; CHECK32-IZDINXZHINXMIN-NEXT: neg a4, a1 -; CHECK32-IZDINXZHINXMIN-NEXT: neg a1, s1 +; CHECK32-IZDINXZHINXMIN-NEXT: neg a4, s1 +; CHECK32-IZDINXZHINXMIN-NEXT: neg a5, a1 ; CHECK32-IZDINXZHINXMIN-NEXT: neg a3, a3 -; CHECK32-IZDINXZHINXMIN-NEXT: and a0, a1, a0 +; CHECK32-IZDINXZHINXMIN-NEXT: and a0, a4, a0 ; CHECK32-IZDINXZHINXMIN-NEXT: and a1, a3, a2 -; CHECK32-IZDINXZHINXMIN-NEXT: or a0, a4, a0 +; CHECK32-IZDINXZHINXMIN-NEXT: or a0, a5, a0 ; CHECK32-IZDINXZHINXMIN-NEXT: and a0, a3, a0 ; CHECK32-IZDINXZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK32-IZDINXZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -2972,18 +2972,19 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; 
RV32IZHINX-NEXT: fcvt.s.h a0, a0 -; RV32IZHINX-NEXT: lui a1, 391168 -; RV32IZHINX-NEXT: addi a1, a1, -1 -; RV32IZHINX-NEXT: fle.s a2, zero, a0 -; RV32IZHINX-NEXT: flt.s a1, a1, a0 -; RV32IZHINX-NEXT: neg s0, a1 -; RV32IZHINX-NEXT: neg s1, a2 +; RV32IZHINX-NEXT: fcvt.s.h s0, a0 +; RV32IZHINX-NEXT: fle.s a0, zero, s0 +; RV32IZHINX-NEXT: neg s1, a0 +; RV32IZHINX-NEXT: mv a0, s0 ; RV32IZHINX-NEXT: call __fixunssfdi ; RV32IZHINX-NEXT: and a0, s1, a0 +; RV32IZHINX-NEXT: lui a2, 391168 ; RV32IZHINX-NEXT: and a1, s1, a1 -; RV32IZHINX-NEXT: or a0, s0, a0 -; RV32IZHINX-NEXT: or a1, s0, a1 +; RV32IZHINX-NEXT: addi a2, a2, -1 +; RV32IZHINX-NEXT: flt.s a2, a2, s0 +; RV32IZHINX-NEXT: neg a2, a2 +; RV32IZHINX-NEXT: or a0, a2, a0 +; RV32IZHINX-NEXT: or a1, a2, a1 ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -3005,18 +3006,19 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IZDINXZHINX-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZDINXZHINX-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZDINXZHINX-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32IZDINXZHINX-NEXT: fcvt.s.h a0, a0 -; RV32IZDINXZHINX-NEXT: lui a1, 391168 -; RV32IZDINXZHINX-NEXT: addi a1, a1, -1 -; RV32IZDINXZHINX-NEXT: fle.s a2, zero, a0 -; RV32IZDINXZHINX-NEXT: flt.s a1, a1, a0 -; RV32IZDINXZHINX-NEXT: neg s0, a1 -; RV32IZDINXZHINX-NEXT: neg s1, a2 +; RV32IZDINXZHINX-NEXT: fcvt.s.h s0, a0 +; RV32IZDINXZHINX-NEXT: fle.s a0, zero, s0 +; RV32IZDINXZHINX-NEXT: neg s1, a0 +; RV32IZDINXZHINX-NEXT: mv a0, s0 ; RV32IZDINXZHINX-NEXT: call __fixunssfdi ; RV32IZDINXZHINX-NEXT: and a0, s1, a0 +; RV32IZDINXZHINX-NEXT: lui a2, 391168 ; RV32IZDINXZHINX-NEXT: and a1, s1, a1 -; RV32IZDINXZHINX-NEXT: or a0, s0, a0 -; RV32IZDINXZHINX-NEXT: or a1, s0, a1 +; RV32IZDINXZHINX-NEXT: addi a2, a2, -1 +; RV32IZDINXZHINX-NEXT: flt.s a2, a2, s0 +; RV32IZDINXZHINX-NEXT: neg a2, a2 +; RV32IZDINXZHINX-NEXT: or a0, a2, a0 +; RV32IZDINXZHINX-NEXT: or a1, a2, a1 ; RV32IZDINXZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZDINXZHINX-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32IZDINXZHINX-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -3217,18 +3219,19 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; CHECK32-IZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; CHECK32-IZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; CHECK32-IZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; CHECK32-IZHINXMIN-NEXT: fcvt.s.h a0, a0 -; CHECK32-IZHINXMIN-NEXT: lui a1, 391168 -; CHECK32-IZHINXMIN-NEXT: addi a1, a1, -1 -; CHECK32-IZHINXMIN-NEXT: fle.s a2, zero, a0 -; CHECK32-IZHINXMIN-NEXT: flt.s a1, a1, a0 -; CHECK32-IZHINXMIN-NEXT: neg s0, a1 -; CHECK32-IZHINXMIN-NEXT: neg s1, a2 +; CHECK32-IZHINXMIN-NEXT: fcvt.s.h s0, a0 +; CHECK32-IZHINXMIN-NEXT: fle.s a0, zero, s0 +; CHECK32-IZHINXMIN-NEXT: neg s1, a0 +; CHECK32-IZHINXMIN-NEXT: mv a0, s0 ; CHECK32-IZHINXMIN-NEXT: call __fixunssfdi ; CHECK32-IZHINXMIN-NEXT: and a0, s1, a0 +; CHECK32-IZHINXMIN-NEXT: lui a2, 391168 ; CHECK32-IZHINXMIN-NEXT: and a1, s1, a1 -; CHECK32-IZHINXMIN-NEXT: or a0, s0, a0 -; CHECK32-IZHINXMIN-NEXT: or a1, s0, a1 +; CHECK32-IZHINXMIN-NEXT: addi a2, a2, -1 +; CHECK32-IZHINXMIN-NEXT: flt.s a2, a2, s0 +; CHECK32-IZHINXMIN-NEXT: neg a2, a2 +; CHECK32-IZHINXMIN-NEXT: or a0, a2, a0 +; CHECK32-IZHINXMIN-NEXT: or a1, a2, a1 ; CHECK32-IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK32-IZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; CHECK32-IZHINXMIN-NEXT: lw s1, 
4(sp) # 4-byte Folded Reload @@ -3251,18 +3254,19 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; CHECK32-IZDINXZHINXMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; CHECK32-IZDINXZHINXMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; CHECK32-IZDINXZHINXMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; CHECK32-IZDINXZHINXMIN-NEXT: fcvt.s.h a0, a0 -; CHECK32-IZDINXZHINXMIN-NEXT: lui a1, 391168 -; CHECK32-IZDINXZHINXMIN-NEXT: addi a1, a1, -1 -; CHECK32-IZDINXZHINXMIN-NEXT: fle.s a2, zero, a0 -; CHECK32-IZDINXZHINXMIN-NEXT: flt.s a1, a1, a0 -; CHECK32-IZDINXZHINXMIN-NEXT: neg s0, a1 -; CHECK32-IZDINXZHINXMIN-NEXT: neg s1, a2 +; CHECK32-IZDINXZHINXMIN-NEXT: fcvt.s.h s0, a0 +; CHECK32-IZDINXZHINXMIN-NEXT: fle.s a0, zero, s0 +; CHECK32-IZDINXZHINXMIN-NEXT: neg s1, a0 +; CHECK32-IZDINXZHINXMIN-NEXT: mv a0, s0 ; CHECK32-IZDINXZHINXMIN-NEXT: call __fixunssfdi ; CHECK32-IZDINXZHINXMIN-NEXT: and a0, s1, a0 +; CHECK32-IZDINXZHINXMIN-NEXT: lui a2, 391168 ; CHECK32-IZDINXZHINXMIN-NEXT: and a1, s1, a1 -; CHECK32-IZDINXZHINXMIN-NEXT: or a0, s0, a0 -; CHECK32-IZDINXZHINXMIN-NEXT: or a1, s0, a1 +; CHECK32-IZDINXZHINXMIN-NEXT: addi a2, a2, -1 +; CHECK32-IZDINXZHINXMIN-NEXT: flt.s a2, a2, s0 +; CHECK32-IZDINXZHINXMIN-NEXT: neg a2, a2 +; CHECK32-IZDINXZHINXMIN-NEXT: or a0, a2, a0 +; CHECK32-IZDINXZHINXMIN-NEXT: or a1, a2, a1 ; CHECK32-IZDINXZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK32-IZDINXZHINXMIN-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; CHECK32-IZDINXZHINXMIN-NEXT: lw s1, 4(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll index 774f1a1..c157c63 100644 --- a/llvm/test/CodeGen/RISCV/iabs.ll +++ b/llvm/test/CodeGen/RISCV/iabs.ll @@ -301,58 +301,58 @@ define i64 @select_abs64(i64 %x) { define i128 @abs128(i128 %x) { ; RV32I-LABEL: abs128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 12(a1) -; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: lw a2, 12(a1) +; RV32I-NEXT: lw a3, 0(a1) ; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a1, 8(a1) -; RV32I-NEXT: bgez a3, .LBB8_2 +; RV32I-NEXT: bgez a2, .LBB8_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: neg a5, a1 ; RV32I-NEXT: snez a6, a4 -; RV32I-NEXT: snez a7, a2 +; RV32I-NEXT: snez a7, a3 ; RV32I-NEXT: snez a1, a1 ; RV32I-NEXT: neg a4, a4 ; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: sub a4, a4, a7 -; RV32I-NEXT: sltu a3, a5, a6 +; RV32I-NEXT: sltu a2, a5, a6 ; RV32I-NEXT: neg a7, a1 ; RV32I-NEXT: sub a1, a5, a6 -; RV32I-NEXT: sub a3, a7, a3 -; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a2, a7, a2 +; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: sw a2, 0(a0) +; RV32I-NEXT: sw a3, 0(a0) ; RV32I-NEXT: sw a4, 4(a0) ; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: abs128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 12(a1) -; RV32ZBB-NEXT: lw a2, 0(a1) +; RV32ZBB-NEXT: lw a2, 12(a1) +; RV32ZBB-NEXT: lw a3, 0(a1) ; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a1, 8(a1) -; RV32ZBB-NEXT: bgez a3, .LBB8_2 +; RV32ZBB-NEXT: bgez a2, .LBB8_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: neg a5, a1 ; RV32ZBB-NEXT: snez a6, a4 -; RV32ZBB-NEXT: snez a7, a2 +; RV32ZBB-NEXT: snez a7, a3 ; RV32ZBB-NEXT: snez a1, a1 ; RV32ZBB-NEXT: neg a4, a4 ; RV32ZBB-NEXT: or a6, a7, a6 -; RV32ZBB-NEXT: add a1, a3, a1 +; RV32ZBB-NEXT: add a1, a2, a1 ; RV32ZBB-NEXT: sub a4, a4, a7 -; RV32ZBB-NEXT: sltu a3, a5, a6 +; RV32ZBB-NEXT: sltu a2, a5, a6 ; RV32ZBB-NEXT: neg a7, a1 ; RV32ZBB-NEXT: sub a1, a5, a6 -; 
RV32ZBB-NEXT: sub a3, a7, a3 -; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a2, a7, a2 +; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: .LBB8_2: -; RV32ZBB-NEXT: sw a2, 0(a0) +; RV32ZBB-NEXT: sw a3, 0(a0) ; RV32ZBB-NEXT: sw a4, 4(a0) ; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64I-LABEL: abs128: @@ -383,58 +383,58 @@ define i128 @abs128(i128 %x) { define i128 @select_abs128(i128 %x) { ; RV32I-LABEL: select_abs128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 12(a1) -; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: lw a2, 12(a1) +; RV32I-NEXT: lw a3, 0(a1) ; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a1, 8(a1) -; RV32I-NEXT: bgez a3, .LBB9_2 +; RV32I-NEXT: bgez a2, .LBB9_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: neg a5, a1 ; RV32I-NEXT: snez a6, a4 -; RV32I-NEXT: snez a7, a2 +; RV32I-NEXT: snez a7, a3 ; RV32I-NEXT: snez a1, a1 ; RV32I-NEXT: neg a4, a4 ; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: add a1, a2, a1 ; RV32I-NEXT: sub a4, a4, a7 -; RV32I-NEXT: sltu a3, a5, a6 +; RV32I-NEXT: sltu a2, a5, a6 ; RV32I-NEXT: neg a7, a1 ; RV32I-NEXT: sub a1, a5, a6 -; RV32I-NEXT: sub a3, a7, a3 -; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: sub a2, a7, a2 +; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: .LBB9_2: -; RV32I-NEXT: sw a2, 0(a0) +; RV32I-NEXT: sw a3, 0(a0) ; RV32I-NEXT: sw a4, 4(a0) ; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: select_abs128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 12(a1) -; RV32ZBB-NEXT: lw a2, 0(a1) +; RV32ZBB-NEXT: lw a2, 12(a1) +; RV32ZBB-NEXT: lw a3, 0(a1) ; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a1, 8(a1) -; RV32ZBB-NEXT: bgez a3, .LBB9_2 +; RV32ZBB-NEXT: bgez a2, .LBB9_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: neg a5, a1 ; RV32ZBB-NEXT: snez a6, a4 -; RV32ZBB-NEXT: snez a7, a2 +; RV32ZBB-NEXT: snez a7, a3 ; RV32ZBB-NEXT: snez a1, a1 ; RV32ZBB-NEXT: neg a4, a4 ; RV32ZBB-NEXT: or a6, a7, a6 -; RV32ZBB-NEXT: add a1, a3, a1 +; RV32ZBB-NEXT: add a1, a2, a1 ; RV32ZBB-NEXT: sub a4, a4, a7 -; RV32ZBB-NEXT: sltu a3, a5, a6 +; RV32ZBB-NEXT: sltu a2, a5, a6 ; RV32ZBB-NEXT: neg a7, a1 ; RV32ZBB-NEXT: sub a1, a5, a6 -; RV32ZBB-NEXT: sub a3, a7, a3 -; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: sub a2, a7, a2 +; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: .LBB9_2: -; RV32ZBB-NEXT: sw a2, 0(a0) +; RV32ZBB-NEXT: sw a3, 0(a0) ; RV32ZBB-NEXT: sw a4, 4(a0) ; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64I-LABEL: select_abs128: diff --git a/llvm/test/CodeGen/RISCV/macro-fusions.mir b/llvm/test/CodeGen/RISCV/macro-fusions.mir index 1346414..ae5b52d 100644 --- a/llvm/test/CodeGen/RISCV/macro-fusions.mir +++ b/llvm/test/CodeGen/RISCV/macro-fusions.mir @@ -2,7 +2,12 @@ # RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \ # RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \ # RUN: -mattr=+lui-addi-fusion,+auipc-addi-fusion,+zexth-fusion,+zextw-fusion,+shifted-zextw-fusion,+ld-add-fusion \ +# RUN: -mattr=+add-load-fusion,+auipc-load-fusion,+lui-load-fusion,+addi-load-fusion \ +# RUN: -mattr=+zba,+shxadd-load-fusion \ # RUN: | FileCheck %s +# RUN: llc -mtriple=riscv64-linux-gnu -x=mir < %s \ +# RUN: -debug-only=machine-scheduler -start-before=machine-scheduler 2>&1 \ +# RUN: -mattr=+zba,+bfext-fusion | FileCheck --check-prefixes=CHECK-BFEXT %s # CHECK: lui_addi:%bb.0 # CHECK: Macro fuse: {{.*}}LUI - ADDI @@ -174,3 +179,1374 @@ body: | $x11 = COPY %5 
PseudoRET ... + +# CHECK: add_lb +# CHECK: Macro fuse: {{.*}}ADD - LB +--- +name: add_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: add_lh +# CHECK: Macro fuse: {{.*}}ADD - LH +--- +name: add_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: add_lw +# CHECK: Macro fuse: {{.*}}ADD - LW +--- +name: add_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: add_lbu +# CHECK: Macro fuse: {{.*}}ADD - LBU +--- +name: add_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: add_lhu +# CHECK: Macro fuse: {{.*}}ADD - LHU +--- +name: add_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: add_lwu +# CHECK: Macro fuse: {{.*}}ADD - LWU +--- +name: add_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: auipc_lb +# CHECK: Macro fuse: {{.*}}AUIPC - LB +--- +name: auipc_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = AUIPC 1 + %3:gpr = XORI %1, 2 + %4:gpr = LB %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: auipc_lh +# CHECK: Macro fuse: {{.*}}AUIPC - LH +--- +name: auipc_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = AUIPC 1 + %3:gpr = XORI %1, 2 + %4:gpr = LH %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: auipc_lw +# CHECK: Macro fuse: {{.*}}AUIPC - LW +--- +name: auipc_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = AUIPC 1 + %3:gpr = XORI %1, 2 + %4:gpr = LW %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: auipc_ld +# CHECK: Macro fuse: {{.*}}AUIPC - LD +--- +name: auipc_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = AUIPC 1 + %3:gpr = XORI %1, 2 + %4:gpr = LD %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: auipc_lbu +# CHECK: Macro fuse: {{.*}}AUIPC - LBU +--- +name: auipc_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = AUIPC 1 + %3:gpr = XORI %1, 2 + %4:gpr = LBU %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: auipc_lhu +# CHECK: Macro fuse: {{.*}}AUIPC - LHU +--- +name: auipc_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = AUIPC 1 + %3:gpr = XORI %1, 2 + %4:gpr = LHU %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... 
+ +# CHECK: auipc_lwu +# CHECK: Macro fuse: {{.*}}AUIPC - LWU +--- +name: auipc_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = AUIPC 1 + %3:gpr = XORI %1, 2 + %4:gpr = LWU %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: lui_lb +# CHECK: Macro fuse: {{.*}}LUI - LB +--- +name: lui_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = LUI 1 + %3:gpr = XORI %1, 2 + %4:gpr = LB %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: lui_lh +# CHECK: Macro fuse: {{.*}}LUI - LH +--- +name: lui_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = LUI 1 + %3:gpr = XORI %1, 2 + %4:gpr = LH %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: lui_lw +# CHECK: Macro fuse: {{.*}}LUI - LW +--- +name: lui_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = LUI 1 + %3:gpr = XORI %1, 2 + %4:gpr = LW %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: lui_ld +# CHECK: Macro fuse: {{.*}}LUI - LD +--- +name: lui_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = LUI 1 + %3:gpr = XORI %1, 2 + %4:gpr = LD %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: lui_lbu +# CHECK: Macro fuse: {{.*}}LUI - LBU +--- +name: lui_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = LUI 1 + %3:gpr = XORI %1, 2 + %4:gpr = LBU %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: lui_lhu +# CHECK: Macro fuse: {{.*}}LUI - LHU +--- +name: lui_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = LUI 1 + %3:gpr = XORI %1, 2 + %4:gpr = LHU %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: lui_lwu +# CHECK: Macro fuse: {{.*}}LUI - LWU +--- +name: lui_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = LUI 1 + %3:gpr = XORI %1, 2 + %4:gpr = LWU %2, 4 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK-BFEXT: bitfield_extract +# CHECK-BFEXT: Macro fuse: {{.*}}SLLI - SRLI +--- +name: bitfield_extract +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10 + %1:gpr = COPY $x10 + %2:gpr = SLLI %1, 31 + %3:gpr = XORI %1, 3 + %4:gpr = SRLI %2, 48 + $x10 = COPY %3 + $x11 = COPY %4 + PseudoRET +... + +# CHECK: addi_lb +# CHECK: Macro fuse: {{.*}}ADDI - LB +--- +name: addi_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADDI %1, 8 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: addi_lh +# CHECK: Macro fuse: {{.*}}ADDI - LH +--- +name: addi_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADDI %1, 8 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: addi_lw +# CHECK: Macro fuse: {{.*}}ADDI - LW +--- +name: addi_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADDI %1, 8 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... 
+ +# CHECK: addi_ld +# CHECK: Macro fuse: {{.*}}ADDI - LD +--- +name: addi_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADDI %1, 8 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: addi_lbu +# CHECK: Macro fuse: {{.*}}ADDI - LBU +--- +name: addi_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADDI %1, 8 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: addi_lhu +# CHECK: Macro fuse: {{.*}}ADDI - LHU +--- +name: addi_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADDI %1, 8 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: addi_lwu +# CHECK: Macro fuse: {{.*}}ADDI - LWU +--- +name: addi_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADDI %1, 8 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: adduw_lb +# CHECK: Macro fuse: {{.*}}ADD_UW - LB +--- +name: adduw_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: adduw_lh +# CHECK: Macro fuse: {{.*}}ADD_UW - LH +--- +name: adduw_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: adduw_lw +# CHECK: Macro fuse: {{.*}}ADD_UW - LW +--- +name: adduw_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: adduw_ld +# CHECK: Macro fuse: {{.*}}ADD_UW - LD +--- +name: adduw_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: adduw_lbu +# CHECK: Macro fuse: {{.*}}ADD_UW - LBU +--- +name: adduw_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: adduw_lhu +# CHECK: Macro fuse: {{.*}}ADD_UW - LHU +--- +name: adduw_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: adduw_lwu +# CHECK: Macro fuse: {{.*}}ADD_UW - LWU +--- +name: adduw_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 0 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... 
+ +# CHECK: sh1add_lb +# CHECK: Macro fuse: {{.*}}SH1ADD - LB +--- +name: sh1add_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2add_lb +# CHECK: Macro fuse: {{.*}}SH2ADD - LB +--- +name: sh2add_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3add_lb +# CHECK: Macro fuse: {{.*}}SH3ADD - LB +--- +name: sh3add_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1add_lh +# CHECK: Macro fuse: {{.*}}SH1ADD - LH +--- +name: sh1add_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2add_lh +# CHECK: Macro fuse: {{.*}}SH2ADD - LH +--- +name: sh2add_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3add_lh +# CHECK: Macro fuse: {{.*}}SH3ADD - LH +--- +name: sh3add_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1add_lw +# CHECK: Macro fuse: {{.*}}SH1ADD - LW +--- +name: sh1add_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2add_lw +# CHECK: Macro fuse: {{.*}}SH2ADD - LW +--- +name: sh2add_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3add_lw +# CHECK: Macro fuse: {{.*}}SH3ADD - LW +--- +name: sh3add_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1add_ld +# CHECK: Macro fuse: {{.*}}SH1ADD - LD +--- +name: sh1add_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2add_ld +# CHECK: Macro fuse: {{.*}}SH2ADD - LD +--- +name: sh2add_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... 
+ +# CHECK: sh3add_ld +# CHECK: Macro fuse: {{.*}}SH3ADD - LD +--- +name: sh3add_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1add_lbu +# CHECK: Macro fuse: {{.*}}SH1ADD - LBU +--- +name: sh1add_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2add_lbu +# CHECK: Macro fuse: {{.*}}SH2ADD - LBU +--- +name: sh2add_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3add_lbu +# CHECK: Macro fuse: {{.*}}SH3ADD - LBU +--- +name: sh3add_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1add_lhu +# CHECK: Macro fuse: {{.*}}SH1ADD - LHU +--- +name: sh1add_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2add_lhu +# CHECK: Macro fuse: {{.*}}SH2ADD - LHU +--- +name: sh2add_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3add_lhu +# CHECK: Macro fuse: {{.*}}SH3ADD - LHU +--- +name: sh3add_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1add_lwu +# CHECK: Macro fuse: {{.*}}SH1ADD - LWU +--- +name: sh1add_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2add_lwu +# CHECK: Macro fuse: {{.*}}SH2ADD - LWU +--- +name: sh2add_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3add_lwu +# CHECK: Macro fuse: {{.*}}SH3ADD - LWU +--- +name: sh3add_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1adduw_lb +# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LB +--- +name: sh1adduw_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... 
+ +# CHECK: sh2adduw_lb +# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LB +--- +name: sh2adduw_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3adduw_lb +# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LB +--- +name: sh3adduw_lb +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LB %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1adduw_lh +# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LH +--- +name: sh1adduw_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2adduw_lh +# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LH +--- +name: sh2adduw_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3adduw_lh +# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LH +--- +name: sh3adduw_lh +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LH %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1adduw_lw +# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LW +--- +name: sh1adduw_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2adduw_lw +# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LW +--- +name: sh2adduw_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3adduw_lw +# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LW +--- +name: sh3adduw_lw +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LW %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1adduw_ld +# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LD +--- +name: sh1adduw_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2adduw_ld +# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LD +--- +name: sh2adduw_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... 
+ +# CHECK: sh3adduw_ld +# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LD +--- +name: sh3adduw_ld +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LD %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1adduw_lbu +# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LBU +--- +name: sh1adduw_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2adduw_lbu +# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LBU +--- +name: sh2adduw_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3adduw_lbu +# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LBU +--- +name: sh3adduw_lbu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LBU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1adduw_lhu +# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LHU +--- +name: sh1adduw_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2adduw_lhu +# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LHU +--- +name: sh2adduw_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3adduw_lhu +# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LHU +--- +name: sh3adduw_lhu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LHU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh1adduw_lwu +# CHECK: Macro fuse: {{.*}}SH1ADD_UW - LWU +--- +name: sh1adduw_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH1ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh2adduw_lwu +# CHECK: Macro fuse: {{.*}}SH2ADD_UW - LWU +--- +name: sh2adduw_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH2ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... + +# CHECK: sh3adduw_lwu +# CHECK: Macro fuse: {{.*}}SH3ADD_UW - LWU +--- +name: sh3adduw_lwu +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x10, $x11 + %1:gpr = COPY $x10 + %2:gpr = COPY $x11 + %3:gpr = SH3ADD_UW %1, %2 + %4:gpr = XORI %2, 3 + %5:gpr = LWU %3, 8 + $x10 = COPY %4 + $x11 = COPY %5 + PseudoRET +... 
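
A note on the new macro-fusion tests above: each MIR case places an unrelated XORI between the address-producing instruction (ADD, ADDI, AUIPC, LUI, SHxADD, SHxADD_UW) and the dependent load, and the "Macro fuse:" lines check that the scheduler's fusion mutation still recognizes the pair so the two instructions can be kept adjacent. At the IR level these pairs are what simple indexed loads typically lower to. A minimal sketch (hypothetical function names, not part of this patch) that llc for riscv64 would usually lower to an ADD feeding an LB, and with +zba to an SH2ADD feeding an LW:

define i8 @byte_index(ptr %base, i64 %i) {
  ; base + i, then an 8-bit load: normally lowers to ADD followed by LB
  %p = getelementptr inbounds i8, ptr %base, i64 %i
  %v = load i8, ptr %p
  ret i8 %v
}

define i32 @word_index(ptr %base, i64 %i) {
  ; base + (i << 2), then a 32-bit load: with +zba this is typically SH2ADD followed by LW
  %p = getelementptr inbounds i32, ptr %base, i64 %i
  %v = load i32, ptr %p
  ret i32 %v
}
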
diff --git a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll index 160f0ae..abdc1ba 100644 --- a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll +++ b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll @@ -1,17 +1,42 @@ ; REQUIRES: asserts -; RUN: llc -mtriple=riscv32 -verify-misched -riscv-misched-load-store-clustering=false \ +; +; Disable all misched clustering +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=NOCLUSTER %s -; RUN: llc -mtriple=riscv64 -verify-misched -riscv-misched-load-store-clustering=false \ +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=NOCLUSTER %s +; +; ST misched clustering only +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=STCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=STCLUSTER %s +; +; LD misched clustering only ; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-store-clustering \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=LDCLUSTER %s ; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-store-clustering \ ; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ ; RUN: | FileCheck -check-prefix=LDCLUSTER %s - +; +; Default misched cluster settings (i.e. 
both LD and ST clustering) +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s define i32 @load_clustering_1(ptr nocapture %p) { ; NOCLUSTER: ********** MI Scheduling ********** @@ -22,6 +47,14 @@ define i32 @load_clustering_1(ptr nocapture %p) { ; NOCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4 ; NOCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16 ; +; STCLUSTER: ********** MI Scheduling ********** +; STCLUSTER-LABEL: load_clustering_1:%bb.0 +; STCLUSTER: *** Final schedule for %bb.0 *** +; STCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12 +; STCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8 +; STCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4 +; STCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16 +; ; LDCLUSTER: ********** MI Scheduling ********** ; LDCLUSTER-LABEL: load_clustering_1:%bb.0 ; LDCLUSTER: *** Final schedule for %bb.0 *** @@ -29,6 +62,14 @@ define i32 @load_clustering_1(ptr nocapture %p) { ; LDCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8 ; LDCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12 ; LDCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16 +; +; DEFAULTCLUSTER: ********** MI Scheduling ********** +; DEFAULTCLUSTER-LABEL: load_clustering_1:%bb.0 +; DEFAULTCLUSTER: *** Final schedule for %bb.0 *** +; DEFAULTCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4 +; DEFAULTCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8 +; DEFAULTCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12 +; DEFAULTCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16 entry: %arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3 %val0 = load i32, ptr %arrayidx0 diff --git a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir index 21398d3..01960f9 100644 --- a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir +++ b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir @@ -1,10 +1,12 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -verify-misched -enable-post-misched=false \ -# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \ +# RUN: -mattr=+disable-postmisched-load-clustering \ +# RUN: -mattr=+disable-postmisched-store-clustering -debug-only=machine-scheduler \ # RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \ # RUN: | FileCheck -check-prefix=NOPOSTMISCHED %s # RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \ -# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \ +# RUN: -mattr=+disable-postmisched-load-clustering \ +# RUN: -mattr=+disable-postmisched-store-clustering -debug-only=machine-scheduler \ # RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \ # RUN: | FileCheck -check-prefix=NOCLUSTER %s # RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \ diff --git a/llvm/test/CodeGen/RISCV/misched-store-clustering.ll b/llvm/test/CodeGen/RISCV/misched-store-clustering.ll new file mode 100644 index 0000000..02e853d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/misched-store-clustering.ll @@ -0,0 +1,83 @@ +; REQUIRES: asserts +; +; Disable all misched clustering +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: 
-mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=NOCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering,+disable-misched-store-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=NOCLUSTER %s +; +; ST misched clustering only +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=STCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-load-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=STCLUSTER %s +; +; LD misched clustering only +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -mattr=+disable-misched-store-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=LDCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -mattr=+disable-misched-store-clustering \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=LDCLUSTER %s +; +; Default misched cluster settings (i.e. both LD and ST clustering) +; RUN: llc -mtriple=riscv32 -verify-misched \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s +; RUN: llc -mtriple=riscv64 -verify-misched \ +; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \ +; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s + +define i32 @store_clustering_1(ptr nocapture %p, i32 %v) { +; NOCLUSTER: ********** MI Scheduling ********** +; NOCLUSTER-LABEL: store_clustering_1:%bb.0 +; NOCLUSTER: *** Final schedule for %bb.0 *** +; NOCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0) +; NOCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1) +; NOCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2) +; NOCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3) +; +; STCLUSTER: ********** MI Scheduling ********** +; STCLUSTER-LABEL: store_clustering_1:%bb.0 +; STCLUSTER: *** Final schedule for %bb.0 *** +; STCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2) +; STCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1) +; STCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0) +; STCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3) +; +; LDCLUSTER: ********** MI Scheduling ********** +; LDCLUSTER-LABEL: store_clustering_1:%bb.0 +; LDCLUSTER: *** Final schedule for %bb.0 *** +; LDCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0) +; LDCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1) +; LDCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2) +; LDCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: (store (s32) into %ir.arrayidx3) +; +; DEFAULTCLUSTER: ********** MI Scheduling ********** +; DEFAULTCLUSTER-LABEL: store_clustering_1:%bb.0 +; DEFAULTCLUSTER: *** Final schedule for %bb.0 *** +; DEFAULTCLUSTER: SU(4): SW %1:gpr, %0:gpr, 4 :: (store (s32) into %ir.arrayidx2) +; DEFAULTCLUSTER: SU(3): SW %1:gpr, %0:gpr, 8 :: (store (s32) into %ir.arrayidx1) +; DEFAULTCLUSTER: SU(2): SW %1:gpr, %0:gpr, 12 :: (store (s32) into %ir.arrayidx0) +; DEFAULTCLUSTER: SU(5): SW %1:gpr, %0:gpr, 16 :: 
(store (s32) into %ir.arrayidx3) +entry: + %arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3 + store i32 %v, ptr %arrayidx0 + %arrayidx1 = getelementptr inbounds i32, ptr %p, i32 2 + store i32 %v, ptr %arrayidx1 + %arrayidx2 = getelementptr inbounds i32, ptr %p, i32 1 + store i32 %v, ptr %arrayidx2 + %arrayidx3 = getelementptr inbounds i32, ptr %p, i32 4 + store i32 %v, ptr %arrayidx3 + ret i32 %v +} diff --git a/llvm/test/CodeGen/RISCV/note-gnu-property-zicfiss.ll b/llvm/test/CodeGen/RISCV/note-gnu-property-zicfiss.ll index 24d63cb..efc4439 100644 --- a/llvm/test/CodeGen/RISCV/note-gnu-property-zicfiss.ll +++ b/llvm/test/CodeGen/RISCV/note-gnu-property-zicfiss.ll @@ -7,19 +7,18 @@ ; ASM: .section ".note.GNU-stack","",@progbits ; ASM-NEXT: .section .note.gnu.property,"a",@note +; ASM32-NEXT: .p2align 2, 0x0 +; ASM64-NEXT: .p2align 3, 0x0 ; ASM-NEXT: .word 4 -; ASM-NEXT: .word .Ltmp1-.Ltmp0 +; ASM32-NEXT: .word 12 +; ASM64-NEXT: .word 16 ; ASM-NEXT: .word 5 ; ASM-NEXT: .asciz "GNU" -; ASM-NEXT: .Ltmp0: -; ASM32-NEXT: .p2align 2, 0x0 -; ASM64-NEXT: .p2align 3, 0x0 ; ASM-NEXT: .word 3221225472 ; ASM-NEXT: .word 4 ; ASM-NEXT: .word 2 ; ASM32-NEXT: .p2align 2, 0x0 ; ASM64-NEXT: .p2align 3, 0x0 -; ASM-NEXT: .Ltmp1: define i32 @f() "hw-shadow-stack" { entry: diff --git a/llvm/test/CodeGen/RISCV/rnmi-interrupt-attr-error.ll b/llvm/test/CodeGen/RISCV/rnmi-interrupt-attr-error.ll new file mode 100644 index 0000000..19cc994 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rnmi-interrupt-attr-error.ll @@ -0,0 +1,9 @@ +; RUN: not llc -mtriple riscv32-unknown-elf -mattr=-smrnmi -o - %s 2>&1 \ +; RUN: | FileCheck %s +; RUN: not llc -mtriple riscv64-unknown-elf -mattr=-smrnmi -o - %s 2>&1 \ +; RUN: | FileCheck %s + +; CHECK: LLVM ERROR: 'rnmi' interrupt kind requires Srnmi extension +define void @test_rnmi() "interrupt"="rnmi" { + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rnmi-interrupt-attr.ll b/llvm/test/CodeGen/RISCV/rnmi-interrupt-attr.ll new file mode 100644 index 0000000..03236a0 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rnmi-interrupt-attr.ll @@ -0,0 +1,373 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple riscv32-unknown-elf -mattr=+smrnmi -o - %s \ +; RUN: -verify-machineinstrs | FileCheck --check-prefix=RNMI-RV32 %s + +; RUN: llc -mtriple riscv32-unknown-elf -mattr=+smrnmi -o - %s \ +; RUN: -verify-machineinstrs -frame-pointer=all | FileCheck --check-prefix=RNMI-RV32-FP %s + +; RUN: llc -mtriple riscv64-unknown-elf -mattr=+smrnmi -o - %s \ +; RUN: -verify-machineinstrs | FileCheck --check-prefix=RNMI-RV64 %s + +; RUN: llc -mtriple riscv64-unknown-elf -mattr=+smrnmi -o - %s \ +; RUN: -verify-machineinstrs -frame-pointer=all | FileCheck --check-prefix=RNMI-RV64-FP %s + +define void @test_rnmi_empty() "interrupt"="rnmi" { +; RNMI-RV32-LABEL: test_rnmi_empty: +; RNMI-RV32: # %bb.0: +; RNMI-RV32-NEXT: mnret +; +; RNMI-RV32-FP-LABEL: test_rnmi_empty: +; RNMI-RV32-FP: # %bb.0: +; RNMI-RV32-FP-NEXT: addi sp, sp, -16 +; RNMI-RV32-FP-NEXT: .cfi_def_cfa_offset 16 +; RNMI-RV32-FP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: .cfi_offset ra, -4 +; RNMI-RV32-FP-NEXT: .cfi_offset s0, -8 +; RNMI-RV32-FP-NEXT: addi s0, sp, 16 +; RNMI-RV32-FP-NEXT: .cfi_def_cfa s0, 0 +; RNMI-RV32-FP-NEXT: .cfi_def_cfa sp, 16 +; RNMI-RV32-FP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: .cfi_restore 
ra +; RNMI-RV32-FP-NEXT: .cfi_restore s0 +; RNMI-RV32-FP-NEXT: addi sp, sp, 16 +; RNMI-RV32-FP-NEXT: .cfi_def_cfa_offset 0 +; RNMI-RV32-FP-NEXT: mnret +; +; RNMI-RV64-LABEL: test_rnmi_empty: +; RNMI-RV64: # %bb.0: +; RNMI-RV64-NEXT: mnret +; +; RNMI-RV64-FP-LABEL: test_rnmi_empty: +; RNMI-RV64-FP: # %bb.0: +; RNMI-RV64-FP-NEXT: addi sp, sp, -16 +; RNMI-RV64-FP-NEXT: .cfi_def_cfa_offset 16 +; RNMI-RV64-FP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: .cfi_offset ra, -8 +; RNMI-RV64-FP-NEXT: .cfi_offset s0, -16 +; RNMI-RV64-FP-NEXT: addi s0, sp, 16 +; RNMI-RV64-FP-NEXT: .cfi_def_cfa s0, 0 +; RNMI-RV64-FP-NEXT: .cfi_def_cfa sp, 16 +; RNMI-RV64-FP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: .cfi_restore ra +; RNMI-RV64-FP-NEXT: .cfi_restore s0 +; RNMI-RV64-FP-NEXT: addi sp, sp, 16 +; RNMI-RV64-FP-NEXT: .cfi_def_cfa_offset 0 +; RNMI-RV64-FP-NEXT: mnret + ret void +} + +declare void @callee() + +define void @test_rnmi_caller() "interrupt"="rnmi" { +; RNMI-RV32-LABEL: test_rnmi_caller: +; RNMI-RV32: # %bb.0: +; RNMI-RV32-NEXT: addi sp, sp, -64 +; RNMI-RV32-NEXT: .cfi_def_cfa_offset 64 +; RNMI-RV32-NEXT: sw ra, 60(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: sw t0, 56(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: sw t1, 52(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: sw t2, 48(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: sw a0, 44(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: sw a1, 40(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: sw a2, 36(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: sw a3, 32(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: sw a4, 28(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: sw a5, 24(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: sw a6, 20(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: sw a7, 16(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: sw t3, 12(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: sw t4, 8(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: sw t5, 4(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: sw t6, 0(sp) # 4-byte Folded Spill +; RNMI-RV32-NEXT: .cfi_offset ra, -4 +; RNMI-RV32-NEXT: .cfi_offset t0, -8 +; RNMI-RV32-NEXT: .cfi_offset t1, -12 +; RNMI-RV32-NEXT: .cfi_offset t2, -16 +; RNMI-RV32-NEXT: .cfi_offset a0, -20 +; RNMI-RV32-NEXT: .cfi_offset a1, -24 +; RNMI-RV32-NEXT: .cfi_offset a2, -28 +; RNMI-RV32-NEXT: .cfi_offset a3, -32 +; RNMI-RV32-NEXT: .cfi_offset a4, -36 +; RNMI-RV32-NEXT: .cfi_offset a5, -40 +; RNMI-RV32-NEXT: .cfi_offset a6, -44 +; RNMI-RV32-NEXT: .cfi_offset a7, -48 +; RNMI-RV32-NEXT: .cfi_offset t3, -52 +; RNMI-RV32-NEXT: .cfi_offset t4, -56 +; RNMI-RV32-NEXT: .cfi_offset t5, -60 +; RNMI-RV32-NEXT: .cfi_offset t6, -64 +; RNMI-RV32-NEXT: call callee +; RNMI-RV32-NEXT: lw ra, 60(sp) # 4-byte Folded Reload +; RNMI-RV32-NEXT: lw t0, 56(sp) # 4-byte Folded Reload +; RNMI-RV32-NEXT: lw t1, 52(sp) # 4-byte Folded Reload +; RNMI-RV32-NEXT: lw t2, 48(sp) # 4-byte Folded Reload +; RNMI-RV32-NEXT: lw a0, 44(sp) # 4-byte Folded Reload +; RNMI-RV32-NEXT: lw a1, 40(sp) # 4-byte Folded Reload +; RNMI-RV32-NEXT: lw a2, 36(sp) # 4-byte Folded Reload +; RNMI-RV32-NEXT: lw a3, 32(sp) # 4-byte Folded Reload +; RNMI-RV32-NEXT: lw a4, 28(sp) # 4-byte Folded Reload +; RNMI-RV32-NEXT: lw a5, 24(sp) # 4-byte Folded Reload +; RNMI-RV32-NEXT: lw a6, 20(sp) # 4-byte Folded Reload +; RNMI-RV32-NEXT: lw a7, 16(sp) # 4-byte Folded Reload +; RNMI-RV32-NEXT: lw t3, 12(sp) # 4-byte Folded Reload +; RNMI-RV32-NEXT: lw t4, 8(sp) # 4-byte Folded 
Reload +; RNMI-RV32-NEXT: lw t5, 4(sp) # 4-byte Folded Reload +; RNMI-RV32-NEXT: lw t6, 0(sp) # 4-byte Folded Reload +; RNMI-RV32-NEXT: .cfi_restore ra +; RNMI-RV32-NEXT: .cfi_restore t0 +; RNMI-RV32-NEXT: .cfi_restore t1 +; RNMI-RV32-NEXT: .cfi_restore t2 +; RNMI-RV32-NEXT: .cfi_restore a0 +; RNMI-RV32-NEXT: .cfi_restore a1 +; RNMI-RV32-NEXT: .cfi_restore a2 +; RNMI-RV32-NEXT: .cfi_restore a3 +; RNMI-RV32-NEXT: .cfi_restore a4 +; RNMI-RV32-NEXT: .cfi_restore a5 +; RNMI-RV32-NEXT: .cfi_restore a6 +; RNMI-RV32-NEXT: .cfi_restore a7 +; RNMI-RV32-NEXT: .cfi_restore t3 +; RNMI-RV32-NEXT: .cfi_restore t4 +; RNMI-RV32-NEXT: .cfi_restore t5 +; RNMI-RV32-NEXT: .cfi_restore t6 +; RNMI-RV32-NEXT: addi sp, sp, 64 +; RNMI-RV32-NEXT: .cfi_def_cfa_offset 0 +; RNMI-RV32-NEXT: mnret +; +; RNMI-RV32-FP-LABEL: test_rnmi_caller: +; RNMI-RV32-FP: # %bb.0: +; RNMI-RV32-FP-NEXT: addi sp, sp, -80 +; RNMI-RV32-FP-NEXT: .cfi_def_cfa_offset 80 +; RNMI-RV32-FP-NEXT: sw ra, 76(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw t0, 72(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw t1, 68(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw t2, 64(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw s0, 60(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw a0, 56(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw a1, 52(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw a2, 48(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw a3, 44(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw a4, 40(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw a5, 36(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw a6, 32(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw a7, 28(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw t3, 24(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw t4, 20(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw t5, 16(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: sw t6, 12(sp) # 4-byte Folded Spill +; RNMI-RV32-FP-NEXT: .cfi_offset ra, -4 +; RNMI-RV32-FP-NEXT: .cfi_offset t0, -8 +; RNMI-RV32-FP-NEXT: .cfi_offset t1, -12 +; RNMI-RV32-FP-NEXT: .cfi_offset t2, -16 +; RNMI-RV32-FP-NEXT: .cfi_offset s0, -20 +; RNMI-RV32-FP-NEXT: .cfi_offset a0, -24 +; RNMI-RV32-FP-NEXT: .cfi_offset a1, -28 +; RNMI-RV32-FP-NEXT: .cfi_offset a2, -32 +; RNMI-RV32-FP-NEXT: .cfi_offset a3, -36 +; RNMI-RV32-FP-NEXT: .cfi_offset a4, -40 +; RNMI-RV32-FP-NEXT: .cfi_offset a5, -44 +; RNMI-RV32-FP-NEXT: .cfi_offset a6, -48 +; RNMI-RV32-FP-NEXT: .cfi_offset a7, -52 +; RNMI-RV32-FP-NEXT: .cfi_offset t3, -56 +; RNMI-RV32-FP-NEXT: .cfi_offset t4, -60 +; RNMI-RV32-FP-NEXT: .cfi_offset t5, -64 +; RNMI-RV32-FP-NEXT: .cfi_offset t6, -68 +; RNMI-RV32-FP-NEXT: addi s0, sp, 80 +; RNMI-RV32-FP-NEXT: .cfi_def_cfa s0, 0 +; RNMI-RV32-FP-NEXT: call callee +; RNMI-RV32-FP-NEXT: .cfi_def_cfa sp, 80 +; RNMI-RV32-FP-NEXT: lw ra, 76(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw t0, 72(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw t1, 68(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw t2, 64(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw s0, 60(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw a1, 52(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw a2, 48(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw a3, 44(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw a4, 40(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw a5, 36(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw a6, 32(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw a7, 28(sp) # 4-byte Folded Reload +; 
RNMI-RV32-FP-NEXT: lw t3, 24(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw t4, 20(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw t5, 16(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: lw t6, 12(sp) # 4-byte Folded Reload +; RNMI-RV32-FP-NEXT: .cfi_restore ra +; RNMI-RV32-FP-NEXT: .cfi_restore t0 +; RNMI-RV32-FP-NEXT: .cfi_restore t1 +; RNMI-RV32-FP-NEXT: .cfi_restore t2 +; RNMI-RV32-FP-NEXT: .cfi_restore s0 +; RNMI-RV32-FP-NEXT: .cfi_restore a0 +; RNMI-RV32-FP-NEXT: .cfi_restore a1 +; RNMI-RV32-FP-NEXT: .cfi_restore a2 +; RNMI-RV32-FP-NEXT: .cfi_restore a3 +; RNMI-RV32-FP-NEXT: .cfi_restore a4 +; RNMI-RV32-FP-NEXT: .cfi_restore a5 +; RNMI-RV32-FP-NEXT: .cfi_restore a6 +; RNMI-RV32-FP-NEXT: .cfi_restore a7 +; RNMI-RV32-FP-NEXT: .cfi_restore t3 +; RNMI-RV32-FP-NEXT: .cfi_restore t4 +; RNMI-RV32-FP-NEXT: .cfi_restore t5 +; RNMI-RV32-FP-NEXT: .cfi_restore t6 +; RNMI-RV32-FP-NEXT: addi sp, sp, 80 +; RNMI-RV32-FP-NEXT: .cfi_def_cfa_offset 0 +; RNMI-RV32-FP-NEXT: mnret +; +; RNMI-RV64-LABEL: test_rnmi_caller: +; RNMI-RV64: # %bb.0: +; RNMI-RV64-NEXT: addi sp, sp, -128 +; RNMI-RV64-NEXT: .cfi_def_cfa_offset 128 +; RNMI-RV64-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: sd t1, 104(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: sd t2, 96(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: sd a0, 88(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: sd a1, 80(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: sd a2, 72(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: sd a3, 64(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: sd a4, 56(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: sd a5, 48(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: sd a6, 40(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: sd a7, 32(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: sd t3, 24(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: sd t4, 16(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: sd t5, 8(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: sd t6, 0(sp) # 8-byte Folded Spill +; RNMI-RV64-NEXT: .cfi_offset ra, -8 +; RNMI-RV64-NEXT: .cfi_offset t0, -16 +; RNMI-RV64-NEXT: .cfi_offset t1, -24 +; RNMI-RV64-NEXT: .cfi_offset t2, -32 +; RNMI-RV64-NEXT: .cfi_offset a0, -40 +; RNMI-RV64-NEXT: .cfi_offset a1, -48 +; RNMI-RV64-NEXT: .cfi_offset a2, -56 +; RNMI-RV64-NEXT: .cfi_offset a3, -64 +; RNMI-RV64-NEXT: .cfi_offset a4, -72 +; RNMI-RV64-NEXT: .cfi_offset a5, -80 +; RNMI-RV64-NEXT: .cfi_offset a6, -88 +; RNMI-RV64-NEXT: .cfi_offset a7, -96 +; RNMI-RV64-NEXT: .cfi_offset t3, -104 +; RNMI-RV64-NEXT: .cfi_offset t4, -112 +; RNMI-RV64-NEXT: .cfi_offset t5, -120 +; RNMI-RV64-NEXT: .cfi_offset t6, -128 +; RNMI-RV64-NEXT: call callee +; RNMI-RV64-NEXT: ld ra, 120(sp) # 8-byte Folded Reload +; RNMI-RV64-NEXT: ld t0, 112(sp) # 8-byte Folded Reload +; RNMI-RV64-NEXT: ld t1, 104(sp) # 8-byte Folded Reload +; RNMI-RV64-NEXT: ld t2, 96(sp) # 8-byte Folded Reload +; RNMI-RV64-NEXT: ld a0, 88(sp) # 8-byte Folded Reload +; RNMI-RV64-NEXT: ld a1, 80(sp) # 8-byte Folded Reload +; RNMI-RV64-NEXT: ld a2, 72(sp) # 8-byte Folded Reload +; RNMI-RV64-NEXT: ld a3, 64(sp) # 8-byte Folded Reload +; RNMI-RV64-NEXT: ld a4, 56(sp) # 8-byte Folded Reload +; RNMI-RV64-NEXT: ld a5, 48(sp) # 8-byte Folded Reload +; RNMI-RV64-NEXT: ld a6, 40(sp) # 8-byte Folded Reload +; RNMI-RV64-NEXT: ld a7, 32(sp) # 8-byte Folded Reload +; RNMI-RV64-NEXT: ld t3, 24(sp) # 8-byte Folded Reload +; RNMI-RV64-NEXT: ld t4, 16(sp) # 8-byte Folded Reload +; RNMI-RV64-NEXT: ld t5, 8(sp) # 8-byte Folded Reload +; RNMI-RV64-NEXT: ld t6, 0(sp) # 8-byte 
Folded Reload +; RNMI-RV64-NEXT: .cfi_restore ra +; RNMI-RV64-NEXT: .cfi_restore t0 +; RNMI-RV64-NEXT: .cfi_restore t1 +; RNMI-RV64-NEXT: .cfi_restore t2 +; RNMI-RV64-NEXT: .cfi_restore a0 +; RNMI-RV64-NEXT: .cfi_restore a1 +; RNMI-RV64-NEXT: .cfi_restore a2 +; RNMI-RV64-NEXT: .cfi_restore a3 +; RNMI-RV64-NEXT: .cfi_restore a4 +; RNMI-RV64-NEXT: .cfi_restore a5 +; RNMI-RV64-NEXT: .cfi_restore a6 +; RNMI-RV64-NEXT: .cfi_restore a7 +; RNMI-RV64-NEXT: .cfi_restore t3 +; RNMI-RV64-NEXT: .cfi_restore t4 +; RNMI-RV64-NEXT: .cfi_restore t5 +; RNMI-RV64-NEXT: .cfi_restore t6 +; RNMI-RV64-NEXT: addi sp, sp, 128 +; RNMI-RV64-NEXT: .cfi_def_cfa_offset 0 +; RNMI-RV64-NEXT: mnret +; +; RNMI-RV64-FP-LABEL: test_rnmi_caller: +; RNMI-RV64-FP: # %bb.0: +; RNMI-RV64-FP-NEXT: addi sp, sp, -144 +; RNMI-RV64-FP-NEXT: .cfi_def_cfa_offset 144 +; RNMI-RV64-FP-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd t0, 128(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd t1, 120(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd t2, 112(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd s0, 104(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd a0, 96(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd a1, 88(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd a2, 80(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd a3, 72(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd a4, 64(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd a5, 56(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd a6, 48(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd a7, 40(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd t3, 32(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd t4, 24(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd t5, 16(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: sd t6, 8(sp) # 8-byte Folded Spill +; RNMI-RV64-FP-NEXT: .cfi_offset ra, -8 +; RNMI-RV64-FP-NEXT: .cfi_offset t0, -16 +; RNMI-RV64-FP-NEXT: .cfi_offset t1, -24 +; RNMI-RV64-FP-NEXT: .cfi_offset t2, -32 +; RNMI-RV64-FP-NEXT: .cfi_offset s0, -40 +; RNMI-RV64-FP-NEXT: .cfi_offset a0, -48 +; RNMI-RV64-FP-NEXT: .cfi_offset a1, -56 +; RNMI-RV64-FP-NEXT: .cfi_offset a2, -64 +; RNMI-RV64-FP-NEXT: .cfi_offset a3, -72 +; RNMI-RV64-FP-NEXT: .cfi_offset a4, -80 +; RNMI-RV64-FP-NEXT: .cfi_offset a5, -88 +; RNMI-RV64-FP-NEXT: .cfi_offset a6, -96 +; RNMI-RV64-FP-NEXT: .cfi_offset a7, -104 +; RNMI-RV64-FP-NEXT: .cfi_offset t3, -112 +; RNMI-RV64-FP-NEXT: .cfi_offset t4, -120 +; RNMI-RV64-FP-NEXT: .cfi_offset t5, -128 +; RNMI-RV64-FP-NEXT: .cfi_offset t6, -136 +; RNMI-RV64-FP-NEXT: addi s0, sp, 144 +; RNMI-RV64-FP-NEXT: .cfi_def_cfa s0, 0 +; RNMI-RV64-FP-NEXT: call callee +; RNMI-RV64-FP-NEXT: .cfi_def_cfa sp, 144 +; RNMI-RV64-FP-NEXT: ld ra, 136(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld t0, 128(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld t1, 120(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld t2, 112(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld s0, 104(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld a0, 96(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld a1, 88(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld a2, 80(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld a3, 72(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld a4, 64(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld a5, 56(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld a6, 48(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld a7, 40(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld t3, 32(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld 
t4, 24(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld t5, 16(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: ld t6, 8(sp) # 8-byte Folded Reload +; RNMI-RV64-FP-NEXT: .cfi_restore ra +; RNMI-RV64-FP-NEXT: .cfi_restore t0 +; RNMI-RV64-FP-NEXT: .cfi_restore t1 +; RNMI-RV64-FP-NEXT: .cfi_restore t2 +; RNMI-RV64-FP-NEXT: .cfi_restore s0 +; RNMI-RV64-FP-NEXT: .cfi_restore a0 +; RNMI-RV64-FP-NEXT: .cfi_restore a1 +; RNMI-RV64-FP-NEXT: .cfi_restore a2 +; RNMI-RV64-FP-NEXT: .cfi_restore a3 +; RNMI-RV64-FP-NEXT: .cfi_restore a4 +; RNMI-RV64-FP-NEXT: .cfi_restore a5 +; RNMI-RV64-FP-NEXT: .cfi_restore a6 +; RNMI-RV64-FP-NEXT: .cfi_restore a7 +; RNMI-RV64-FP-NEXT: .cfi_restore t3 +; RNMI-RV64-FP-NEXT: .cfi_restore t4 +; RNMI-RV64-FP-NEXT: .cfi_restore t5 +; RNMI-RV64-FP-NEXT: .cfi_restore t6 +; RNMI-RV64-FP-NEXT: addi sp, sp, 144 +; RNMI-RV64-FP-NEXT: .cfi_def_cfa_offset 0 +; RNMI-RV64-FP-NEXT: mnret + call void @callee() + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index 8dd6301..eb8b769 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -1587,59 +1587,59 @@ define i64 @sub_if_uge_i64(i64 %x, i64 %y) { define i128 @sub_if_uge_i128(i128 %x, i128 %y) { ; CHECK-LABEL: sub_if_uge_i128: ; CHECK: # %bb.0: -; CHECK-NEXT: lw a7, 4(a2) -; CHECK-NEXT: lw a6, 8(a2) -; CHECK-NEXT: lw t0, 12(a2) ; CHECK-NEXT: lw a3, 4(a1) -; CHECK-NEXT: lw a4, 12(a1) -; CHECK-NEXT: lw a5, 8(a1) -; CHECK-NEXT: beq a4, t0, .LBB53_2 +; CHECK-NEXT: lw a4, 8(a1) +; CHECK-NEXT: lw a5, 12(a1) +; CHECK-NEXT: lw a6, 4(a2) +; CHECK-NEXT: lw t0, 12(a2) +; CHECK-NEXT: lw a7, 8(a2) +; CHECK-NEXT: beq a5, t0, .LBB53_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: sltu t1, a4, t0 +; CHECK-NEXT: sltu t1, a5, t0 ; CHECK-NEXT: j .LBB53_3 ; CHECK-NEXT: .LBB53_2: -; CHECK-NEXT: sltu t1, a5, a6 +; CHECK-NEXT: sltu t1, a4, a7 ; CHECK-NEXT: .LBB53_3: -; CHECK-NEXT: lw a2, 0(a2) ; CHECK-NEXT: lw a1, 0(a1) -; CHECK-NEXT: beq a3, a7, .LBB53_5 +; CHECK-NEXT: lw a2, 0(a2) +; CHECK-NEXT: beq a3, a6, .LBB53_5 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: sltu t2, a3, a7 +; CHECK-NEXT: sltu t2, a3, a6 ; CHECK-NEXT: j .LBB53_6 ; CHECK-NEXT: .LBB53_5: ; CHECK-NEXT: sltu t2, a1, a2 ; CHECK-NEXT: .LBB53_6: -; CHECK-NEXT: xor t3, a4, t0 -; CHECK-NEXT: xor t4, a5, a6 +; CHECK-NEXT: xor t3, a5, t0 +; CHECK-NEXT: xor t4, a4, a7 ; CHECK-NEXT: or t3, t4, t3 ; CHECK-NEXT: beqz t3, .LBB53_8 ; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: mv t2, t1 ; CHECK-NEXT: .LBB53_8: -; CHECK-NEXT: addi t2, t2, -1 -; CHECK-NEXT: and t1, t2, t0 -; CHECK-NEXT: and t0, t2, a2 -; CHECK-NEXT: and a7, t2, a7 +; CHECK-NEXT: addi t3, t2, -1 +; CHECK-NEXT: and t2, t3, t0 +; CHECK-NEXT: and t0, t3, a2 +; CHECK-NEXT: and t1, t3, a6 ; CHECK-NEXT: sltu a2, a1, t0 -; CHECK-NEXT: and t2, t2, a6 +; CHECK-NEXT: and a7, t3, a7 ; CHECK-NEXT: mv a6, a2 -; CHECK-NEXT: beq a3, a7, .LBB53_10 +; CHECK-NEXT: beq a3, t1, .LBB53_10 ; CHECK-NEXT: # %bb.9: -; CHECK-NEXT: sltu a6, a3, a7 +; CHECK-NEXT: sltu a6, a3, t1 ; CHECK-NEXT: .LBB53_10: -; CHECK-NEXT: sub t3, a5, t2 -; CHECK-NEXT: sltu a5, a5, t2 -; CHECK-NEXT: sub a4, a4, t1 -; CHECK-NEXT: sub a3, a3, a7 +; CHECK-NEXT: sub t3, a4, a7 +; CHECK-NEXT: sltu a4, a4, a7 +; CHECK-NEXT: sub a5, a5, t2 +; CHECK-NEXT: sub a3, a3, t1 ; CHECK-NEXT: sub a1, a1, t0 ; CHECK-NEXT: sltu a7, t3, a6 -; CHECK-NEXT: sub a4, a4, a5 -; CHECK-NEXT: sub a5, t3, a6 +; CHECK-NEXT: sub a5, a5, a4 +; CHECK-NEXT: sub a4, t3, a6 ; CHECK-NEXT: sub a3, a3, a2 -; CHECK-NEXT: sub a2, a4, a7 +; CHECK-NEXT: sub a2, a5, 
a7 ; CHECK-NEXT: sw a1, 0(a0) ; CHECK-NEXT: sw a3, 4(a0) -; CHECK-NEXT: sw a5, 8(a0) +; CHECK-NEXT: sw a4, 8(a0) ; CHECK-NEXT: sw a2, 12(a0) ; CHECK-NEXT: ret %cmp = icmp ult i128 %x, %y diff --git a/llvm/test/CodeGen/RISCV/rv32zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbkb.ll index 4aa6dd4..42d326e 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbkb.ll @@ -319,3 +319,142 @@ define i64 @zext_i16_to_i64(i16 %a) nounwind { %1 = zext i16 %a to i64 ret i64 %1 } + +define i32 @pack_lo_packh_hi_packh(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3) nounwind { +; RV32I-LABEL: pack_lo_packh_hi_packh: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: slli a2, a2, 16 +; RV32I-NEXT: slli a3, a3, 24 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: or a2, a2, a3 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: ret +; +; RV32ZBKB-LABEL: pack_lo_packh_hi_packh: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: packh a0, a0, a1 +; RV32ZBKB-NEXT: packh a1, a2, a3 +; RV32ZBKB-NEXT: pack a0, a0, a1 +; RV32ZBKB-NEXT: ret + %a = zext i8 %0 to i32 + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = zext i8 %3 to i32 + %e = shl i32 %b, 8 + %f = shl i32 %c, 16 + %g = shl i32 %d, 24 + %h = or i32 %a, %e + %i = or i32 %h, %f + %j = or i32 %i, %g + ret i32 %j +} + +define i32 @pack_lo_packh_hi_packh_2(i8 %0, i8 %1, i8 %2, i8 %3) nounwind { +; RV32I-LABEL: pack_lo_packh_hi_packh_2: +; RV32I: # %bb.0: +; RV32I-NEXT: zext.b a0, a0 +; RV32I-NEXT: zext.b a1, a1 +; RV32I-NEXT: zext.b a2, a2 +; RV32I-NEXT: slli a3, a3, 24 +; RV32I-NEXT: slli a1, a1, 8 +; RV32I-NEXT: slli a2, a2, 16 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: or a2, a2, a3 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: ret +; +; RV32ZBKB-LABEL: pack_lo_packh_hi_packh_2: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: packh a0, a0, a1 +; RV32ZBKB-NEXT: packh a1, a2, a3 +; RV32ZBKB-NEXT: pack a0, a0, a1 +; RV32ZBKB-NEXT: ret + %a = zext i8 %0 to i32 + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = zext i8 %3 to i32 + %e = shl i32 %b, 8 + %f = shl i32 %c, 16 + %g = shl i32 %d, 24 + %h = or i32 %a, %e + %i = or i32 %h, %f + %j = or i32 %i, %g + ret i32 %j +} + +define i32 @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2) nounwind { +; RV32I-LABEL: pack_lo_zext_hi_packh: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: slli a2, a2, 24 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32ZBKB-LABEL: pack_lo_zext_hi_packh: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: packh a1, a1, a2 +; RV32ZBKB-NEXT: pack a0, a0, a1 +; RV32ZBKB-NEXT: ret + %a = zext i16 %0 to i32 + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = shl i32 %c, 8 + %e = or i32 %b, %d + %f = shl i32 %e, 16 + %g = or i32 %f, %a + ret i32 %g +} + +; Negative test, %a isn't extended so we can't use pack for the outer or, but +; we can use packh for the high half. 
+define i32 @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2) nounwind { +; RV32I-LABEL: pack_lo_noext_hi_packh: +; RV32I: # %bb.0: +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: slli a2, a2, 24 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: or a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32ZBKB-LABEL: pack_lo_noext_hi_packh: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: packh a1, a1, a2 +; RV32ZBKB-NEXT: slli a1, a1, 16 +; RV32ZBKB-NEXT: or a0, a1, a0 +; RV32ZBKB-NEXT: ret + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = shl i32 %c, 8 + %e = or i32 %b, %d + %f = shl i32 %e, 16 + %g = or i32 %f, %a + ret i32 %g +} + +; Make sure we can match packh+slli without having the input bytes zero extended. +define i32 @pack_lo_noext_hi_packh_nozeroext(i32 %a, i8 %1, i8 %2) nounwind { +; RV32I-LABEL: pack_lo_noext_hi_packh_nozeroext: +; RV32I: # %bb.0: +; RV32I-NEXT: zext.b a1, a1 +; RV32I-NEXT: slli a2, a2, 24 +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: ret +; +; RV32ZBKB-LABEL: pack_lo_noext_hi_packh_nozeroext: +; RV32ZBKB: # %bb.0: +; RV32ZBKB-NEXT: packh a1, a1, a2 +; RV32ZBKB-NEXT: slli a1, a1, 16 +; RV32ZBKB-NEXT: or a0, a1, a0 +; RV32ZBKB-NEXT: ret + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = shl i32 %c, 8 + %e = or i32 %b, %d + %f = shl i32 %e, 16 + %g = or i32 %f, %a + ret i32 %g +} diff --git a/llvm/test/CodeGen/RISCV/rv32zbs.ll b/llvm/test/CodeGen/RISCV/rv32zbs.ll index 1a3beeb7..e3728bf 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbs.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbs.ll @@ -785,16 +785,16 @@ define i32 @bset_trailing_ones_i32_no_mask(i32 %a) nounwind { define i64 @bset_trailing_ones_i64_mask(i64 %a) nounwind { ; CHECK-LABEL: bset_trailing_ones_i64_mask: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, -1 -; CHECK-NEXT: andi a3, a0, 63 -; CHECK-NEXT: addi a1, a3, -32 -; CHECK-NEXT: sll a0, a2, a0 +; CHECK-NEXT: andi a2, a0, 63 +; CHECK-NEXT: li a3, -1 +; CHECK-NEXT: addi a1, a2, -32 +; CHECK-NEXT: sll a0, a3, a0 ; CHECK-NEXT: bltz a1, .LBB43_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: sll a2, a2, a3 +; CHECK-NEXT: sll a2, a3, a2 ; CHECK-NEXT: j .LBB43_3 ; CHECK-NEXT: .LBB43_2: -; CHECK-NEXT: not a2, a3 +; CHECK-NEXT: not a2, a2 ; CHECK-NEXT: lui a3, 524288 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: srl a2, a3, a2 diff --git a/llvm/test/CodeGen/RISCV/rv64-half-convert.ll b/llvm/test/CodeGen/RISCV/rv64-half-convert.ll index 57061e1..f89d1abf 100644 --- a/llvm/test/CodeGen/RISCV/rv64-half-convert.ll +++ b/llvm/test/CodeGen/RISCV/rv64-half-convert.ll @@ -253,8 +253,8 @@ define i128 @fptosi_sat_f16_to_i128(half %a) nounwind { ; RV64IZHINX-NEXT: srli a1, a2, 1 ; RV64IZHINX-NEXT: .LBB4_4: ; RV64IZHINX-NEXT: feq.s a2, s0, s0 -; RV64IZHINX-NEXT: neg a3, a3 ; RV64IZHINX-NEXT: neg a4, s1 +; RV64IZHINX-NEXT: neg a3, a3 ; RV64IZHINX-NEXT: neg a2, a2 ; RV64IZHINX-NEXT: and a0, a4, a0 ; RV64IZHINX-NEXT: and a1, a2, a1 @@ -334,18 +334,19 @@ define i128 @fptoui_sat_f16_to_i128(half %a) nounwind { ; RV64IZHINX-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64IZHINX-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64IZHINX-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64IZHINX-NEXT: fcvt.s.h a0, a0 -; RV64IZHINX-NEXT: lui a1, 522240 -; RV64IZHINX-NEXT: addi a1, a1, -1 -; RV64IZHINX-NEXT: fle.s a2, zero, a0 -; RV64IZHINX-NEXT: flt.s a1, a1, a0 -; RV64IZHINX-NEXT: neg s0, a1 -; RV64IZHINX-NEXT: neg s1, a2 +; RV64IZHINX-NEXT: fcvt.s.h s0, a0 +; RV64IZHINX-NEXT: fle.s a0, zero, s0 +; RV64IZHINX-NEXT: neg s1, a0 +; RV64IZHINX-NEXT: mv a0, s0 ; 
RV64IZHINX-NEXT: call __fixunssfti ; RV64IZHINX-NEXT: and a0, s1, a0 +; RV64IZHINX-NEXT: lui a2, 522240 ; RV64IZHINX-NEXT: and a1, s1, a1 -; RV64IZHINX-NEXT: or a0, s0, a0 -; RV64IZHINX-NEXT: or a1, s0, a1 +; RV64IZHINX-NEXT: addi a2, a2, -1 +; RV64IZHINX-NEXT: flt.s a2, a2, s0 +; RV64IZHINX-NEXT: neg a2, a2 +; RV64IZHINX-NEXT: or a0, a2, a0 +; RV64IZHINX-NEXT: or a1, a2, a1 ; RV64IZHINX-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64IZHINX-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64IZHINX-NEXT: ld s1, 8(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbkb.ll index 818ea72..f2c41db 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbkb.ll @@ -392,3 +392,217 @@ define i64 @zext_i16_to_i64(i16 %a) nounwind { %1 = zext i16 %a to i64 ret i64 %1 } + +define void @pack_lo_packh_hi_packh(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3, ptr %p) nounwind { +; RV64I-LABEL: pack_lo_packh_hi_packh: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: slli a3, a3, 24 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: sw a0, 0(a4) +; RV64I-NEXT: ret +; +; RV64ZBKB-LABEL: pack_lo_packh_hi_packh: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: packh a0, a0, a1 +; RV64ZBKB-NEXT: packh a1, a2, a3 +; RV64ZBKB-NEXT: packw a0, a0, a1 +; RV64ZBKB-NEXT: sw a0, 0(a4) +; RV64ZBKB-NEXT: ret + %a = zext i8 %0 to i32 + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = zext i8 %3 to i32 + %e = shl i32 %b, 8 + %f = shl i32 %c, 16 + %g = shl i32 %d, 24 + %h = or i32 %a, %e + %i = or i32 %h, %f + %j = or i32 %i, %g + store i32 %j, ptr %p + ret void +} + +define void @pack_lo_packh_hi_packh_2(i8 zeroext %0, i8 zeroext %1, i8 zeroext %2, i8 zeroext %3, ptr %p) nounwind { +; RV64I-LABEL: pack_lo_packh_hi_packh_2: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: slli a3, a3, 24 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: sw a0, 0(a4) +; RV64I-NEXT: ret +; +; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_2: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: packh a0, a0, a1 +; RV64ZBKB-NEXT: packh a1, a3, a2 +; RV64ZBKB-NEXT: packw a0, a0, a1 +; RV64ZBKB-NEXT: sw a0, 0(a4) +; RV64ZBKB-NEXT: ret + %a = zext i8 %0 to i32 + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = zext i8 %3 to i32 + %e = shl i32 %b, 8 + %f = shl i32 %c, 16 + %g = shl i32 %d, 24 + %h = or i32 %a, %e + %i = or i32 %g, %h + %j = or i32 %f, %i + store i32 %j, ptr %p + ret void +} + +define void @pack_lo_packh_hi_packh_3(i8 %0, i8 %1, i8 %2, i8 %3, ptr %p) nounwind { +; RV64I-LABEL: pack_lo_packh_hi_packh_3: +; RV64I: # %bb.0: +; RV64I-NEXT: zext.b a0, a0 +; RV64I-NEXT: zext.b a1, a1 +; RV64I-NEXT: zext.b a2, a2 +; RV64I-NEXT: slli a3, a3, 24 +; RV64I-NEXT: slli a1, a1, 8 +; RV64I-NEXT: slli a2, a2, 16 +; RV64I-NEXT: or a0, a3, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: sw a0, 0(a4) +; RV64I-NEXT: ret +; +; RV64ZBKB-LABEL: pack_lo_packh_hi_packh_3: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: packh a0, a0, a1 +; RV64ZBKB-NEXT: packh a1, a3, a2 +; RV64ZBKB-NEXT: packw a0, a0, a1 +; RV64ZBKB-NEXT: sw a0, 0(a4) +; RV64ZBKB-NEXT: ret + %a = zext i8 %0 to i32 + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = zext i8 %3 to i32 + %e = shl i32 %b, 8 + %f = shl i32 %c, 16 + %g = shl i32 %d, 24 + %h = or i32 %a, %e + %i = or i32 %g, %h + %j = or 
i32 %f, %i + store i32 %j, ptr %p + ret void +} + +define void @pack_lo_zext_hi_packh(i16 zeroext %0, i8 zeroext %1, i8 zeroext %2, ptr %p) nounwind { +; RV64I-LABEL: pack_lo_zext_hi_packh: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: slli a2, a2, 24 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: sw a0, 0(a3) +; RV64I-NEXT: ret +; +; RV64ZBKB-LABEL: pack_lo_zext_hi_packh: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: packh a1, a1, a2 +; RV64ZBKB-NEXT: packw a0, a0, a1 +; RV64ZBKB-NEXT: sw a0, 0(a3) +; RV64ZBKB-NEXT: ret + %a = zext i16 %0 to i32 + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = shl i32 %c, 8 + %e = or i32 %b, %d + %f = shl i32 %e, 16 + %g = or i32 %f, %a + store i32 %g, ptr %p + ret void +} + +; Negative test, %a isn't extended so we can't use packw for the outer or, but +; we can use packh for the high half. +define void @pack_lo_noext_hi_packh(i32 %a, i8 zeroext %1, i8 zeroext %2, ptr %p) nounwind { +; RV64I-LABEL: pack_lo_noext_hi_packh: +; RV64I: # %bb.0: +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: slli a2, a2, 24 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: sw a0, 0(a3) +; RV64I-NEXT: ret +; +; RV64ZBKB-LABEL: pack_lo_noext_hi_packh: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: packh a1, a1, a2 +; RV64ZBKB-NEXT: slli a1, a1, 16 +; RV64ZBKB-NEXT: or a0, a1, a0 +; RV64ZBKB-NEXT: sw a0, 0(a3) +; RV64ZBKB-NEXT: ret + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = shl i32 %c, 8 + %e = or i32 %b, %d + %f = shl i32 %e, 16 + %g = or i32 %f, %a + store i32 %g, ptr %p + ret void +} + +; Make sure we can match packh+slli without having the input bytes zero extended. +define void @pack_i32_lo_noext_hi_packh_nozeroext(i32 %a, i8 %1, i8 %2, ptr %p) nounwind { +; RV64I-LABEL: pack_i32_lo_noext_hi_packh_nozeroext: +; RV64I: # %bb.0: +; RV64I-NEXT: zext.b a1, a1 +; RV64I-NEXT: slli a2, a2, 24 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: sw a0, 0(a3) +; RV64I-NEXT: ret +; +; RV64ZBKB-LABEL: pack_i32_lo_noext_hi_packh_nozeroext: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: packh a1, a1, a2 +; RV64ZBKB-NEXT: slli a1, a1, 16 +; RV64ZBKB-NEXT: or a0, a1, a0 +; RV64ZBKB-NEXT: sw a0, 0(a3) +; RV64ZBKB-NEXT: ret + %b = zext i8 %1 to i32 + %c = zext i8 %2 to i32 + %d = shl i32 %c, 8 + %e = or i32 %b, %d + %f = shl i32 %e, 16 + %g = or i32 %f, %a + store i32 %g, ptr %p + ret void +} + +; Make sure we can match packh+slli without having the input bytes zero extended. 
+define i64 @pack_i64_lo_noext_hi_packh_nozeroext(i64 %a, i8 %1, i8 %2, ptr %p) nounwind { +; RV64I-LABEL: pack_i64_lo_noext_hi_packh_nozeroext: +; RV64I: # %bb.0: +; RV64I-NEXT: zext.b a1, a1 +; RV64I-NEXT: zext.b a2, a2 +; RV64I-NEXT: slli a1, a1, 16 +; RV64I-NEXT: slli a2, a2, 24 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64ZBKB-LABEL: pack_i64_lo_noext_hi_packh_nozeroext: +; RV64ZBKB: # %bb.0: +; RV64ZBKB-NEXT: packh a1, a1, a2 +; RV64ZBKB-NEXT: slli a1, a1, 16 +; RV64ZBKB-NEXT: or a0, a1, a0 +; RV64ZBKB-NEXT: ret + %b = zext i8 %1 to i64 + %c = zext i8 %2 to i64 + %d = shl i64 %c, 8 + %e = or i64 %b, %d + %f = shl i64 %e, 16 + %g = or i64 %f, %a + ret i64 %g +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll new file mode 100644 index 0000000..5b01976 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll @@ -0,0 +1,586 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +define { <2 x i8>, i32 } @vploadff_v2i8(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + ret { <2 x i8>, i32 } %load +} + +define { <2 x i8>, i32 } @vploadff_v2i8_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v2i8_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + ret { <2 x i8>, i32 } %load +} + +define { <4 x i8>, i32 } @vploadff_v4i8(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + ret { <4 x i8>, i32 } %load +} + +define { <4 x i8>, i32 } @vploadff_v4i8_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4i8_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) + ret { <4 x i8>, i32 } %load +} + +define { <8 x i8>, i32 } @vploadff_v8i8(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + ret { <8 x i8>, i32 } %load +} + +define { <8 x i8>, i32 } 
@vploadff_v8i8_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8i8_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + ret { <8 x i8>, i32 } %load +} + +define { <2 x i16>, i32 } @vploadff_v2i16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + ret { <2 x i16>, i32 } %load +} + +define { <2 x i16>, i32 } @vploadff_v2i16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v2i16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + ret { <2 x i16>, i32 } %load +} + +define { <4 x i16>, i32 } @vploadff_v4i16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + ret { <4 x i16>, i32 } %load +} + +define { <4 x i16>, i32 } @vploadff_v4i16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4i16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) + ret { <4 x i16>, i32 } %load +} + +define { <8 x i16>, i32 } @vploadff_v8i16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + ret { <8 x i16>, i32 } %load +} + +define { <8 x i16>, i32 } @vploadff_v8i16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8i16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + ret { <8 x i16>, i32 } %load +} + +define { <2 x i32>, i32 } @vploadff_v2i32(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + ret { <2 x i32>, i32 } %load +} + +define { <2 x i32>, i32 } @vploadff_v2i32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v2i32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call 
{ <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + ret { <2 x i32>, i32 } %load +} + +define { <4 x i32>, i32 } @vploadff_v4i32(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + ret { <4 x i32>, i32 } %load +} + +define { <4 x i32>, i32 } @vploadff_v4i32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4i32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) + ret { <4 x i32>, i32 } %load +} + +define { <8 x i32>, i32 } @vploadff_v8i32(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + ret { <8 x i32>, i32 } %load +} + +define { <8 x i32>, i32 } @vploadff_v8i32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8i32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + ret { <8 x i32>, i32 } %load +} + +define { <2 x i64>, i32 } @vploadff_v2i64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + ret { <2 x i64>, i32 } %load +} + +define { <2 x i64>, i32 } @vploadff_v2i64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v2i64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + ret { <2 x i64>, i32 } %load +} + +define { <4 x i64>, i32 } @vploadff_v4i64(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + ret { <4 x i64>, i32 } %load +} + +define { <4 x i64>, i32 } @vploadff_v4i64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4i64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) + ret { <4 x i64>, i32 } %load +} + +define { <8 x i64>, i32 } @vploadff_v8i64(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8i64: +; CHECK: # %bb.0: +; 
CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + ret { <8 x i64>, i32 } %load +} + +define { <8 x i64>, i32 } @vploadff_v8i64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8i64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + ret { <8 x i64>, i32 } %load +} + +define { <32 x i64>, i32 } @vploadff_v32i64(ptr %ptr, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: bltu a2, a3, .LBB24_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: .LBB24_2: +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a1), v0.t +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: sw a1, 256(a0) +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret + %load = call { <32 x i64>, i32 } @llvm.vp.load.ff.v32i64.p0(ptr %ptr, <32 x i1> %m, i32 %evl) + ret { <32 x i64>, i32 } %load +} + +define { <32 x i64>, i32 } @vploadff_v32i64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v32i64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: bltu a2, a3, .LBB25_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a1) +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: sw a1, 256(a0) +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret + %load = call { <32 x i64>, i32 } @llvm.vp.load.ff.v32i64.p0(ptr %ptr, <32 x i1> splat (i1 true), i32 %evl) + ret { <32 x i64>, i32 } %load +} + +define { <2 x half>, i32 } @vploadff_v2f16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + ret { <2 x half>, i32 } %load +} + +define { <2 x half>, i32 } @vploadff_v2f16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v2f16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + ret { <2 x half>, i32 } %load +} + +define { <4 x half>, i32 } @vploadff_v4f16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <4 x half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + ret { <4 x half>, i32 } %load +} + +define { <4 x half>, i32 } @vploadff_v4f16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4f16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <4 x 
half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) + ret { <4 x half>, i32 } %load +} + +define { <8 x half>, i32 } @vploadff_v8f16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + ret { <8 x half>, i32 } %load +} + +define { <8 x half>, i32 } @vploadff_v8f16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8f16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + ret { <8 x half>, i32 } %load +} + +define { <2 x float>, i32 } @vploadff_v2f32(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + ret { <2 x float>, i32 } %load +} + +define { <2 x float>, i32 } @vploadff_v2f32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v2f32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + ret { <2 x float>, i32 } %load +} + +define { <4 x float>, i32 } @vploadff_v4f32(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + ret { <4 x float>, i32 } %load +} + +define { <4 x float>, i32 } @vploadff_v4f32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4f32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) + ret { <4 x float>, i32 } %load +} + +define { <8 x float>, i32 } @vploadff_v8f32(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + ret { <8 x float>, i32 } %load +} + +define { <8 x float>, i32 } @vploadff_v8f32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8f32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + ret { <8 x float>, i32 } %load +} + +define { <2 x double>, i32 } @vploadff_v2f64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) { +; 
CHECK-LABEL: vploadff_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + ret { <2 x double>, i32 } %load +} + +define { <2 x double>, i32 } @vploadff_v2f64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v2f64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + ret { <2 x double>, i32 } %load +} + +define { <4 x double>, i32 } @vploadff_v4f64(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + ret { <4 x double>, i32 } %load +} + +define { <4 x double>, i32 } @vploadff_v4f64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4f64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) + ret { <4 x double>, i32 } %load +} + +define { <8 x double>, i32 } @vploadff_v8f64(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + ret { <8 x double>, i32 } %load +} + +define { <8 x double>, i32 } @vploadff_v8f64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8f64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + ret { <8 x double>, i32 } %load +} + +define { <2 x bfloat>, i32 } @vploadff_v2bf16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + ret { <2 x bfloat>, i32 } %load +} + +define { <2 x bfloat>, i32 } @vploadff_v2bf16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v2bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + ret { <2 x bfloat>, i32 } %load +} + +define { <4 x bfloat>, i32 } @vploadff_v4bf16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret 
+ %load = call { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + ret { <4 x bfloat>, i32 } %load +} + +define { <4 x bfloat>, i32 } @vploadff_v4bf16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v4bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) + ret { <4 x bfloat>, i32 } %load +} + +define { <8 x bfloat>, i32 } @vploadff_v8bf16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + ret { <8 x bfloat>, i32 } %load +} + +define { <8 x bfloat>, i32 } @vploadff_v8bf16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v8bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + ret { <8 x bfloat>, i32 } %load +} + +define { <7 x i8>, i32 } @vploadff_v7i8(ptr %ptr, <7 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v7i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <7 x i8>, i32 } @llvm.vp.load.ff.v7i8.p0(ptr %ptr, <7 x i1> %m, i32 %evl) + ret { <7 x i8>, i32 } %load +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll new file mode 100644 index 0000000..ac3cd84 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fp4-bitcast.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv64 -mattr='+v' < %s | FileCheck %s + +define <2 x i8> @fp4(<4 x i4> %0) nounwind { +; CHECK-LABEL: fp4: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; CHECK-NEXT: vmv.x.s a0, v8 +; CHECK-NEXT: vslidedown.vi v9, v8, 1 +; CHECK-NEXT: vmv.x.s a1, v9 +; CHECK-NEXT: vslidedown.vi v9, v8, 2 +; CHECK-NEXT: vslidedown.vi v8, v8, 3 +; CHECK-NEXT: andi a0, a0, 15 +; CHECK-NEXT: vmv.x.s a2, v9 +; CHECK-NEXT: andi a1, a1, 15 +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: vmv.x.s a1, v8 +; CHECK-NEXT: andi a2, a2, 15 +; CHECK-NEXT: slli a1, a1, 12 +; CHECK-NEXT: slli a2, a2, 8 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: sh a0, 14(sp) +; CHECK-NEXT: addi a0, sp, 14 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %2 = bitcast <4 x i4> %0 to <2 x i8> + ret <2 x i8> %2 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index f481f9c..9ef7f94 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -89,17 +89,17 @@ entry: define <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-NOV-LABEL: ustest_f64i32: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz 
+; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz ; CHECK-NOV-NEXT: li a2, -1 ; CHECK-NOV-NEXT: srli a2, a2, 32 -; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz -; CHECK-NOV-NEXT: blt a1, a2, .LBB2_2 +; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz +; CHECK-NOV-NEXT: blt a0, a2, .LBB2_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: .LBB2_2: # %entry -; CHECK-NOV-NEXT: blt a0, a2, .LBB2_4 +; CHECK-NOV-NEXT: blt a1, a2, .LBB2_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: .LBB2_4: # %entry ; CHECK-NOV-NEXT: sgtz a2, a1 ; CHECK-NOV-NEXT: sgtz a3, a0 @@ -254,50 +254,50 @@ entry: define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-NOV-LABEL: ustest_f32i32: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz -; CHECK-NOV-NEXT: li a4, -1 -; CHECK-NOV-NEXT: srli a4, a4, 32 -; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz -; CHECK-NOV-NEXT: bge a1, a4, .LBB5_6 +; CHECK-NOV-NEXT: fcvt.l.s a1, fa0, rtz +; CHECK-NOV-NEXT: li a5, -1 +; CHECK-NOV-NEXT: srli a5, a5, 32 +; CHECK-NOV-NEXT: fcvt.l.s a2, fa1, rtz +; CHECK-NOV-NEXT: bge a1, a5, .LBB5_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz -; CHECK-NOV-NEXT: bge a2, a4, .LBB5_7 +; CHECK-NOV-NEXT: fcvt.l.s a3, fa2, rtz +; CHECK-NOV-NEXT: bge a2, a5, .LBB5_7 ; CHECK-NOV-NEXT: .LBB5_2: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz -; CHECK-NOV-NEXT: bge a3, a4, .LBB5_8 +; CHECK-NOV-NEXT: fcvt.l.s a4, fa3, rtz +; CHECK-NOV-NEXT: bge a3, a5, .LBB5_8 ; CHECK-NOV-NEXT: .LBB5_3: # %entry -; CHECK-NOV-NEXT: blt a5, a4, .LBB5_5 +; CHECK-NOV-NEXT: blt a4, a5, .LBB5_5 ; CHECK-NOV-NEXT: .LBB5_4: # %entry -; CHECK-NOV-NEXT: mv a5, a4 +; CHECK-NOV-NEXT: mv a4, a5 ; CHECK-NOV-NEXT: .LBB5_5: # %entry -; CHECK-NOV-NEXT: sgtz a4, a1 -; CHECK-NOV-NEXT: sgtz a6, a2 -; CHECK-NOV-NEXT: sgtz a7, a3 -; CHECK-NOV-NEXT: sgtz t0, a5 +; CHECK-NOV-NEXT: sgtz a5, a4 +; CHECK-NOV-NEXT: sgtz a6, a3 +; CHECK-NOV-NEXT: sgtz a7, a2 +; CHECK-NOV-NEXT: sgtz t0, a1 ; CHECK-NOV-NEXT: neg t0, t0 ; CHECK-NOV-NEXT: neg a7, a7 ; CHECK-NOV-NEXT: neg a6, a6 -; CHECK-NOV-NEXT: neg a4, a4 -; CHECK-NOV-NEXT: and a5, t0, a5 -; CHECK-NOV-NEXT: and a3, a7, a3 -; CHECK-NOV-NEXT: and a2, a6, a2 -; CHECK-NOV-NEXT: and a1, a4, a1 -; CHECK-NOV-NEXT: sw a5, 0(a0) -; CHECK-NOV-NEXT: sw a3, 4(a0) -; CHECK-NOV-NEXT: sw a2, 8(a0) -; CHECK-NOV-NEXT: sw a1, 12(a0) +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a1, t0, a1 +; CHECK-NOV-NEXT: and a2, a7, a2 +; CHECK-NOV-NEXT: and a3, a6, a3 +; CHECK-NOV-NEXT: and a4, a5, a4 +; CHECK-NOV-NEXT: sw a1, 0(a0) +; CHECK-NOV-NEXT: sw a2, 4(a0) +; CHECK-NOV-NEXT: sw a3, 8(a0) +; CHECK-NOV-NEXT: sw a4, 12(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB5_6: # %entry -; CHECK-NOV-NEXT: mv a1, a4 -; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz -; CHECK-NOV-NEXT: blt a2, a4, .LBB5_2 +; CHECK-NOV-NEXT: mv a1, a5 +; CHECK-NOV-NEXT: fcvt.l.s a3, fa2, rtz +; CHECK-NOV-NEXT: blt a2, a5, .LBB5_2 ; CHECK-NOV-NEXT: .LBB5_7: # %entry -; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz -; CHECK-NOV-NEXT: blt a3, a4, .LBB5_3 +; CHECK-NOV-NEXT: mv a2, a5 +; CHECK-NOV-NEXT: fcvt.l.s a4, fa3, rtz +; CHECK-NOV-NEXT: blt a3, a5, .LBB5_3 ; CHECK-NOV-NEXT: .LBB5_8: # %entry -; CHECK-NOV-NEXT: mv a3, a4 -; CHECK-NOV-NEXT: bge a5, a4, .LBB5_4 +; CHECK-NOV-NEXT: mv a3, a5 +; CHECK-NOV-NEXT: bge a4, a5, .LBB5_4 ; CHECK-NOV-NEXT: j .LBB5_5 ; ; CHECK-V-LABEL: ustest_f32i32: @@ -720,8 +720,8 @@ define <4 x i32> @ustest_f16i32(<4 
x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 ; CHECK-NOV-NEXT: .cfi_remember_state ; CHECK-NOV-NEXT: lhu s1, 0(a1) -; CHECK-NOV-NEXT: lhu s2, 8(a1) -; CHECK-NOV-NEXT: lhu a2, 16(a1) +; CHECK-NOV-NEXT: lhu a2, 8(a1) +; CHECK-NOV-NEXT: lhu s2, 16(a1) ; CHECK-NOV-NEXT: lhu s3, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: fmv.w.x fa0, a2 @@ -730,43 +730,43 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s1 +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-NOV-NEXT: li a2, -1 -; CHECK-NOV-NEXT: srli a2, a2, 32 -; CHECK-NOV-NEXT: bge a0, a2, .LBB8_6 +; CHECK-NOV-NEXT: li a3, -1 +; CHECK-NOV-NEXT: srli a3, a3, 32 +; CHECK-NOV-NEXT: bge a0, a3, .LBB8_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz -; CHECK-NOV-NEXT: bge s1, a2, .LBB8_7 +; CHECK-NOV-NEXT: bge s1, a3, .LBB8_7 ; CHECK-NOV-NEXT: .LBB8_2: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz -; CHECK-NOV-NEXT: bge a1, a2, .LBB8_8 +; CHECK-NOV-NEXT: fcvt.l.s a2, fs0, rtz +; CHECK-NOV-NEXT: bge a1, a3, .LBB8_8 ; CHECK-NOV-NEXT: .LBB8_3: # %entry -; CHECK-NOV-NEXT: blt a3, a2, .LBB8_5 +; CHECK-NOV-NEXT: blt a2, a3, .LBB8_5 ; CHECK-NOV-NEXT: .LBB8_4: # %entry -; CHECK-NOV-NEXT: mv a3, a2 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: .LBB8_5: # %entry -; CHECK-NOV-NEXT: sgtz a2, a0 -; CHECK-NOV-NEXT: sgtz a4, s1 -; CHECK-NOV-NEXT: sgtz a5, a1 -; CHECK-NOV-NEXT: sgtz a6, a3 +; CHECK-NOV-NEXT: sgtz a3, a2 +; CHECK-NOV-NEXT: sgtz a4, a1 +; CHECK-NOV-NEXT: sgtz a5, s1 +; CHECK-NOV-NEXT: sgtz a6, a0 ; CHECK-NOV-NEXT: neg a6, a6 ; CHECK-NOV-NEXT: neg a5, a5 ; CHECK-NOV-NEXT: neg a4, a4 -; CHECK-NOV-NEXT: neg a2, a2 -; CHECK-NOV-NEXT: and a3, a6, a3 -; CHECK-NOV-NEXT: and a1, a5, a1 -; CHECK-NOV-NEXT: and a4, a4, s1 -; CHECK-NOV-NEXT: and a0, a2, a0 -; CHECK-NOV-NEXT: sw a3, 0(s0) -; CHECK-NOV-NEXT: sw a1, 4(s0) -; CHECK-NOV-NEXT: sw a4, 8(s0) -; CHECK-NOV-NEXT: sw a0, 12(s0) +; CHECK-NOV-NEXT: neg a3, a3 +; CHECK-NOV-NEXT: and a0, a6, a0 +; CHECK-NOV-NEXT: and a5, a5, s1 +; CHECK-NOV-NEXT: and a1, a4, a1 +; CHECK-NOV-NEXT: and a2, a3, a2 +; CHECK-NOV-NEXT: sw a0, 0(s0) +; CHECK-NOV-NEXT: sw a5, 4(s0) +; CHECK-NOV-NEXT: sw a1, 8(s0) +; CHECK-NOV-NEXT: sw a2, 12(s0) ; CHECK-NOV-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -788,16 +788,16 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB8_6: # %entry ; CHECK-NOV-NEXT: .cfi_restore_state -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: mv a0, a3 ; CHECK-NOV-NEXT: fcvt.l.s a1, fs1, rtz -; CHECK-NOV-NEXT: blt s1, a2, .LBB8_2 +; CHECK-NOV-NEXT: blt s1, a3, .LBB8_2 ; CHECK-NOV-NEXT: .LBB8_7: # %entry -; CHECK-NOV-NEXT: mv s1, a2 -; CHECK-NOV-NEXT: fcvt.l.s a3, fs0, rtz -; CHECK-NOV-NEXT: blt a1, a2, .LBB8_3 +; CHECK-NOV-NEXT: mv s1, a3 +; CHECK-NOV-NEXT: fcvt.l.s a2, fs0, rtz +; CHECK-NOV-NEXT: blt a1, a3, .LBB8_3 ; CHECK-NOV-NEXT: .LBB8_8: # %entry -; CHECK-NOV-NEXT: mv a1, a2 -; CHECK-NOV-NEXT: bge a3, a2, .LBB8_4 +; CHECK-NOV-NEXT: mv a1, a3 +; CHECK-NOV-NEXT: bge a2, a3, .LBB8_4 ; CHECK-NOV-NEXT: j .LBB8_5 ; ; 
CHECK-V-LABEL: ustest_f16i32: @@ -977,17 +977,17 @@ entry: define <2 x i16> @ustest_f64i16(<2 x double> %x) { ; CHECK-NOV-LABEL: ustest_f64i16: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz +; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz ; CHECK-NOV-NEXT: lui a2, 16 ; CHECK-NOV-NEXT: addi a2, a2, -1 -; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz -; CHECK-NOV-NEXT: blt a1, a2, .LBB11_2 +; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz +; CHECK-NOV-NEXT: blt a0, a2, .LBB11_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: mv a1, a2 +; CHECK-NOV-NEXT: mv a0, a2 ; CHECK-NOV-NEXT: .LBB11_2: # %entry -; CHECK-NOV-NEXT: blt a0, a2, .LBB11_4 +; CHECK-NOV-NEXT: blt a1, a2, .LBB11_4 ; CHECK-NOV-NEXT: # %bb.3: # %entry -; CHECK-NOV-NEXT: mv a0, a2 +; CHECK-NOV-NEXT: mv a1, a2 ; CHECK-NOV-NEXT: .LBB11_4: # %entry ; CHECK-NOV-NEXT: sgtz a2, a1 ; CHECK-NOV-NEXT: sgtz a3, a0 @@ -1146,50 +1146,50 @@ entry: define <4 x i16> @ustest_f32i16(<4 x float> %x) { ; CHECK-NOV-LABEL: ustest_f32i16: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a1, fa3, rtz -; CHECK-NOV-NEXT: lui a4, 16 -; CHECK-NOV-NEXT: addi a4, a4, -1 -; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz -; CHECK-NOV-NEXT: bge a1, a4, .LBB14_6 +; CHECK-NOV-NEXT: fcvt.w.s a1, fa0, rtz +; CHECK-NOV-NEXT: lui a5, 16 +; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: fcvt.w.s a2, fa1, rtz +; CHECK-NOV-NEXT: bge a1, a5, .LBB14_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz -; CHECK-NOV-NEXT: bge a2, a4, .LBB14_7 +; CHECK-NOV-NEXT: fcvt.w.s a3, fa2, rtz +; CHECK-NOV-NEXT: bge a2, a5, .LBB14_7 ; CHECK-NOV-NEXT: .LBB14_2: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz -; CHECK-NOV-NEXT: bge a3, a4, .LBB14_8 +; CHECK-NOV-NEXT: fcvt.w.s a4, fa3, rtz +; CHECK-NOV-NEXT: bge a3, a5, .LBB14_8 ; CHECK-NOV-NEXT: .LBB14_3: # %entry -; CHECK-NOV-NEXT: blt a5, a4, .LBB14_5 +; CHECK-NOV-NEXT: blt a4, a5, .LBB14_5 ; CHECK-NOV-NEXT: .LBB14_4: # %entry -; CHECK-NOV-NEXT: mv a5, a4 +; CHECK-NOV-NEXT: mv a4, a5 ; CHECK-NOV-NEXT: .LBB14_5: # %entry -; CHECK-NOV-NEXT: sgtz a4, a1 -; CHECK-NOV-NEXT: sgtz a6, a2 -; CHECK-NOV-NEXT: sgtz a7, a3 -; CHECK-NOV-NEXT: sgtz t0, a5 +; CHECK-NOV-NEXT: sgtz a5, a4 +; CHECK-NOV-NEXT: sgtz a6, a3 +; CHECK-NOV-NEXT: sgtz a7, a2 +; CHECK-NOV-NEXT: sgtz t0, a1 ; CHECK-NOV-NEXT: neg t0, t0 ; CHECK-NOV-NEXT: neg a7, a7 ; CHECK-NOV-NEXT: neg a6, a6 -; CHECK-NOV-NEXT: neg a4, a4 -; CHECK-NOV-NEXT: and a5, t0, a5 -; CHECK-NOV-NEXT: and a3, a7, a3 -; CHECK-NOV-NEXT: and a2, a6, a2 -; CHECK-NOV-NEXT: and a1, a4, a1 -; CHECK-NOV-NEXT: sh a5, 0(a0) -; CHECK-NOV-NEXT: sh a3, 2(a0) -; CHECK-NOV-NEXT: sh a2, 4(a0) -; CHECK-NOV-NEXT: sh a1, 6(a0) +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a1, t0, a1 +; CHECK-NOV-NEXT: and a2, a7, a2 +; CHECK-NOV-NEXT: and a3, a6, a3 +; CHECK-NOV-NEXT: and a4, a5, a4 +; CHECK-NOV-NEXT: sh a1, 0(a0) +; CHECK-NOV-NEXT: sh a2, 2(a0) +; CHECK-NOV-NEXT: sh a3, 4(a0) +; CHECK-NOV-NEXT: sh a4, 6(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB14_6: # %entry -; CHECK-NOV-NEXT: mv a1, a4 -; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz -; CHECK-NOV-NEXT: blt a2, a4, .LBB14_2 +; CHECK-NOV-NEXT: mv a1, a5 +; CHECK-NOV-NEXT: fcvt.w.s a3, fa2, rtz +; CHECK-NOV-NEXT: blt a2, a5, .LBB14_2 ; CHECK-NOV-NEXT: .LBB14_7: # %entry -; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz -; CHECK-NOV-NEXT: blt a3, a4, .LBB14_3 +; CHECK-NOV-NEXT: mv a2, a5 +; CHECK-NOV-NEXT: fcvt.w.s a4, fa3, rtz +; CHECK-NOV-NEXT: blt a3, a5, .LBB14_3 ; CHECK-NOV-NEXT: .LBB14_8: # %entry -; 
CHECK-NOV-NEXT: mv a3, a4 -; CHECK-NOV-NEXT: bge a5, a4, .LBB14_4 +; CHECK-NOV-NEXT: mv a3, a5 +; CHECK-NOV-NEXT: bge a4, a5, .LBB14_4 ; CHECK-NOV-NEXT: j .LBB14_5 ; ; CHECK-V-LABEL: ustest_f32i16: @@ -1974,72 +1974,72 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_remember_state ; CHECK-NOV-NEXT: lhu s1, 32(a1) ; CHECK-NOV-NEXT: lhu s2, 40(a1) -; CHECK-NOV-NEXT: lhu a2, 48(a1) -; CHECK-NOV-NEXT: lhu s3, 56(a1) -; CHECK-NOV-NEXT: lhu s4, 0(a1) -; CHECK-NOV-NEXT: lhu s5, 8(a1) +; CHECK-NOV-NEXT: lhu s3, 48(a1) +; CHECK-NOV-NEXT: lhu s4, 56(a1) +; CHECK-NOV-NEXT: lhu s5, 0(a1) +; CHECK-NOV-NEXT: lhu a2, 8(a1) ; CHECK-NOV-NEXT: lhu s6, 16(a1) ; CHECK-NOV-NEXT: lhu s7, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: fmv.w.x fa0, a2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs6, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: fmv.w.x fa0, s6 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs5, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s1 +; CHECK-NOV-NEXT: fmv.w.x fa0, s7 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs4, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s7 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs3, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s6 +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s5 +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s4 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 +; CHECK-NOV-NEXT: fmv.w.x fa0, s5 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-NOV-NEXT: lui a4, 16 -; CHECK-NOV-NEXT: addi a4, a4, -1 -; CHECK-NOV-NEXT: bge a0, a4, .LBB17_10 +; CHECK-NOV-NEXT: lui a5, 16 +; CHECK-NOV-NEXT: addi a5, a5, -1 +; CHECK-NOV-NEXT: bge a0, a5, .LBB17_10 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz -; CHECK-NOV-NEXT: bge s1, a4, .LBB17_11 +; CHECK-NOV-NEXT: bge s1, a5, .LBB17_11 ; CHECK-NOV-NEXT: .LBB17_2: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz -; CHECK-NOV-NEXT: bge a1, a4, .LBB17_12 +; CHECK-NOV-NEXT: bge a1, a5, .LBB17_12 ; CHECK-NOV-NEXT: .LBB17_3: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz -; CHECK-NOV-NEXT: bge a2, a4, .LBB17_13 +; CHECK-NOV-NEXT: bge a2, a5, .LBB17_13 ; CHECK-NOV-NEXT: .LBB17_4: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a5, fs2, rtz -; CHECK-NOV-NEXT: bge a3, a4, .LBB17_14 +; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz +; CHECK-NOV-NEXT: bge a3, a5, .LBB17_14 ; CHECK-NOV-NEXT: .LBB17_5: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a6, fs1, rtz -; CHECK-NOV-NEXT: bge a5, a4, .LBB17_15 +; CHECK-NOV-NEXT: bge a4, a5, .LBB17_15 ; CHECK-NOV-NEXT: .LBB17_6: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz -; CHECK-NOV-NEXT: bge a6, a4, .LBB17_16 +; CHECK-NOV-NEXT: bge a6, a5, .LBB17_16 ; CHECK-NOV-NEXT: .LBB17_7: # %entry -; CHECK-NOV-NEXT: blt a7, a4, .LBB17_9 +; CHECK-NOV-NEXT: blt a7, a5, .LBB17_9 ; CHECK-NOV-NEXT: .LBB17_8: # %entry -; CHECK-NOV-NEXT: mv a7, a4 +; CHECK-NOV-NEXT: mv a7, a5 ; CHECK-NOV-NEXT: .LBB17_9: # %entry -; CHECK-NOV-NEXT: sgtz a4, a0 -; CHECK-NOV-NEXT: sgtz t0, s1 -; CHECK-NOV-NEXT: sgtz t1, a1 -; CHECK-NOV-NEXT: sgtz t2, a2 -; CHECK-NOV-NEXT: sgtz t3, a3 -; CHECK-NOV-NEXT: sgtz t4, a5 -; CHECK-NOV-NEXT: sgtz t5, a6 -; CHECK-NOV-NEXT: sgtz t6, a7 +; CHECK-NOV-NEXT: 
sgtz a5, a7 +; CHECK-NOV-NEXT: sgtz t0, a6 +; CHECK-NOV-NEXT: sgtz t1, a4 +; CHECK-NOV-NEXT: sgtz t2, a3 +; CHECK-NOV-NEXT: sgtz t3, a2 +; CHECK-NOV-NEXT: sgtz t4, a1 +; CHECK-NOV-NEXT: sgtz t5, s1 +; CHECK-NOV-NEXT: sgtz t6, a0 ; CHECK-NOV-NEXT: neg t6, t6 ; CHECK-NOV-NEXT: neg t5, t5 ; CHECK-NOV-NEXT: neg t4, t4 @@ -2047,23 +2047,23 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: neg t2, t2 ; CHECK-NOV-NEXT: neg t1, t1 ; CHECK-NOV-NEXT: neg t0, t0 -; CHECK-NOV-NEXT: neg a4, a4 -; CHECK-NOV-NEXT: and a7, t6, a7 -; CHECK-NOV-NEXT: and a6, t5, a6 -; CHECK-NOV-NEXT: and a5, t4, a5 -; CHECK-NOV-NEXT: and a3, t3, a3 -; CHECK-NOV-NEXT: and a2, t2, a2 -; CHECK-NOV-NEXT: and a1, t1, a1 -; CHECK-NOV-NEXT: and t0, t0, s1 -; CHECK-NOV-NEXT: and a0, a4, a0 -; CHECK-NOV-NEXT: sh a2, 8(s0) -; CHECK-NOV-NEXT: sh a1, 10(s0) -; CHECK-NOV-NEXT: sh t0, 12(s0) -; CHECK-NOV-NEXT: sh a0, 14(s0) -; CHECK-NOV-NEXT: sh a7, 0(s0) -; CHECK-NOV-NEXT: sh a6, 2(s0) -; CHECK-NOV-NEXT: sh a5, 4(s0) -; CHECK-NOV-NEXT: sh a3, 6(s0) +; CHECK-NOV-NEXT: neg a5, a5 +; CHECK-NOV-NEXT: and a0, t6, a0 +; CHECK-NOV-NEXT: and t5, t5, s1 +; CHECK-NOV-NEXT: and a1, t4, a1 +; CHECK-NOV-NEXT: and a2, t3, a2 +; CHECK-NOV-NEXT: and a3, t2, a3 +; CHECK-NOV-NEXT: and a4, t1, a4 +; CHECK-NOV-NEXT: and a6, t0, a6 +; CHECK-NOV-NEXT: and a5, a5, a7 +; CHECK-NOV-NEXT: sh a3, 8(s0) +; CHECK-NOV-NEXT: sh a4, 10(s0) +; CHECK-NOV-NEXT: sh a6, 12(s0) +; CHECK-NOV-NEXT: sh a5, 14(s0) +; CHECK-NOV-NEXT: sh a0, 0(s0) +; CHECK-NOV-NEXT: sh t5, 2(s0) +; CHECK-NOV-NEXT: sh a1, 4(s0) +; CHECK-NOV-NEXT: sh a2, 6(s0) ; CHECK-NOV-NEXT: ld ra, 120(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s0, 112(sp) # 8-byte Folded Reload ; CHECK-NOV-NEXT: ld s1, 104(sp) # 8-byte Folded Reload @@ -2101,32 +2101,32 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB17_10: # %entry ; CHECK-NOV-NEXT: .cfi_restore_state -; CHECK-NOV-NEXT: mv a0, a4 +; CHECK-NOV-NEXT: mv a0, a5 ; CHECK-NOV-NEXT: fcvt.l.s a1, fs5, rtz -; CHECK-NOV-NEXT: blt s1, a4, .LBB17_2 +; CHECK-NOV-NEXT: blt s1, a5, .LBB17_2 ; CHECK-NOV-NEXT: .LBB17_11: # %entry -; CHECK-NOV-NEXT: mv s1, a4 +; CHECK-NOV-NEXT: mv s1, a5 ; CHECK-NOV-NEXT: fcvt.l.s a2, fs4, rtz -; CHECK-NOV-NEXT: blt a1, a4, .LBB17_3 +; CHECK-NOV-NEXT: blt a1, a5, .LBB17_3 ; CHECK-NOV-NEXT: .LBB17_12: # %entry -; CHECK-NOV-NEXT: mv a1, a4 +; CHECK-NOV-NEXT: mv a1, a5 ; CHECK-NOV-NEXT: fcvt.l.s a3, fs3, rtz -; CHECK-NOV-NEXT: blt a2, a4, .LBB17_4 +; CHECK-NOV-NEXT: blt a2, a5, .LBB17_4 ; CHECK-NOV-NEXT: .LBB17_13: # %entry -; CHECK-NOV-NEXT: mv a2, a4 -; CHECK-NOV-NEXT: fcvt.l.s a5, fs2, rtz -; CHECK-NOV-NEXT: blt a3, a4, .LBB17_5 +; CHECK-NOV-NEXT: mv a2, a5 +; CHECK-NOV-NEXT: fcvt.l.s a4, fs2, rtz +; CHECK-NOV-NEXT: blt a3, a5, .LBB17_5 ; CHECK-NOV-NEXT: .LBB17_14: # %entry -; CHECK-NOV-NEXT: mv a3, a4 +; CHECK-NOV-NEXT: mv a3, a5 ; CHECK-NOV-NEXT: fcvt.l.s a6, fs1, rtz -; CHECK-NOV-NEXT: blt a5, a4, .LBB17_6 +; CHECK-NOV-NEXT: blt a4, a5, .LBB17_6 ; CHECK-NOV-NEXT: .LBB17_15: # %entry -; CHECK-NOV-NEXT: mv a5, a4 +; CHECK-NOV-NEXT: mv a4, a5 ; CHECK-NOV-NEXT: fcvt.l.s a7, fs0, rtz -; CHECK-NOV-NEXT: blt a6, a4, .LBB17_7 +; CHECK-NOV-NEXT: blt a6, a5, .LBB17_7 ; CHECK-NOV-NEXT: .LBB17_16: # %entry -; CHECK-NOV-NEXT: mv a6, a4 -; CHECK-NOV-NEXT: bge a7, a4, .LBB17_8 +; CHECK-NOV-NEXT: mv a6, a5 +; CHECK-NOV-NEXT: bge a7, a5, .LBB17_8 ; CHECK-NOV-NEXT: j .LBB17_9 ; ; CHECK-V-LABEL: ustest_f16i16: diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll 
b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index 346e40a..02825b2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -5427,18 +5427,18 @@ for.cond.cleanup: ; preds = %vector.body define void @sink_splat_select_op1(ptr nocapture %a, i32 signext %x) { ; CHECK-LABEL: sink_splat_select_op1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lui a2, 1 -; CHECK-NEXT: add a2, a0, a2 -; CHECK-NEXT: li a3, 42 +; CHECK-NEXT: lui a3, 1 +; CHECK-NEXT: li a2, 42 +; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v8, a1 ; CHECK-NEXT: .LBB117_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmseq.vx v0, v8, a3 -; CHECK-NEXT: vmerge.vxm v8, v8, a1, v0 -; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vmseq.vx v0, v9, a2 +; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: bne a0, a2, .LBB117_1 +; CHECK-NEXT: bne a0, a3, .LBB117_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup ; CHECK-NEXT: ret entry: @@ -5472,9 +5472,8 @@ define void @sink_splat_select_op2(ptr nocapture %a, i32 signext %x) { ; CHECK-NEXT: .LBB118_1: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: vmseq.vx v0, v9, a2 -; CHECK-NEXT: vmerge.vvm v9, v8, v9, v0 -; CHECK-NEXT: vse32.v v9, (a0) +; CHECK-NEXT: vmsne.vx v0, v9, a2 +; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: bne a0, a1, .LBB118_1 ; CHECK-NEXT: # %bb.2: # %for.cond.cleanup diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll index 45f158f..09f42ee 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-load-store.ll @@ -443,8 +443,8 @@ define <vscale x 1 x i64> @straightline_offset_add(ptr %p, i64 %offset) { ret <vscale x 1 x i64> %x } -define <vscale x 1 x i64> @straightline_offset_disjoint_or(ptr %p, i64 %offset) { -; CHECK-LABEL: @straightline_offset_disjoint_or( +define <vscale x 1 x i64> @straightline_offset_disjoint_or_1(ptr %p) { +; CHECK-LABEL: @straightline_offset_disjoint_or_1( ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1 ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 8, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]]) @@ -464,6 +464,33 @@ define <vscale x 1 x i64> @straightline_offset_disjoint_or(ptr %p, i64 %offset) ret <vscale x 1 x i64> %x } +define <vscale x 1 x i64> @straightline_offset_disjoint_or(ptr %p, i1 %offset) { +; CHECK-LABEL: @straightline_offset_disjoint_or( +; CHECK-NEXT: [[AND:%.*]] = zext i1 [[OFFSET:%.*]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = or disjoint i64 4, [[AND]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; CHECK-NEXT: [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 8, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]]) +; CHECK-NEXT: [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]]) +; CHECK-NEXT: ret <vscale x 1 x i64> [[X]] +; + %step = call <vscale x 1 x i64> 
@llvm.stepvector.nxv1i64() + %step.shl = shl <vscale x 1 x i64> %step, splat (i64 1) + %add = add <vscale x 1 x i64> %step.shl, splat (i64 4) + %zext = zext i1 %offset to i64 + %splat.insert = insertelement <vscale x 1 x i64> poison, i64 %zext, i64 0 + %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer + %offsetv = or disjoint <vscale x 1 x i64> %add, %splat + %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offsetv + %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0( + <vscale x 1 x ptr> %ptrs, + i32 8, + <vscale x 1 x i1> splat (i1 true), + <vscale x 1 x i64> poison + ) + ret <vscale x 1 x i64> %x +} + define <vscale x 1 x i64> @straightline_offset_shl(ptr %p) { ; CHECK-LABEL: @straightline_offset_shl( ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() diff --git a/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll index d0b184b..afe918b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vec3-setcc-crash.ll @@ -13,22 +13,22 @@ define void @vec3_setcc_crash(ptr %in, ptr %out) { ; RV32: # %bb.0: ; RV32-NEXT: lw a0, 0(a0) ; RV32-NEXT: srli a2, a0, 16 -; RV32-NEXT: slli a3, a0, 16 -; RV32-NEXT: slli a4, a0, 24 -; RV32-NEXT: slli a5, a0, 8 -; RV32-NEXT: srli a6, a3, 24 -; RV32-NEXT: srai a3, a3, 24 +; RV32-NEXT: srli a3, a0, 8 +; RV32-NEXT: slli a4, a0, 16 +; RV32-NEXT: slli a5, a0, 24 +; RV32-NEXT: slli a6, a0, 8 ; RV32-NEXT: srai a4, a4, 24 ; RV32-NEXT: srai a5, a5, 24 +; RV32-NEXT: srai a6, a6, 24 +; RV32-NEXT: sgtz a6, a6 ; RV32-NEXT: sgtz a5, a5 ; RV32-NEXT: sgtz a4, a4 -; RV32-NEXT: sgtz a3, a3 -; RV32-NEXT: neg a3, a3 ; RV32-NEXT: neg a4, a4 ; RV32-NEXT: neg a5, a5 -; RV32-NEXT: and a3, a3, a6 -; RV32-NEXT: and a0, a4, a0 -; RV32-NEXT: and a2, a5, a2 +; RV32-NEXT: neg a6, a6 +; RV32-NEXT: and a3, a4, a3 +; RV32-NEXT: and a0, a5, a0 +; RV32-NEXT: and a2, a6, a2 ; RV32-NEXT: slli a3, a3, 8 ; RV32-NEXT: zext.b a0, a0 ; RV32-NEXT: or a0, a0, a3 @@ -39,23 +39,23 @@ define void @vec3_setcc_crash(ptr %in, ptr %out) { ; RV64-LABEL: vec3_setcc_crash: ; RV64: # %bb.0: ; RV64-NEXT: lw a0, 0(a0) -; RV64-NEXT: srliw a2, a0, 16 -; RV64-NEXT: slli a3, a0, 48 -; RV64-NEXT: slli a4, a0, 56 -; RV64-NEXT: slli a5, a0, 40 -; RV64-NEXT: srli a6, a3, 56 -; RV64-NEXT: srai a3, a3, 56 +; RV64-NEXT: srli a2, a0, 16 +; RV64-NEXT: srli a3, a0, 8 +; RV64-NEXT: slli a4, a0, 48 +; RV64-NEXT: slli a5, a0, 56 +; RV64-NEXT: slli a6, a0, 40 ; RV64-NEXT: srai a4, a4, 56 ; RV64-NEXT: srai a5, a5, 56 +; RV64-NEXT: srai a6, a6, 56 +; RV64-NEXT: sgtz a6, a6 ; RV64-NEXT: sgtz a5, a5 ; RV64-NEXT: sgtz a4, a4 -; RV64-NEXT: sgtz a3, a3 -; RV64-NEXT: neg a3, a3 ; RV64-NEXT: neg a4, a4 ; RV64-NEXT: neg a5, a5 -; RV64-NEXT: and a3, a3, a6 -; RV64-NEXT: and a0, a4, a0 -; RV64-NEXT: and a2, a5, a2 +; RV64-NEXT: neg a6, a6 +; RV64-NEXT: and a3, a4, a3 +; RV64-NEXT: and a0, a5, a0 +; RV64-NEXT: and a2, a6, a2 ; RV64-NEXT: slli a3, a3, 8 ; RV64-NEXT: zext.b a0, a0 ; RV64-NEXT: or a0, a0, a3 diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll index 5c1e41f..b83ddce 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice.ll @@ -470,61 +470,61 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64(<vscale x 16 x i64> %va, <vs ; CHECK-LABEL: test_vp_splice_nxv16i64: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli a5, a4, 1 -; CHECK-NEXT: addi a5, a5, -1 
; CHECK-NEXT: slli a1, a4, 3 -; CHECK-NEXT: mv a7, a2 -; CHECK-NEXT: bltu a2, a5, .LBB22_2 +; CHECK-NEXT: slli a7, a4, 1 +; CHECK-NEXT: addi a7, a7, -1 +; CHECK-NEXT: add a5, a0, a1 +; CHECK-NEXT: mv a6, a2 +; CHECK-NEXT: bltu a2, a7, .LBB22_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a7, a5 +; CHECK-NEXT: mv a6, a7 ; CHECK-NEXT: .LBB22_2: ; CHECK-NEXT: addi sp, sp, -80 ; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 80 -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 5 -; CHECK-NEXT: sub sp, sp, a5 +; CHECK-NEXT: csrr a7, vlenb +; CHECK-NEXT: slli a7, a7, 5 +; CHECK-NEXT: sub sp, sp, a7 ; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: add a5, a0, a1 -; CHECK-NEXT: slli a7, a7, 3 +; CHECK-NEXT: vl8re64.v v24, (a5) +; CHECK-NEXT: slli a5, a6, 3 ; CHECK-NEXT: addi a6, sp, 64 -; CHECK-NEXT: mv t0, a2 +; CHECK-NEXT: add a5, a6, a5 +; CHECK-NEXT: mv a7, a2 ; CHECK-NEXT: bltu a2, a4, .LBB22_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv t0, a4 +; CHECK-NEXT: mv a7, a4 ; CHECK-NEXT: .LBB22_4: -; CHECK-NEXT: vl8re64.v v24, (a5) -; CHECK-NEXT: add a5, a6, a7 ; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v8, (a6) ; CHECK-NEXT: sub a0, a2, a4 +; CHECK-NEXT: add a6, a6, a1 +; CHECK-NEXT: sub a7, a3, a4 ; CHECK-NEXT: sltu a2, a2, a0 ; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: add a6, a6, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: and a2, a2, a0 +; CHECK-NEXT: sltu a0, a3, a7 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a7 +; CHECK-NEXT: add a7, a5, a1 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a6) -; CHECK-NEXT: mv a0, a3 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vse64.v v24, (a7) ; CHECK-NEXT: bltu a3, a4, .LBB22_6 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: mv a3, a4 ; CHECK-NEXT: .LBB22_6: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v0, (a5) -; CHECK-NEXT: sub a2, a3, a4 -; CHECK-NEXT: add a5, a5, a1 -; CHECK-NEXT: sltu a3, a3, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: addi a3, sp, 104 -; CHECK-NEXT: add a1, a3, a1 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vse64.v v24, (a5) -; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: addi a2, sp, 104 +; CHECK-NEXT: add a1, a2, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a3) +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a2) ; CHECK-NEXT: addi sp, s0, -80 ; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -537,66 +537,66 @@ define <vscale x 16 x i64> @test_vp_splice_nxv16i64(<vscale x 16 x i64> %va, <vs define <vscale x 16 x i64> @test_vp_splice_nxv16i64_negative_offset(<vscale x 16 x i64> %va, <vscale x 16 x i64> %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { ; CHECK-LABEL: test_vp_splice_nxv16i64_negative_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a6, a5, 1 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: slli a1, a5, 3 -; CHECK-NEXT: mv a4, a2 -; CHECK-NEXT: bltu a2, a6, .LBB23_2 +; CHECK-NEXT: csrr a4, vlenb +; CHECK-NEXT: slli a1, a4, 3 +; CHECK-NEXT: slli a7, a4, 1 +; CHECK-NEXT: addi a7, a7, -1 +; CHECK-NEXT: add a5, a0, 
a1 +; CHECK-NEXT: mv a6, a2 +; CHECK-NEXT: bltu a2, a7, .LBB23_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a4, a6 +; CHECK-NEXT: mv a6, a7 ; CHECK-NEXT: .LBB23_2: ; CHECK-NEXT: addi sp, sp, -80 ; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 80 -; CHECK-NEXT: csrr a6, vlenb -; CHECK-NEXT: slli a6, a6, 5 -; CHECK-NEXT: sub sp, sp, a6 +; CHECK-NEXT: csrr a7, vlenb +; CHECK-NEXT: slli a7, a7, 5 +; CHECK-NEXT: sub sp, sp, a7 ; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: add a6, a0, a1 -; CHECK-NEXT: slli a4, a4, 3 +; CHECK-NEXT: vl8re64.v v24, (a5) +; CHECK-NEXT: slli a5, a6, 3 ; CHECK-NEXT: addi a7, sp, 64 +; CHECK-NEXT: add a6, a7, a5 ; CHECK-NEXT: mv t0, a2 -; CHECK-NEXT: bltu a2, a5, .LBB23_4 +; CHECK-NEXT: bltu a2, a4, .LBB23_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv t0, a5 +; CHECK-NEXT: mv t0, a4 ; CHECK-NEXT: .LBB23_4: -; CHECK-NEXT: vl8re64.v v24, (a6) -; CHECK-NEXT: add a6, a7, a4 ; CHECK-NEXT: vl8re64.v v0, (a0) ; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v8, (a7) -; CHECK-NEXT: sub a0, a2, a5 +; CHECK-NEXT: sub a0, a2, a4 +; CHECK-NEXT: add a7, a7, a1 +; CHECK-NEXT: sub t0, a3, a4 ; CHECK-NEXT: sltu a2, a2, a0 ; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: add a7, a7, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: and a2, a2, a0 +; CHECK-NEXT: sltu a0, a3, t0 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, t0 +; CHECK-NEXT: add t0, a6, a1 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a7) -; CHECK-NEXT: mv a0, a3 -; CHECK-NEXT: bltu a3, a5, .LBB23_6 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vse64.v v24, (t0) +; CHECK-NEXT: bltu a3, a4, .LBB23_6 ; CHECK-NEXT: # %bb.5: -; CHECK-NEXT: mv a0, a5 +; CHECK-NEXT: mv a3, a4 ; CHECK-NEXT: .LBB23_6: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: li a2, 8 +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v0, (a6) -; CHECK-NEXT: sub a2, a3, a5 -; CHECK-NEXT: add a5, a6, a1 -; CHECK-NEXT: sltu a3, a3, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: li a3, 8 -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vse64.v v24, (a5) -; CHECK-NEXT: bltu a4, a3, .LBB23_8 +; CHECK-NEXT: bltu a5, a2, .LBB23_8 ; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: li a4, 8 +; CHECK-NEXT: li a5, 8 ; CHECK-NEXT: .LBB23_8: -; CHECK-NEXT: sub a2, a6, a4 +; CHECK-NEXT: sub a2, a6, a5 ; CHECK-NEXT: add a1, a2, a1 -; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a2) ; CHECK-NEXT: addi sp, s0, -80 ; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll new file mode 100644 index 0000000..9e08938 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll @@ -0,0 +1,1008 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \ +; RUN: 
-verify-machineinstrs < %s | FileCheck %s + +define { <vscale x 1 x i8>, i32 } @vploadff_nxv1i8(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x i8>, i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl) + ret { <vscale x 1 x i8>, i32 } %load +} + +define { <vscale x 1 x i8>, i32 } @vploadff_nxv1i8_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1i8_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x i8>, i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 1 x i8>, i32 } %load +} + +define { <vscale x 2 x i8>, i32 } @vploadff_nxv2i8(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl) + ret { <vscale x 2 x i8>, i32 } %load +} + +define { <vscale x 2 x i8>, i32 } @vploadff_nxv2i8_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2i8_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 2 x i8>, i32 } %load +} + +define { <vscale x 4 x i8>, i32 } @vploadff_nxv4i8(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl) + ret { <vscale x 4 x i8>, i32 } %load +} + +define { <vscale x 4 x i8>, i32 } @vploadff_nxv4i8_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4i8_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 4 x i8>, i32 } %load +} + +define { <vscale x 8 x i8>, i32 } @vploadff_nxv8i8(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl) + ret { <vscale x 8 x i8>, i32 } %load +} + +define { <vscale x 8 x i8>, i32 } @vploadff_nxv8i8_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8i8_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr 
%ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 8 x i8>, i32 } %load +} + +define { <vscale x 16 x i8>, i32 } @vploadff_nxv16i8(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv16i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl) + ret { <vscale x 16 x i8>, i32 } %load +} + +define { <vscale x 16 x i8>, i32 } @vploadff_nxv16i8_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv16i8_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 16 x i8>, i32 } %load +} + +define { <vscale x 32 x i8>, i32 } @vploadff_nxv32i8(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv32i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 32 x i8>, i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr %ptr, <vscale x 32 x i1> %m, i32 %evl) + ret { <vscale x 32 x i8>, i32 } %load +} + +define { <vscale x 32 x i8>, i32 } @vploadff_nxv32i8_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv32i8_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 32 x i8>, i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr %ptr, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 32 x i8>, i32 } %load +} + +define { <vscale x 64 x i8>, i32 } @vploadff_nxv64i8(ptr %ptr, <vscale x 64 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv64i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 64 x i8>, i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr %ptr, <vscale x 64 x i1> %m, i32 %evl) + ret { <vscale x 64 x i8>, i32 } %load +} + +define { <vscale x 64 x i8>, i32 } @vploadff_nxv64i8_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv64i8_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 64 x i8>, i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr %ptr, <vscale x 64 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 64 x i8>, i32 } %load +} + +define <vscale x 128 x i8> @vploadff_nxv128i8(ptr %ptr, ptr %evl_out, <vscale x 128 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: bltu a2, a3, .LBB14_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB14_2: +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: sw a0, 0(a1) +; CHECK-NEXT: ret + %load = call { <vscale x 128 x i8>, i32 } @llvm.vp.load.ff.nxv128i8.p0(ptr %ptr, <vscale x 128 x i1> %m, i32 %evl) + %result0 = extractvalue { <vscale x 128 x i8>, i32 } %load, 0 + %result1 = extractvalue { <vscale 
x 128 x i8>, i32 } %load, 1 + store i32 %result1, ptr %evl_out + ret <vscale x 128 x i8> %result0 +} + +define <vscale x 128 x i8> @vploadff_nxv128i8_allones_mask(ptr %ptr, ptr %evl_out, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv128i8_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: bltu a2, a3, .LBB15_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB15_2: +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: sw a0, 0(a1) +; CHECK-NEXT: ret + %load = call { <vscale x 128 x i8>, i32 } @llvm.vp.load.ff.nxv128i8.p0(ptr %ptr, <vscale x 128 x i1> splat (i1 true), i32 %evl) + %result0 = extractvalue { <vscale x 128 x i8>, i32 } %load, 0 + %result1 = extractvalue { <vscale x 128 x i8>, i32 } %load, 1 + store i32 %result1, ptr %evl_out + ret <vscale x 128 x i8> %result0 +} + +define { <vscale x 1 x i16>, i32 } @vploadff_nxv1i16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x i16>, i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl) + ret { <vscale x 1 x i16>, i32 } %load +} + +define { <vscale x 1 x i16>, i32 } @vploadff_nxv1i16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1i16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x i16>, i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 1 x i16>, i32 } %load +} + +define { <vscale x 2 x i16>, i32 } @vploadff_nxv2i16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x i16>, i32 } @llvm.vp.load.ff.nxv2i16.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl) + ret { <vscale x 2 x i16>, i32 } %load +} + +define { <vscale x 2 x i16>, i32 } @vploadff_nxv2i16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2i16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x i16>, i32 } @llvm.vp.load.ff.nxv2i16.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 2 x i16>, i32 } %load +} + +define { <vscale x 4 x i16>, i32 } @vploadff_nxv4i16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x i16>, i32 } @llvm.vp.load.ff.nxv4i16.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl) + ret { <vscale x 4 x i16>, i32 } %load +} + +define { <vscale x 4 x i16>, i32 } @vploadff_nxv4i16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4i16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x i16>, i32 } 
@llvm.vp.load.ff.nxv4i16.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 4 x i16>, i32 } %load +} + +define { <vscale x 8 x i16>, i32 } @vploadff_nxv8i16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 8 x i16>, i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl) + ret { <vscale x 8 x i16>, i32 } %load +} + +define { <vscale x 8 x i16>, i32 } @vploadff_nxv8i16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8i16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 8 x i16>, i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 8 x i16>, i32 } %load +} + +define { <vscale x 16 x i16>, i32 } @vploadff_nxv16i16(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 16 x i16>, i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl) + ret { <vscale x 16 x i16>, i32 } %load +} + +define { <vscale x 16 x i16>, i32 } @vploadff_nxv16i16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv16i16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 16 x i16>, i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 16 x i16>, i32 } %load +} + +define { <vscale x 32 x i16>, i32 } @vploadff_nxv32i16(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv32i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 32 x i16>, i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr %ptr, <vscale x 32 x i1> %m, i32 %evl) + ret { <vscale x 32 x i16>, i32 } %load +} + +define { <vscale x 32 x i16>, i32 } @vploadff_nxv32i16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv32i16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 32 x i16>, i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr %ptr, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 32 x i16>, i32 } %load +} + +define { <vscale x 1 x i32>, i32 } @vploadff_nxv1i32(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x i32>, i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl) + ret { <vscale x 1 x i32>, i32 } %load +} + +define { <vscale x 1 x i32>, i32 } @vploadff_nxv1i32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1i32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, 
e32, mf2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x i32>, i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 1 x i32>, i32 } %load +} + +define { <vscale x 2 x i32>, i32 } @vploadff_nxv2i32(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x i32>, i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl) + ret { <vscale x 2 x i32>, i32 } %load +} + +define { <vscale x 2 x i32>, i32 } @vploadff_nxv2i32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2i32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x i32>, i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 2 x i32>, i32 } %load +} + +define { <vscale x 4 x i32>, i32 } @vploadff_nxv4i32(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl) + ret { <vscale x 4 x i32>, i32 } %load +} + +define { <vscale x 4 x i32>, i32 } @vploadff_nxv4i32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4i32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 4 x i32>, i32 } %load +} + +define { <vscale x 8 x i32>, i32 } @vploadff_nxv8i32(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 8 x i32>, i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl) + ret { <vscale x 8 x i32>, i32 } %load +} + +define { <vscale x 8 x i32>, i32 } @vploadff_nxv8i32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8i32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 8 x i32>, i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 8 x i32>, i32 } %load +} + +define { <vscale x 16 x i32>, i32 } @vploadff_nxv16i32(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv16i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 16 x i32>, i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl) + ret { <vscale x 16 x i32>, i32 } %load +} + +define { <vscale x 16 x i32>, i32 } @vploadff_nxv16i32_allones_mask(ptr %ptr, 
i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv16i32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 16 x i32>, i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 16 x i32>, i32 } %load +} + +define { <vscale x 1 x i64>, i32 } @vploadff_nxv1i64(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x i64>, i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl) + ret { <vscale x 1 x i64>, i32 } %load +} + +define { <vscale x 1 x i64>, i32 } @vploadff_nxv1i64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1i64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x i64>, i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 1 x i64>, i32 } %load +} + +define { <vscale x 2 x i64>, i32 } @vploadff_nxv2i64(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl) + ret { <vscale x 2 x i64>, i32 } %load +} + +define { <vscale x 2 x i64>, i32 } @vploadff_nxv2i64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2i64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 2 x i64>, i32 } %load +} + +define { <vscale x 4 x i64>, i32 } @vploadff_nxv4i64(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl) + ret { <vscale x 4 x i64>, i32 } %load +} + +define { <vscale x 4 x i64>, i32 } @vploadff_nxv4i64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4i64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 4 x i64>, i32 } %load +} + +define { <vscale x 8 x i64>, i32 } @vploadff_nxv8i64(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl) + ret { 
<vscale x 8 x i64>, i32 } %load +} + +define { <vscale x 8 x i64>, i32 } @vploadff_nxv8i64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8i64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 8 x i64>, i32 } %load +} + +define { <vscale x 1 x half>, i32 } @vploadff_nxv1f16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x half>, i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl) + ret { <vscale x 1 x half>, i32 } %load +} + +define { <vscale x 1 x half>, i32 } @vploadff_nxv1f16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1f16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x half>, i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 1 x half>, i32 } %load +} + +define { <vscale x 2 x half>, i32 } @vploadff_nxv2f16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x half>, i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl) + ret { <vscale x 2 x half>, i32 } %load +} + +define { <vscale x 2 x half>, i32 } @vploadff_nxv2f16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2f16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x half>, i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 2 x half>, i32 } %load +} + +define { <vscale x 4 x half>, i32 } @vploadff_nxv4f16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x half>, i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl) + ret { <vscale x 4 x half>, i32 } %load +} + +define { <vscale x 4 x half>, i32 } @vploadff_nxv4f16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4f16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x half>, i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 4 x half>, i32 } %load +} + +define { <vscale x 8 x half>, i32 } @vploadff_nxv8f16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; 
CHECK-NEXT: ret + %load = call { <vscale x 8 x half>, i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl) + ret { <vscale x 8 x half>, i32 } %load +} + +define { <vscale x 8 x half>, i32 } @vploadff_nxv8f16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8f16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 8 x half>, i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 8 x half>, i32 } %load +} + +define { <vscale x 16 x half>, i32 } @vploadff_nxv16f16(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv16f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 16 x half>, i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl) + ret { <vscale x 16 x half>, i32 } %load +} + +define { <vscale x 16 x half>, i32 } @vploadff_nxv16f16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv16f16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 16 x half>, i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 16 x half>, i32 } %load +} + +define { <vscale x 32 x half>, i32 } @vploadff_nxv32f16(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv32f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 32 x half>, i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr %ptr, <vscale x 32 x i1> %m, i32 %evl) + ret { <vscale x 32 x half>, i32 } %load +} + +define { <vscale x 32 x half>, i32 } @vploadff_nxv32f16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv32f16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 32 x half>, i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr %ptr, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 32 x half>, i32 } %load +} + +define { <vscale x 1 x float>, i32 } @vploadff_nxv1f32(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x float>, i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl) + ret { <vscale x 1 x float>, i32 } %load +} + +define { <vscale x 1 x float>, i32 } @vploadff_nxv1f32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1f32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x float>, i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 1 x float>, i32 } %load +} + +define { <vscale x 2 x float>, i32 } @vploadff_nxv2f32(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; 
CHECK-LABEL: vploadff_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x float>, i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl) + ret { <vscale x 2 x float>, i32 } %load +} + +define { <vscale x 2 x float>, i32 } @vploadff_nxv2f32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2f32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x float>, i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 2 x float>, i32 } %load +} + +define { <vscale x 4 x float>, i32 } @vploadff_nxv4f32(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x float>, i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl) + ret { <vscale x 4 x float>, i32 } %load +} + +define { <vscale x 4 x float>, i32 } @vploadff_nxv4f32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4f32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x float>, i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 4 x float>, i32 } %load +} + +define { <vscale x 8 x float>, i32 } @vploadff_nxv8f32(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 8 x float>, i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl) + ret { <vscale x 8 x float>, i32 } %load +} + +define { <vscale x 8 x float>, i32 } @vploadff_nxv8f32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8f32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 8 x float>, i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 8 x float>, i32 } %load +} + +define { <vscale x 16 x float>, i32 } @vploadff_nxv16f32(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 16 x float>, i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl) + ret { <vscale x 16 x float>, i32 } %load +} + +define { <vscale x 16 x float>, i32 } @vploadff_nxv16f32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv16f32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 16 x float>, i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), 
i32 %evl) + ret { <vscale x 16 x float>, i32 } %load +} + +define { <vscale x 1 x double>, i32 } @vploadff_nxv1f64(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x double>, i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl) + ret { <vscale x 1 x double>, i32 } %load +} + +define { <vscale x 1 x double>, i32 } @vploadff_nxv1f64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1f64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x double>, i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 1 x double>, i32 } %load +} + +define { <vscale x 2 x double>, i32 } @vploadff_nxv2f64(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x double>, i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl) + ret { <vscale x 2 x double>, i32 } %load +} + +define { <vscale x 2 x double>, i32 } @vploadff_nxv2f64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2f64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x double>, i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 2 x double>, i32 } %load +} + +define { <vscale x 4 x double>, i32 } @vploadff_nxv4f64(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x double>, i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl) + ret { <vscale x 4 x double>, i32 } %load +} + +define { <vscale x 4 x double>, i32 } @vploadff_nxv4f64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4f64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x double>, i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 4 x double>, i32 } %load +} + +define { <vscale x 8 x double>, i32 } @vploadff_nxv8f64(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 8 x double>, i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl) + ret { <vscale x 8 x double>, i32 } %load +} + +define { <vscale x 8 x double>, i32 } @vploadff_nxv8f64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8f64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: 
vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 8 x double>, i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 8 x double>, i32 } %load +} + +define { <vscale x 1 x bfloat>, i32 } @vploadff_nxv1bf16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x bfloat>, i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl) + ret { <vscale x 1 x bfloat>, i32 } %load +} + +define { <vscale x 1 x bfloat>, i32 } @vploadff_nxv1bf16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 1 x bfloat>, i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 1 x bfloat>, i32 } %load +} + +define { <vscale x 2 x bfloat>, i32 } @vploadff_nxv2bf16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x bfloat>, i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl) + ret { <vscale x 2 x bfloat>, i32 } %load +} + +define { <vscale x 2 x bfloat>, i32 } @vploadff_nxv2bf16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 2 x bfloat>, i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 2 x bfloat>, i32 } %load +} + +define { <vscale x 4 x bfloat>, i32 } @vploadff_nxv4bf16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x bfloat>, i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl) + ret { <vscale x 4 x bfloat>, i32 } %load +} + +define { <vscale x 4 x bfloat>, i32 } @vploadff_nxv4bf16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 4 x bfloat>, i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 4 x bfloat>, i32 } %load +} + +define { <vscale x 8 x bfloat>, i32 } @vploadff_nxv8bf16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 8 x bfloat>, i32 } @llvm.vp.load.ff.nxv8bf16.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl) + ret { <vscale x 8 x bfloat>, i32 } %load +} + +define { <vscale x 8 x 
bfloat>, i32 } @vploadff_nxv8bf16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 8 x bfloat>, i32 } @llvm.vp.load.ff.nxv8bf16.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 8 x bfloat>, i32 } %load +} + +define { <vscale x 16 x bfloat>, i32 } @vploadff_nxv16bf16(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 16 x bfloat>, i32 } @llvm.vp.load.ff.nxv16bf16.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl) + ret { <vscale x 16 x bfloat>, i32 } %load +} + +define { <vscale x 16 x bfloat>, i32 } @vploadff_nxv16bf16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv16bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 16 x bfloat>, i32 } @llvm.vp.load.ff.nxv16bf16.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 16 x bfloat>, i32 } %load +} + +define { <vscale x 32 x bfloat>, i32 } @vploadff_nxv32bf16(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 32 x bfloat>, i32 } @llvm.vp.load.ff.nxv32bf16.p0(ptr %ptr, <vscale x 32 x i1> %m, i32 %evl) + ret { <vscale x 32 x bfloat>, i32 } %load +} + +define { <vscale x 32 x bfloat>, i32 } @vploadff_nxv32bf16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv32bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 32 x bfloat>, i32 } @llvm.vp.load.ff.nxv32bf16.p0(ptr %ptr, <vscale x 32 x i1> splat (i1 true), i32 %evl) + ret { <vscale x 32 x bfloat>, i32 } %load +} + +define { <vscale x 3 x i8>, i32 } @vploadff_nxv3i8(ptr %ptr, <vscale x 3 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <vscale x 3 x i8>, i32 } @llvm.vp.load.ff.nxv3i8.p0(ptr %ptr, <vscale x 3 x i1> %m, i32 %evl) + ret { <vscale x 3 x i8>, i32 } %load +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vsha2cl.ll b/llvm/test/CodeGen/RISCV/rvv/vsha2cl.ll index f29c74a..697c582 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsha2cl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsha2cl.ll @@ -21,7 +21,7 @@ define <vscale x 4 x i32> @intrinsic_vsha2cl_vv_nxv4i32_nxv4i32(<vscale x 4 x i3 ; CHECK-LABEL: intrinsic_vsha2cl_vv_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; CHECK-NEXT: vsha2ch.vv v8, v10, v12 +; CHECK-NEXT: vsha2cl.vv v8, v10, v12 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i32> @llvm.riscv.vsha2cl.nxv4i32.nxv4i32( @@ -45,7 +45,7 @@ define <vscale x 8 x i32> @intrinsic_vsha2cl_vv_nxv8i32_nxv8i32(<vscale x 8 x i3 ; CHECK-LABEL: 
intrinsic_vsha2cl_vv_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma -; CHECK-NEXT: vsha2ch.vv v8, v12, v16 +; CHECK-NEXT: vsha2cl.vv v8, v12, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i32> @llvm.riscv.vsha2cl.nxv8i32.nxv8i32( @@ -70,7 +70,7 @@ define <vscale x 16 x i32> @intrinsic_vsha2cl_vv_nxv16i32_nxv16i32(<vscale x 16 ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; CHECK-NEXT: vsha2ch.vv v8, v16, v24 +; CHECK-NEXT: vsha2cl.vv v8, v16, v24 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i32> @llvm.riscv.vsha2cl.nxv16i32.nxv16i32( @@ -94,7 +94,7 @@ define <vscale x 4 x i64> @intrinsic_vsha2cl_vv_nxv4i64_nxv4i64(<vscale x 4 x i6 ; CHECK-LABEL: intrinsic_vsha2cl_vv_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma -; CHECK-NEXT: vsha2ch.vv v8, v12, v16 +; CHECK-NEXT: vsha2cl.vv v8, v12, v16 ; CHECK-NEXT: ret entry: %a = call <vscale x 4 x i64> @llvm.riscv.vsha2cl.nxv4i64.nxv4i64( @@ -119,7 +119,7 @@ define <vscale x 8 x i64> @intrinsic_vsha2cl_vv_nxv8i64_nxv8i64(<vscale x 8 x i6 ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma -; CHECK-NEXT: vsha2ch.vv v8, v16, v24 +; CHECK-NEXT: vsha2cl.vv v8, v16, v24 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i64> @llvm.riscv.vsha2cl.nxv8i64.nxv8i64( diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll index c9c49e8..cb046cd 100644 --- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll +++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll @@ -204,18 +204,16 @@ define i64 @load_i64(ptr %p) { ; RV64IZBKB-NEXT: lbu a2, 5(a0) ; RV64IZBKB-NEXT: lbu a3, 6(a0) ; RV64IZBKB-NEXT: lbu a4, 7(a0) -; RV64IZBKB-NEXT: lbu a5, 0(a0) -; RV64IZBKB-NEXT: lbu a6, 1(a0) -; RV64IZBKB-NEXT: lbu a7, 2(a0) -; RV64IZBKB-NEXT: lbu a0, 3(a0) +; RV64IZBKB-NEXT: lbu a5, 1(a0) +; RV64IZBKB-NEXT: lbu a6, 2(a0) +; RV64IZBKB-NEXT: lbu a7, 3(a0) +; RV64IZBKB-NEXT: lbu a0, 0(a0) +; RV64IZBKB-NEXT: packh a3, a3, a4 ; RV64IZBKB-NEXT: packh a1, a1, a2 -; RV64IZBKB-NEXT: packh a2, a3, a4 -; RV64IZBKB-NEXT: packh a3, a5, a6 -; RV64IZBKB-NEXT: packh a0, a7, a0 -; RV64IZBKB-NEXT: slli a2, a2, 16 -; RV64IZBKB-NEXT: slli a0, a0, 16 -; RV64IZBKB-NEXT: or a1, a2, a1 -; RV64IZBKB-NEXT: or a0, a0, a3 +; RV64IZBKB-NEXT: packh a2, a6, a7 +; RV64IZBKB-NEXT: packh a0, a0, a5 +; RV64IZBKB-NEXT: packw a1, a1, a3 +; RV64IZBKB-NEXT: packw a0, a0, a2 ; RV64IZBKB-NEXT: pack a0, a0, a1 ; RV64IZBKB-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll new file mode 100644 index 0000000..988bb6f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/xqcilsm-memset.ll @@ -0,0 +1,900 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32I +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+experimental-xqcilsm < %s \ +; RUN: | FileCheck %s -check-prefixes=RV32IXQCILSM + +%struct.anon = type { [16 x i32] } +%struct.anon.0 = type { [47 x i32] } +%struct.anon.1 = type { [48 x i32] } +%struct.anon.2 = type { [64 x i8] } +%struct.struct1_t = type { [16 x i32] } + +@struct1 = common dso_local local_unnamed_addr global %struct.anon zeroinitializer, align 4 +@struct4b = common dso_local local_unnamed_addr global %struct.anon.0 
zeroinitializer, align 4 +@struct4b1 = common dso_local local_unnamed_addr global %struct.anon.1 zeroinitializer, align 4 +@struct2 = common dso_local local_unnamed_addr global %struct.anon.2 zeroinitializer, align 1 +@arr1 = common dso_local local_unnamed_addr global [100 x i32] zeroinitializer, align 4 +@struct1_ = common dso_local local_unnamed_addr global %struct.struct1_t zeroinitializer, align 4 + +define void @test1(ptr nocapture %p, i32 %n) nounwind { +; RV32I-LABEL: test1: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test1: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: mv a2, a1 +; RV32IXQCILSM-NEXT: li a1, 0 +; RV32IXQCILSM-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 1 %p, i8 0, i32 %n, i1 false) + ret void +} + +declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1) + +define void @test2(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 128 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test2: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a1, 678490 +; RV32IXQCILSM-NEXT: addi a1, a1, 1445 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 64(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false) + ret void +} + +define void @test2a(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 188 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test2a: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a1, 678490 +; RV32IXQCILSM-NEXT: addi a1, a1, 1445 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 15, 64(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 124(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 188, i1 false) + ret void +} + +define void @test2b(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2b: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 192 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test2b: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: li a1, 165 +; RV32IXQCILSM-NEXT: li a2, 192 +; RV32IXQCILSM-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 192, i1 false) + ret void +} + +define void @test2c(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 128 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test2c: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a1, 678490 +; RV32IXQCILSM-NEXT: addi a1, a1, 1445 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 64(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 128, i1 false) + ret void +} + +define void @test2d(ptr nocapture %p) nounwind { +; RV32I-LABEL: test2d: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, -91 +; RV32I-NEXT: lui a2, 1048570 +; RV32I-NEXT: lui a3, 678490 +; RV32I-NEXT: addi a2, a2, 1445 +; RV32I-NEXT: addi a3, a3, 1445 +; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sh a2, 8(a0) +; RV32I-NEXT: sb a1, 10(a0) +; RV32I-NEXT: ret +; +; RV32IXQCILSM-LABEL: test2d: +; RV32IXQCILSM: # %bb.0: # %entry +; 
RV32IXQCILSM-NEXT: li a1, -91 +; RV32IXQCILSM-NEXT: lui a2, 1048570 +; RV32IXQCILSM-NEXT: lui a3, 678490 +; RV32IXQCILSM-NEXT: addi a2, a2, 1445 +; RV32IXQCILSM-NEXT: addi a3, a3, 1445 +; RV32IXQCILSM-NEXT: sw a3, 0(a0) +; RV32IXQCILSM-NEXT: sw a3, 4(a0) +; RV32IXQCILSM-NEXT: sh a2, 8(a0) +; RV32IXQCILSM-NEXT: sb a1, 10(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 -91, i32 11, i1 false) + ret void +} + + +define ptr @test3(ptr %p) nounwind { +; RV32I-LABEL: test3: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a2, 256 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test3: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: li a2, 256 +; RV32IXQCILSM-NEXT: li a1, 0 +; RV32IXQCILSM-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 256, i1 false) + ret ptr %p +} + +define ptr @test3a(ptr %p) nounwind { +; RV32I-LABEL: test3a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a2, 128 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test3a: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 64(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %p, i8 0, i32 128, i1 false) + ret ptr %p +} + +define void @test4() nounwind { +; RV32I-LABEL: test4: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(struct1) +; RV32I-NEXT: addi a0, a0, %lo(struct1) +; RV32I-NEXT: li a2, 64 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test4: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(struct1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(struct1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @struct1, i8 0, i32 64, i1 false) + ret void +} + +define void @test4a(ptr nocapture %s) nounwind { +; RV32I-LABEL: test4a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 166 +; RV32I-NEXT: li a2, 64 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test4a: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a1, 682602 +; RV32IXQCILSM-NEXT: addi a1, a1, 1702 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 %s, i8 -90, i32 64, i1 false) + ret void +} + +declare void @llvm.lifetime.start.p0(i64, ptr nocapture) + +declare void @llvm.lifetime.end.p0(i64, ptr nocapture) + +define void @test4b() nounwind { +; RV32I-LABEL: test4b: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lui a0, %hi(struct4b) +; RV32I-NEXT: addi a0, a0, %lo(struct4b) +; RV32I-NEXT: li a2, 188 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: call memset +; RV32I-NEXT: lui a0, %hi(struct4b1) +; RV32I-NEXT: addi a0, a0, %lo(struct4b1) +; RV32I-NEXT: li a2, 192 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test4b: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a1, %hi(struct4b) +; RV32IXQCILSM-NEXT: addi a1, a1, %lo(struct4b) +; RV32IXQCILSM-NEXT: lui a0, %hi(struct4b1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(struct4b1) +; RV32IXQCILSM-NEXT: li a2, 192 +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 15, 64(a1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 
124(a1) +; RV32IXQCILSM-NEXT: li a1, 0 +; RV32IXQCILSM-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b, i8 0, i32 188, i1 false) + tail call void @llvm.memset.p0.i32(ptr align 4 @struct4b1, i8 0, i32 192, i1 false) + ret void +} + +define void @test5() nounwind { +; RV32I-LABEL: test5: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(struct2) +; RV32I-NEXT: addi a0, a0, %lo(struct2) +; RV32I-NEXT: li a2, 64 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test5: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(struct2) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(struct2) +; RV32IXQCILSM-NEXT: li a2, 64 +; RV32IXQCILSM-NEXT: li a1, 0 +; RV32IXQCILSM-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 1 @struct2, i8 0, i32 64, i1 false) + ret void +} + +define i32 @test6() nounwind { +; RV32I-LABEL: test6: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCILSM-LABEL: test6: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sw zero, 12(sp) +; RV32IXQCILSM-NEXT: li a0, 0 +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret +entry: + %x = alloca i32, align 4 + call void @llvm.memset.p0.i32(ptr align 4 %x, i8 0, i32 4, i1 false) + %0 = load i32, ptr %x, align 4 + ret i32 %0 +} + +define zeroext i8 @test6b_c() nounwind { +; RV32I-LABEL: test6b_c: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sb zero, 12(sp) +; RV32I-NEXT: lbu a0, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCILSM-LABEL: test6b_c: +; RV32IXQCILSM: # %bb.0: +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sb zero, 12(sp) +; RV32IXQCILSM-NEXT: lbu a0, 12(sp) +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret + %x = alloca i8, align 4 + call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %x) + call void @llvm.memset.p0.i32(ptr nonnull align 4 %x, i8 0, i32 1, i1 false) + %x.0.x.0. = load volatile i8, ptr %x, align 4 + call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %x) + ret i8 %x.0.x.0. +} + +define signext i16 @test6b_s() nounwind { +; RV32I-LABEL: test6b_s: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sh zero, 12(sp) +; RV32I-NEXT: lh a0, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCILSM-LABEL: test6b_s: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sh zero, 12(sp) +; RV32IXQCILSM-NEXT: lh a0, 12(sp) +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret +entry: + %x = alloca i16, align 4 + call void @llvm.lifetime.start.p0(i64 2, ptr nonnull %x) + store i16 0, ptr %x, align 4 + %x.0.x.0. = load volatile i16, ptr %x, align 4 + call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %x) + ret i16 %x.0.x.0. 
+} + +define i32 @test6b_l() nounwind { +; RV32I-LABEL: test6b_l: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: lw a0, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCILSM-LABEL: test6b_l: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sw zero, 12(sp) +; RV32IXQCILSM-NEXT: lw a0, 12(sp) +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret +entry: + %x = alloca i32, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %x) + store i32 0, ptr %x, align 4 + %x.0.x.0. = load volatile i32, ptr %x, align 4 + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %x) + ret i32 %x.0.x.0. +} + +define i64 @test6b_ll() nounwind { +; RV32I-LABEL: test6b_ll: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCILSM-LABEL: test6b_ll: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sw zero, 8(sp) +; RV32IXQCILSM-NEXT: sw zero, 12(sp) +; RV32IXQCILSM-NEXT: lw a0, 8(sp) +; RV32IXQCILSM-NEXT: lw a1, 12(sp) +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret +entry: + %x = alloca i64, align 8 + call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %x) + call void @llvm.memset.p0.i32(ptr nonnull align 8 %x, i8 0, i32 8, i1 false) + %x.0.x.0. = load volatile i64, ptr %x, align 8 + call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %x) + ret i64 %x.0.x.0. +} + +define zeroext i8 @test6c_c() nounwind { +; RV32I-LABEL: test6c_c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sb zero, 15(sp) +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCILSM-LABEL: test6c_c: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sb zero, 15(sp) +; RV32IXQCILSM-NEXT: li a0, 0 +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret +entry: + %x = alloca i8 + call void @llvm.memset.p0.i32(ptr align 1 %x, i8 0, i32 1, i1 false) + %0 = load i8, ptr %x, align 1 + ret i8 %0 +} + +define signext i16 @test6c_s() nounwind { +; RV32I-LABEL: test6c_s: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sh zero, 14(sp) +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCILSM-LABEL: test6c_s: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sh zero, 14(sp) +; RV32IXQCILSM-NEXT: li a0, 0 +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret +entry: + %x = alloca i16 + call void @llvm.memset.p0.i32(ptr align 2 %x, i8 0, i32 2, i1 false) + %0 = load i16, ptr %x, align 2 + ret i16 %0 +} + +define i32 @test6c_l() nounwind { +; RV32I-LABEL: test6c_l: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCILSM-LABEL: test6c_l: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sw zero, 12(sp) +; RV32IXQCILSM-NEXT: li a0, 0 +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret +entry: + %x = alloca i32, align 4 + call void @llvm.memset.p0.i32(ptr align 4 %x, i8 0, i32 4, i1 false) + %0 = load i32, ptr %x, align 4 + ret i32 %0 +} + +define i64 @test6c_ll() 
nounwind { +; RV32I-LABEL: test6c_ll: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw zero, 8(sp) +; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV32IXQCILSM-LABEL: test6c_ll: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: addi sp, sp, -16 +; RV32IXQCILSM-NEXT: sw zero, 8(sp) +; RV32IXQCILSM-NEXT: sw zero, 12(sp) +; RV32IXQCILSM-NEXT: li a0, 0 +; RV32IXQCILSM-NEXT: li a1, 0 +; RV32IXQCILSM-NEXT: addi sp, sp, 16 +; RV32IXQCILSM-NEXT: ret +entry: + %x = alloca i64, align 8 + call void @llvm.memset.p0.i32(ptr align 8 %x, i8 0, i32 8, i1 false) + %0 = load i64, ptr %x, align 8 + ret i64 %0 +} + +define void @test7() nounwind { +; RV32I-LABEL: test7: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: sw zero, %lo(arr1)(a0) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: sw zero, 4(a0) +; RV32I-NEXT: ret +; +; RV32IXQCILSM-LABEL: test7: +; RV32IXQCILSM: # %bb.0: +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: sw zero, %lo(arr1)(a0) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: sw zero, 4(a0) +; RV32IXQCILSM-NEXT: ret + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 8, i1 false) + ret void +} + +define void @test7a() nounwind { +; RV32I-LABEL: test7a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: ret +; +; RV32IXQCILSM-LABEL: test7a: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: ret +entry: + call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 0, i1 false) + ret void +} + +define void @test7a_unalign() nounwind { +; RV32I-LABEL: test7a_unalign: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: li a1, -1 +; RV32I-NEXT: sw a1, %lo(arr1)(a0) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: sw a1, 8(a0) +; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: sb a1, 16(a0) +; RV32I-NEXT: ret +; +; RV32IXQCILSM-LABEL: test7a_unalign: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: li a1, -1 +; RV32IXQCILSM-NEXT: sw a1, %lo(arr1)(a0) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: sw a1, 4(a0) +; RV32IXQCILSM-NEXT: sw a1, 8(a0) +; RV32IXQCILSM-NEXT: sw a1, 12(a0) +; RV32IXQCILSM-NEXT: sb a1, 16(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -1, i32 17, i1 false) + ret void +} + +define void @test7b() nounwind { +; RV32I-LABEL: test7b: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a1, 255 +; RV32I-NEXT: li a2, 68 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test7b: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: li a1, -1 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 1, 64(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -1, i32 68, i1 false) + ret void +} + +define void @test7c() nounwind { +; RV32I-LABEL: test7c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a1, 128 +; RV32I-NEXT: li a2, 128 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test7c: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: lui a1, 526344 +; 
RV32IXQCILSM-NEXT: addi a1, a1, 128 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 64(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -128, i32 128, i1 false) + ret void +} + +define void @test7d() nounwind { +; RV32I-LABEL: test7d: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a1, 13 +; RV32I-NEXT: li a2, 148 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test7d: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: lui a1, 53457 +; RV32IXQCILSM-NEXT: addi a1, a1, -755 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 15, 64(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 6, 124(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 13, i32 148, i1 false) + ret void +} + +define void @test7e() nounwind { +; RV32I-LABEL: test7e: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a1, 239 +; RV32I-NEXT: li a2, 100 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test7e: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: lui a1, 982783 +; RV32IXQCILSM-NEXT: addi a1, a1, -17 +; RV32IXQCILSM-NEXT: qc.setwmi a1, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi a1, 9, 64(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -17, i32 100, i1 false) + ret void +} + +define void @test8() nounwind { +; RV32I-LABEL: test8: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: sw zero, %lo(arr1)(a0) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: sw zero, 4(a0) +; RV32I-NEXT: sw zero, 8(a0) +; RV32I-NEXT: sw zero, 12(a0) +; RV32I-NEXT: ret +; +; RV32IXQCILSM-LABEL: test8: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: sw zero, %lo(arr1)(a0) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: sw zero, 4(a0) +; RV32IXQCILSM-NEXT: sw zero, 8(a0) +; RV32IXQCILSM-NEXT: sw zero, 12(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 16, i1 false) + ret void +} + +define void @test9() nounwind { +; RV32I-LABEL: test9: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: sw zero, %lo(arr1)(a0) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: sw zero, 20(a0) +; RV32I-NEXT: sw zero, 24(a0) +; RV32I-NEXT: sw zero, 28(a0) +; RV32I-NEXT: sw zero, 4(a0) +; RV32I-NEXT: sw zero, 8(a0) +; RV32I-NEXT: sw zero, 12(a0) +; RV32I-NEXT: sw zero, 16(a0) +; RV32I-NEXT: ret +; +; RV32IXQCILSM-LABEL: test9: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: sw zero, %lo(arr1)(a0) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: sw zero, 20(a0) +; RV32IXQCILSM-NEXT: sw zero, 24(a0) +; RV32IXQCILSM-NEXT: sw zero, 28(a0) +; RV32IXQCILSM-NEXT: sw zero, 4(a0) +; RV32IXQCILSM-NEXT: sw zero, 8(a0) +; RV32IXQCILSM-NEXT: sw zero, 12(a0) +; RV32IXQCILSM-NEXT: sw zero, 16(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 32, i1 false) + ret void +} + +define void @test10() nounwind { +; RV32I-LABEL: test10: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: 
lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 60 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test10: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 15, 0(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 60, i1 false) + ret void +} + +define void @test11() nounwind { +; RV32I-LABEL: test11: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 64 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test11: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 64, i1 false) + ret void +} + +define void @test12() nounwind { +; RV32I-LABEL: test12: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 120 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test12: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 14, 64(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 120, i1 false) + ret void +} + +define void @test13() nounwind { +; RV32I-LABEL: test13: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 124 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test13: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 15, 64(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 124, i1 false) + ret void +} + +define void @test14() nounwind { +; RV32I-LABEL: test14: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 180 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test14: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 15, 64(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 14, 124(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 180, i1 false) + ret void +} + +define void @test15() nounwind { +; RV32I-LABEL: test15: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 184 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test15: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 15, 64(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 15, 124(a0) +; RV32IXQCILSM-NEXT: ret +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 
0, i32 184, i1 false) + ret void +} + +define void @test15a() nounwind { +; RV32I-LABEL: test15a: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a1, 165 +; RV32I-NEXT: li a2, 192 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test15a: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: li a1, 165 +; RV32IXQCILSM-NEXT: li a2, 192 +; RV32IXQCILSM-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 -91, i32 192, i1 false) + ret void +} + +define void @test15b() nounwind { +; RV32I-LABEL: test15b: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 188 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test15b: +; RV32IXQCILSM: # %bb.0: +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 0(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 15, 64(a0) +; RV32IXQCILSM-NEXT: qc.setwmi zero, 16, 124(a0) +; RV32IXQCILSM-NEXT: ret + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 188, i1 false) + ret void +} + +define void @test15c() nounwind { +; RV32I-LABEL: test15c: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a0, %hi(arr1) +; RV32I-NEXT: addi a0, a0, %lo(arr1) +; RV32I-NEXT: li a2, 192 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: tail memset +; +; RV32IXQCILSM-LABEL: test15c: +; RV32IXQCILSM: # %bb.0: # %entry +; RV32IXQCILSM-NEXT: lui a0, %hi(arr1) +; RV32IXQCILSM-NEXT: addi a0, a0, %lo(arr1) +; RV32IXQCILSM-NEXT: li a2, 192 +; RV32IXQCILSM-NEXT: li a1, 0 +; RV32IXQCILSM-NEXT: tail memset +entry: + tail call void @llvm.memset.p0.i32(ptr align 4 @arr1, i8 0, i32 192, i1 false) + ret void +}