; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv64-- -mattr=+m,+v,+f | FileCheck %s -check-prefix=RISCV

define void @test_masked_store_success_v4i8(<4 x i8> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4i8:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RISCV-NEXT:    vse8.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <4 x i8>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x i8> %x, <4 x i8> %load
  store <4 x i8> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v4i16(<4 x i16> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4i16:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; RISCV-NEXT:    vse16.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <4 x i16>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x i16> %x, <4 x i16> %load
  store <4 x i16> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v4i32(<4 x i32> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4i32:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RISCV-NEXT:    vse32.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <4 x i32>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %load
  store <4 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v4i64(<4 x i64> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4i64:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RISCV-NEXT:    vse64.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <4 x i64>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %load
  store <4 x i64> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v4f16(<4 x half> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4f16:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RISCV-NEXT:    vmv1r.v v9, v0
; RISCV-NEXT:    vfirst.m a3, v0
; RISCV-NEXT:    mv a2, a0
; RISCV-NEXT:    beqz a3, .LBB4_2
; RISCV-NEXT:  # %bb.1:
; RISCV-NEXT:    mv a2, a1
; RISCV-NEXT:  .LBB4_2:
; RISCV-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RISCV-NEXT:    vmv.v.i v8, 0
; RISCV-NEXT:    vmv1r.v v0, v9
; RISCV-NEXT:    vmerge.vim v8, v8, 1, v0
; RISCV-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RISCV-NEXT:    vslidedown.vi v8, v8, 2
; RISCV-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RISCV-NEXT:    vmsne.vi v8, v8, 0
; RISCV-NEXT:    vmv.v.i v10, 0
; RISCV-NEXT:    vmv1r.v v0, v8
; RISCV-NEXT:    vmerge.vim v11, v10, 1, v0
; RISCV-NEXT:    vslidedown.vi v11, v11, 1
; RISCV-NEXT:    vmv.x.s a3, v11
; RISCV-NEXT:    andi a3, a3, 1
; RISCV-NEXT:    bnez a3, .LBB4_4
; RISCV-NEXT:  # %bb.3:
; RISCV-NEXT:    addi a3, a1, 6
; RISCV-NEXT:    j .LBB4_5
; RISCV-NEXT:  .LBB4_4:
; RISCV-NEXT:    addi a3, a0, 24
; RISCV-NEXT:  .LBB4_5:
; RISCV-NEXT:    vmv1r.v v0, v9
; RISCV-NEXT:    vmerge.vim v9, v10, 1, v0
; RISCV-NEXT:    vslidedown.vi v9, v9, 1
; RISCV-NEXT:    vmv.x.s a4, v9
; RISCV-NEXT:    andi a4, a4, 1
; RISCV-NEXT:    bnez a4, .LBB4_7
; RISCV-NEXT:  # %bb.6:
; RISCV-NEXT:    addi a5, a1, 2
; RISCV-NEXT:    j .LBB4_8
; RISCV-NEXT:  .LBB4_7:
; RISCV-NEXT:    addi a5, a0, 8
; RISCV-NEXT:  .LBB4_8:
; RISCV-NEXT:    lh a4, 0(a2)
; RISCV-NEXT:    lh a2, 0(a3)
; RISCV-NEXT:    lh a3, 0(a5)
; RISCV-NEXT:    vfirst.m a5, v8
; RISCV-NEXT:    beqz a5, .LBB4_10
; RISCV-NEXT:  # %bb.9:
; RISCV-NEXT:    addi a0, a1, 4
; RISCV-NEXT:    j .LBB4_11
; RISCV-NEXT:  .LBB4_10:
; RISCV-NEXT:    addi a0, a0, 16
; RISCV-NEXT:  .LBB4_11:
; RISCV-NEXT:    lh a0, 0(a0)
; RISCV-NEXT:    sh a4, 0(a1)
; RISCV-NEXT:    sh a3, 2(a1)
; RISCV-NEXT:    sh a0, 4(a1)
; RISCV-NEXT:    sh a2, 6(a1)
; RISCV-NEXT:    ret
  %load = load <4 x half>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x half> %x, <4 x half> %load
  store <4 x half> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v4f32(<4 x float> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4f32:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RISCV-NEXT:    vse32.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <4 x float>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x float> %x, <4 x float> %load
  store <4 x float> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v4f64(<4 x double> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v4f64:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RISCV-NEXT:    vse64.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <4 x double>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x double> %x, <4 x double> %load
  store <4 x double> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v8i8(<8 x i8> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8i8:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RISCV-NEXT:    vse8.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <8 x i8>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x i8> %x, <8 x i8> %load
  store <8 x i8> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v8i16(<8 x i16> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8i16:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RISCV-NEXT:    vse16.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <8 x i16>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %load
  store <8 x i16> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v8i32(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8i32:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT:    vse32.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <8 x i32>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
  store <8 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v8i64(<8 x i64> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8i64:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RISCV-NEXT:    vse64.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <8 x i64>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %load
  store <8 x i64> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v8f16(<8 x half> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8f16:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RISCV-NEXT:    vmv1r.v v8, v0
; RISCV-NEXT:    vfirst.m a3, v0
; RISCV-NEXT:    mv a2, a0
; RISCV-NEXT:    beqz a3, .LBB11_2
; RISCV-NEXT:  # %bb.1:
; RISCV-NEXT:    mv a2, a1
; RISCV-NEXT:  .LBB11_2:
; RISCV-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RISCV-NEXT:    vmv.v.i v9, 0
; RISCV-NEXT:    vmv1r.v v0, v8
; RISCV-NEXT:    vmerge.vim v9, v9, 1, v0
; RISCV-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; RISCV-NEXT:    vslidedown.vi v9, v9, 4
; RISCV-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RISCV-NEXT:    vmsne.vi v11, v9, 0
; RISCV-NEXT:    vmv.v.i v10, 0
; RISCV-NEXT:    vmv1r.v v0, v11
; RISCV-NEXT:    vmerge.vim v9, v10, 1, v0
; RISCV-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RISCV-NEXT:    vslidedown.vi v9, v9, 2
; RISCV-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RISCV-NEXT:    vmsne.vi v9, v9, 0
; RISCV-NEXT:    vmv.v.i v12, 0
; RISCV-NEXT:    vmv1r.v v0, v9
; RISCV-NEXT:    vmerge.vim v13, v12, 1, v0
; RISCV-NEXT:    vslidedown.vi v13, v13, 1
; RISCV-NEXT:    vmv.x.s a3, v13
; RISCV-NEXT:    andi a3, a3, 1
; RISCV-NEXT:    bnez a3, .LBB11_4
; RISCV-NEXT:  # %bb.3:
; RISCV-NEXT:    addi a3, a1, 14
; RISCV-NEXT:    j .LBB11_5
; RISCV-NEXT:  .LBB11_4:
; RISCV-NEXT:    addi a3, a0, 56
; RISCV-NEXT:  .LBB11_5:
; RISCV-NEXT:    vmv1r.v v0, v8
; RISCV-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RISCV-NEXT:    vmerge.vim v10, v10, 1, v0
; RISCV-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RISCV-NEXT:    vslidedown.vi v10, v10, 2
; RISCV-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RISCV-NEXT:    vmsne.vi v10, v10, 0
; RISCV-NEXT:    vmv1r.v v0, v10
; RISCV-NEXT:    vmerge.vim v13, v12, 1, v0
; RISCV-NEXT:    vslidedown.vi v13, v13, 1
; RISCV-NEXT:    vmv.x.s a4, v13
; RISCV-NEXT:    andi a4, a4, 1
; RISCV-NEXT:    bnez a4, .LBB11_8
; RISCV-NEXT:  # %bb.6:
; RISCV-NEXT:    addi a4, a1, 6
; RISCV-NEXT:    vfirst.m a5, v11
; RISCV-NEXT:    bnez a5, .LBB11_9
; RISCV-NEXT:  .LBB11_7:
; RISCV-NEXT:    addi a5, a0, 32
; RISCV-NEXT:    j .LBB11_10
; RISCV-NEXT:  .LBB11_8:
; RISCV-NEXT:    addi a4, a0, 24
; RISCV-NEXT:    vfirst.m a5, v11
; RISCV-NEXT:    beqz a5, .LBB11_7
; RISCV-NEXT:  .LBB11_9:
; RISCV-NEXT:    addi a5, a1, 8
; RISCV-NEXT:  .LBB11_10:
; RISCV-NEXT:    vmv1r.v v0, v11
; RISCV-NEXT:    vmerge.vim v11, v12, 1, v0
; RISCV-NEXT:    vslidedown.vi v11, v11, 1
; RISCV-NEXT:    vmv.x.s a6, v11
; RISCV-NEXT:    andi a6, a6, 1
; RISCV-NEXT:    bnez a6, .LBB11_14
; RISCV-NEXT:  # %bb.11:
; RISCV-NEXT:    addi a6, a1, 10
; RISCV-NEXT:    vfirst.m a7, v9
; RISCV-NEXT:    bnez a7, .LBB11_15
; RISCV-NEXT:  .LBB11_12:
; RISCV-NEXT:    addi a7, a0, 48
; RISCV-NEXT:    vfirst.m t0, v10
; RISCV-NEXT:    bnez t0, .LBB11_16
; RISCV-NEXT:  .LBB11_13:
; RISCV-NEXT:    addi t1, a0, 16
; RISCV-NEXT:    j .LBB11_17
; RISCV-NEXT:  .LBB11_14:
; RISCV-NEXT:    addi a6, a0, 40
; RISCV-NEXT:    vfirst.m a7, v9
; RISCV-NEXT:    beqz a7, .LBB11_12
; RISCV-NEXT:  .LBB11_15:
; RISCV-NEXT:    addi a7, a1, 12
; RISCV-NEXT:    vfirst.m t0, v10
; RISCV-NEXT:    beqz t0, .LBB11_13
; RISCV-NEXT:  .LBB11_16:
; RISCV-NEXT:    addi t1, a1, 4
; RISCV-NEXT:  .LBB11_17:
; RISCV-NEXT:    vmv1r.v v0, v8
; RISCV-NEXT:    lh t0, 0(a2)
; RISCV-NEXT:    lh a2, 0(a3)
; RISCV-NEXT:    lh a3, 0(a4)
; RISCV-NEXT:    lh a4, 0(a5)
; RISCV-NEXT:    lh a5, 0(a6)
; RISCV-NEXT:    lh a6, 0(a7)
; RISCV-NEXT:    lh a7, 0(t1)
; RISCV-NEXT:    vmerge.vim v8, v12, 1, v0
; RISCV-NEXT:    vslidedown.vi v8, v8, 1
; RISCV-NEXT:    vmv.x.s t1, v8
; RISCV-NEXT:    andi t1, t1, 1
; RISCV-NEXT:    bnez t1, .LBB11_19
; RISCV-NEXT:  # %bb.18:
; RISCV-NEXT:    addi a0, a1, 2
; RISCV-NEXT:    j .LBB11_20
; RISCV-NEXT:  .LBB11_19:
; RISCV-NEXT:    addi a0, a0, 8
; RISCV-NEXT:  .LBB11_20:
; RISCV-NEXT:    lh a0, 0(a0)
; RISCV-NEXT:    sh t0, 0(a1)
; RISCV-NEXT:    sh a0, 2(a1)
; RISCV-NEXT:    sh a7, 4(a1)
; RISCV-NEXT:    sh a3, 6(a1)
; RISCV-NEXT:    sh a4, 8(a1)
; RISCV-NEXT:    sh a5, 10(a1)
; RISCV-NEXT:    sh a6, 12(a1)
; RISCV-NEXT:    sh a2, 14(a1)
; RISCV-NEXT:    ret
  %load = load <8 x half>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x half> %x, <8 x half> %load
  store <8 x half> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v8f32(<8 x float> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8f32:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT:    vse32.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <8 x float>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x float> %x, <8 x float> %load
  store <8 x float> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v8f64(<8 x double> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v8f64:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RISCV-NEXT:    vse64.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <8 x double>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x double> %x, <8 x double> %load
  store <8 x double> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v16i8(<16 x i8> %x, ptr %ptr, <16 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v16i8:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RISCV-NEXT:    vse8.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <16 x i8>, ptr %ptr, align 32
  %sel = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %load
  store <16 x i8> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v16i16(<16 x i16> %x, ptr %ptr, <16 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v16i16:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RISCV-NEXT:    vse16.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <16 x i16>, ptr %ptr, align 32
  %sel = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> %load
  store <16 x i16> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v16i32(<16 x i32> %x, ptr %ptr, <16 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v16i32:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RISCV-NEXT:    vse32.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <16 x i32>, ptr %ptr, align 32
  %sel = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %load
  store <16 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v32i8(<32 x i8> %x, ptr %ptr, <32 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v32i8:
; RISCV:       # %bb.0:
; RISCV-NEXT:    li a1, 32
; RISCV-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RISCV-NEXT:    vse8.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <32 x i8>, ptr %ptr, align 32
  %sel = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %load
  store <32 x i8> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v32i16(<32 x i16> %x, ptr %ptr, <32 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v32i16:
; RISCV:       # %bb.0:
; RISCV-NEXT:    li a1, 32
; RISCV-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; RISCV-NEXT:    vse16.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <32 x i16>, ptr %ptr, align 32
  %sel = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> %load
  store <32 x i16> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_v64i8(<64 x i8> %x, ptr %ptr, <64 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_v64i8:
; RISCV:       # %bb.0:
; RISCV-NEXT:    li a1, 64
; RISCV-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
; RISCV-NEXT:    vse8.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <64 x i8>, ptr %ptr, align 32
  %sel = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %load
  store <64 x i8> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_invert_mask_v4i32(<4 x i32> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_invert_mask_v4i32:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; RISCV-NEXT:    vmnot.m v0, v0
; RISCV-NEXT:    vse32.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <4 x i32>, ptr %ptr, align 32
  %sel = select <4 x i1> %mask, <4 x i32> %load, <4 x i32> %x
  store <4 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_invert_mask_v8i32(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_invert_mask_v8i32:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RISCV-NEXT:    vmnot.m v0, v0
; RISCV-NEXT:    vse32.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <8 x i32>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x i32> %load, <8 x i32> %x
  store <8 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_success_invert_mask_v16i32(<16 x i32> %x, ptr %ptr, <16 x i1> %mask) {
; RISCV-LABEL: test_masked_store_success_invert_mask_v16i32:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RISCV-NEXT:    vmnot.m v0, v0
; RISCV-NEXT:    vse32.v v8, (a0), v0.t
; RISCV-NEXT:    ret
  %load = load <16 x i32>, ptr %ptr, align 32
  %sel = select <16 x i1> %mask, <16 x i32> %load, <16 x i32> %x
  store <16 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_zextload(<4 x i64> %x, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_zextload:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RISCV-NEXT:    vle32.v v12, (a0)
; RISCV-NEXT:    vzext.vf2 v10, v12
; RISCV-NEXT:    vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT:    vse64.v v8, (a0)
; RISCV-NEXT:    ret
  %load = load <4 x i32>, ptr %ptr, align 32
  %zext = zext <4 x i32> %load to <4 x i64>
  %masked = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %zext
  store <4 x i64> %masked, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_volatile_load(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_volatile_load:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT:    vle32.v v10, (a0)
; RISCV-NEXT:    vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT:    vse32.v v8, (a0)
; RISCV-NEXT:    ret
  %load = load volatile <8 x i32>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
  store <8 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_volatile_store(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_volatile_store:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT:    vle32.v v10, (a0)
; RISCV-NEXT:    vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT:    vse32.v v8, (a0)
; RISCV-NEXT:    ret
  %load = load <8 x i32>, ptr %ptr, align 32
  %sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
  store volatile <8 x i32> %sel, ptr %ptr, align 32
  ret void
}

declare void @use_vec(<8 x i32>)

define void @test_masked_store_intervening(<8 x i32> %x, ptr %ptr, <8 x i1> %mask) nounwind {
; RISCV-LABEL: test_masked_store_intervening:
; RISCV:       # %bb.0:
; RISCV-NEXT:    addi sp, sp, -32
; RISCV-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RISCV-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RISCV-NEXT:    csrr a1, vlenb
; RISCV-NEXT:    slli a2, a1, 2
; RISCV-NEXT:    add a1, a2, a1
; RISCV-NEXT:    sub sp, sp, a1
; RISCV-NEXT:    csrr a1, vlenb
; RISCV-NEXT:    slli a1, a1, 2
; RISCV-NEXT:    add a1, sp, a1
; RISCV-NEXT:    addi a1, a1, 16
; RISCV-NEXT:    vs1r.v v0, (a1) # vscale x 8-byte Folded Spill
; RISCV-NEXT:    mv s0, a0
; RISCV-NEXT:    csrr a1, vlenb
; RISCV-NEXT:    slli a1, a1, 1
; RISCV-NEXT:    add a1, sp, a1
; RISCV-NEXT:    addi a1, a1, 16
; RISCV-NEXT:    vs2r.v v8, (a1) # vscale x 16-byte Folded Spill
; RISCV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT:    vle32.v v8, (a0)
; RISCV-NEXT:    addi a1, sp, 16
; RISCV-NEXT:    vs2r.v v8, (a1) # vscale x 16-byte Folded Spill
; RISCV-NEXT:    vmv.v.i v8, 0
; RISCV-NEXT:    vse32.v v8, (a0)
; RISCV-NEXT:    call use_vec
; RISCV-NEXT:    csrr a0, vlenb
; RISCV-NEXT:    slli a0, a0, 2
; RISCV-NEXT:    add a0, sp, a0
; RISCV-NEXT:    addi a0, a0, 16
; RISCV-NEXT:    vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
; RISCV-NEXT:    csrr a0, vlenb
; RISCV-NEXT:    slli a0, a0, 1
; RISCV-NEXT:    add a0, sp, a0
; RISCV-NEXT:    addi a0, a0, 16
; RISCV-NEXT:    vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
; RISCV-NEXT:    addi a0, sp, 16
; RISCV-NEXT:    vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
; RISCV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT:    vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT:    vse32.v v8, (s0)
; RISCV-NEXT:    csrr a0, vlenb
; RISCV-NEXT:    slli a1, a0, 2
; RISCV-NEXT:    add a0, a1, a0
; RISCV-NEXT:    add sp, sp, a0
; RISCV-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RISCV-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RISCV-NEXT:    addi sp, sp, 32
; RISCV-NEXT:    ret
  %load = load <8 x i32>, ptr %ptr, align 32
  store <8 x i32> zeroinitializer, ptr %ptr, align 32
  %tmp = load <8 x i32>, ptr %ptr
  call void @use_vec(<8 x i32> %tmp)
  %sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
  store <8 x i32> %sel, ptr %ptr, align 32
  ret void
}

define void @test_masked_store_multiple_v8i32(<8 x i32> %x, <8 x i32> %y, ptr %ptr1, ptr %ptr2, <8 x i1> %mask, <8 x i1> %mask2) {
; RISCV-LABEL: test_masked_store_multiple_v8i32:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT:    vmv1r.v v13, v0
; RISCV-NEXT:    vle32.v v14, (a1)
; RISCV-NEXT:    vmv1r.v v0, v12
; RISCV-NEXT:    vmerge.vvm v10, v14, v10, v0
; RISCV-NEXT:    vmv1r.v v0, v13
; RISCV-NEXT:    vse32.v v8, (a0), v0.t
; RISCV-NEXT:    vse32.v v10, (a1)
; RISCV-NEXT:    ret
  %load = load <8 x i32>, ptr %ptr1, align 32
  %load2 = load <8 x i32>, ptr %ptr2, align 32
  %sel = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %load
  %sel2 = select <8 x i1> %mask2, <8 x i32> %y, <8 x i32> %load2
  store <8 x i32> %sel, ptr %ptr1, align 32
  store <8 x i32> %sel2, ptr %ptr2, align 32
  ret void
}

define void @test_masked_store_multiple_v8i64(<8 x i64> %x, <8 x i64> %y, ptr %ptr1, ptr %ptr2, <8 x i1> %mask, <8 x i1> %mask2) {
; RISCV-LABEL: test_masked_store_multiple_v8i64:
; RISCV:       # %bb.0:
; RISCV-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RISCV-NEXT:    vmv1r.v v17, v0
; RISCV-NEXT:    vle64.v v20, (a1)
; RISCV-NEXT:    vmv1r.v v0, v16
; RISCV-NEXT:    vmerge.vvm v12, v20, v12, v0
; RISCV-NEXT:    vmv1r.v v0, v17
; RISCV-NEXT:    vse64.v v8, (a0), v0.t
; RISCV-NEXT:    vse64.v v12, (a1)
; RISCV-NEXT:    ret
  %load = load <8 x i64>, ptr %ptr1, align 32
  %load2 = load <8 x i64>, ptr %ptr2, align 32
  %sel = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %load
  %sel2 = select <8 x i1> %mask2, <8 x i64> %y, <8 x i64> %load2
  store <8 x i64> %sel, ptr %ptr1, align 32
  store <8 x i64> %sel2, ptr %ptr2, align 32
  ret void
}

define void @test_masked_store_unaligned_v4i32(<4 x i32> %data, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_unaligned_v4i32:
; RISCV:       # %bb.0:
; RISCV-NEXT:    addi a0, a0, 1
; RISCV-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RISCV-NEXT:    vle8.v v9, (a0)
; RISCV-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RISCV-NEXT:    vmerge.vvm v8, v9, v8, v0
; RISCV-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; RISCV-NEXT:    vse8.v v8, (a0)
; RISCV-NEXT:    ret
  %ptr_i8 = getelementptr i8, ptr %ptr, i32 1
  %ptr_vec = bitcast ptr %ptr_i8 to ptr
  %load = load <4 x i32>, ptr %ptr_vec, align 1
  %sel = select <4 x i1> %mask, <4 x i32> %data, <4 x i32> %load
  store <4 x i32> %sel, ptr %ptr_vec, align 1
  ret void
}

define void @test_masked_store_unaligned_v4i64(<4 x i64> %data, ptr %ptr, <4 x i1> %mask) {
; RISCV-LABEL: test_masked_store_unaligned_v4i64:
; RISCV:       # %bb.0:
; RISCV-NEXT:    addi a0, a0, 1
; RISCV-NEXT:    li a1, 32
; RISCV-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RISCV-NEXT:    vle8.v v10, (a0)
; RISCV-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; RISCV-NEXT:    vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RISCV-NEXT:    vse8.v v8, (a0)
; RISCV-NEXT:    ret
  %ptr_i8 = getelementptr i8, ptr %ptr, i64 1
  %ptr_vec = bitcast ptr %ptr_i8 to ptr
  %load = load <4 x i64>, ptr %ptr_vec, align 1
  %sel = select <4 x i1> %mask, <4 x i64> %data, <4 x i64> %load
  store <4 x i64> %sel, ptr %ptr_vec, align 1
  ret void
}

define void @test_masked_store_unaligned_v8i32(<8 x i32> %data, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_unaligned_v8i32:
; RISCV:       # %bb.0:
; RISCV-NEXT:    addi a0, a0, 1
; RISCV-NEXT:    li a1, 32
; RISCV-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RISCV-NEXT:    vle8.v v10, (a0)
; RISCV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RISCV-NEXT:    vmerge.vvm v8, v10, v8, v0
; RISCV-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RISCV-NEXT:    vse8.v v8, (a0)
; RISCV-NEXT:    ret
  %ptr_i8 = getelementptr i8, ptr %ptr, i32 1
  %ptr_vec = bitcast ptr %ptr_i8 to ptr
  %load = load <8 x i32>, ptr %ptr_vec, align 1
  %sel = select <8 x i1> %mask, <8 x i32> %data, <8 x i32> %load
  store <8 x i32> %sel, ptr %ptr_vec, align 1
  ret void
}

define void @test_masked_store_unaligned_v8i64(<8 x i64> %data, ptr %ptr, <8 x i1> %mask) {
; RISCV-LABEL: test_masked_store_unaligned_v8i64:
; RISCV:       # %bb.0:
; RISCV-NEXT:    addi a0, a0, 1
; RISCV-NEXT:    li a1, 64
; RISCV-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
; RISCV-NEXT:    vle8.v v12, (a0)
; RISCV-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RISCV-NEXT:    vmerge.vvm v8, v12, v8, v0
; RISCV-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
; RISCV-NEXT:    vse8.v v8, (a0)
; RISCV-NEXT:    ret
  %ptr_i8 = getelementptr i8, ptr %ptr, i64 1
  %ptr_vec = bitcast ptr %ptr_i8 to ptr
  %load = load <8 x i64>, ptr %ptr_vec, align 1
  %sel = select <8 x i1> %mask, <8 x i64> %data, <8 x i64> %load
  store <8 x i64> %sel, ptr %ptr_vec, align 1
  ret void
}