| author | Yeting Kuo <yeting.kuo@sifive.com> | 2022-07-29 17:21:11 +0800 |
|---|---|---|
| committer | Yeting Kuo <yeting.kuo@sifive.com> | 2022-08-11 13:23:08 +0800 |
| commit | 7050f2102e68f0b9540d2727f451bd56ea1a1b52 (patch) | |
| tree | b49d1a13432acdc567bbaef1c89215edd2920c6d | |
| parent | 7a70e6e224cdbbd02cf6a7a7d8ff69950faaa35f (diff) | |
[RISCV][test] Precommitted tests for the optimization of vmerge and unmasked intrinsics.
Precommitted test cases for D130442.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D130753
| -rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll | 215 |
| -rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll | 358 |

2 files changed, 573 insertions, 0 deletions
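For context, the peephole these tests are precommitted for (D130442) is expected to fold an unmasked RVV instruction followed by a vmerge.vvm into the masked, tail-undisturbed form of that instruction, writing directly into the passthru register. A minimal before/after sketch for the vpmerge_vpadd case: the "before" sequence is taken from the CHECK lines below, while the "after" sequence only illustrates the intended fold and is not output from the patch itself:

```
# Before the fold: unmasked vadd, then a vmerge under the mask in v0.
vsetvli zero, a0, e32, m1, ta, mu
vadd.vv v9, v9, v10
vsetvli zero, zero, e32, m1, tu, mu
vmerge.vvm v8, v8, v9, v0
# After the fold (illustrative): a single masked, tail-undisturbed vadd.
# Inactive elements are left as the passthru values already in v8.
vsetvli zero, a0, e32, m1, tu, mu
vadd.vv v8, v9, v10, v0.t
```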
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll
new file mode 100644
index 0000000..36e31aa
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-peephole-vmerge-vops.ll
@@ -0,0 +1,215 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=256 | FileCheck %s
+
+declare <8 x i16> @llvm.vp.merge.nxv2i16(<8 x i1>, <8 x i16>, <8 x i16>, i32)
+declare <8 x i32> @llvm.vp.merge.nxv2i32(<8 x i1>, <8 x i32>, <8 x i32>, i32)
+declare <8 x float> @llvm.vp.merge.nxv2f32(<8 x i1>, <8 x float>, <8 x float>, i32)
+declare <8 x double> @llvm.vp.merge.nxv2f64(<8 x i1>, <8 x double>, <8 x double>, i32)
+
+; Test binary operator with vp.merge and vp.add.
+declare <8 x i32> @llvm.vp.add.nxv2i32(<8 x i32>, <8 x i32>, <8 x i1>, i32)
+define <8 x i32> @vpmerge_vpadd(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpadd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i32> @llvm.vp.add.nxv2i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i32> @llvm.vp.merge.nxv2i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
+  ret <8 x i32> %b
+}
+
+; Test that the glued node of the merge is not deleted.
+declare <8 x i1> @llvm.vp.icmp.nxv2i32(<8 x i32>, <8 x i32>, metadata, <8 x i1>, i32)
+define <8 x i32> @vpmerge_vpadd2(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpadd2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v11, v9, v10
+; CHECK-NEXT:    vmseq.vv v0, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i32> @llvm.vp.add.nxv2i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %mask, i32 %vl)
+  %m = call <8 x i1> @llvm.vp.icmp.nxv2i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i32> @llvm.vp.merge.nxv2i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
+  ret <8 x i32> %b
+}
+
+; Test vp.merge with an all-ones mask.
+define <8 x i32> @vpmerge_vpadd3(<8 x i32> %passthru, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpadd3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, mu
+; CHECK-NEXT:    vmset.m v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i32> @llvm.vp.add.nxv2i32(<8 x i32> %x, <8 x i32> %y, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i32> @llvm.vp.merge.nxv2i32(<8 x i1> %mask, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
+  ret <8 x i32> %b
+}
+
+; Test float binary operator with vp.merge and vp.fadd.
+declare <8 x float> @llvm.vp.fadd.nxv2f32(<8 x float>, <8 x float>, <8 x i1>, i32)
+define <8 x float> @vpmerge_vpfadd(<8 x float> %passthru, <8 x float> %x, <8 x float> %y, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfadd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x float> @llvm.vp.fadd.nxv2f32(<8 x float> %x, <8 x float> %y, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x float> @llvm.vp.merge.nxv2f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
+  ret <8 x float> %b
+}
+
+; Test conversion by fptosi.
+declare <8 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<8 x float>, <8 x i1>, i32)
+define <8 x i16> @vpmerge_vpfptosi(<8 x i16> %passthru, <8 x float> %x, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfptosi:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v9
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<8 x float> %x, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i16> @llvm.vp.merge.nxv2i16(<8 x i1> %m, <8 x i16> %a, <8 x i16> %passthru, i32 %vl)
+  ret <8 x i16> %b
+}
+
+; Test conversion by sitofp.
+declare <8 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<8 x i64>, <8 x i1>, i32)
+define <8 x float> @vpmerge_vpsitofp(<8 x float> %passthru, <8 x i64> %x, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpsitofp:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfncvt.f.x.w v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<8 x i64> %x, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x float> @llvm.vp.merge.nxv2f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
+  ret <8 x float> %b
+}
+
+; Test integer extension by vp.zext.
+declare <8 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<8 x i8>, <8 x i1>, i32)
+define <8 x i32> @vpmerge_vpzext(<8 x i32> %passthru, <8 x i8> %x, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpzext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vzext.vf4 v10, v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<8 x i8> %x, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i32> @llvm.vp.merge.nxv2i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
+  ret <8 x i32> %b
+}
+
+; Test integer truncation by vp.trunc.
+declare <8 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<8 x i64>, <8 x i1>, i32)
+define <8 x i32> @vpmerge_vptrunc(<8 x i32> %passthru, <8 x i64> %x, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vptrunc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<8 x i64> %x, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i32> @llvm.vp.merge.nxv2i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
+  ret <8 x i32> %b
+}
+
+; Test floating-point extension by vp.fpext.
+declare <8 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<8 x float>, <8 x i1>, i32)
+define <8 x double> @vpmerge_vpfpext(<8 x double> %passthru, <8 x float> %x, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfpext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v10
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<8 x float> %x, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x double> @llvm.vp.merge.nxv2f64(<8 x i1> %m, <8 x double> %a, <8 x double> %passthru, i32 %vl)
+  ret <8 x double> %b
+}
+
+; Test floating-point truncation by vp.fptrunc.
+declare <8 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<8 x double>, <8 x i1>, i32)
+define <8 x float> @vpmerge_vpfptrunc(<8 x float> %passthru, <8 x double> %x, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfptrunc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfncvt.f.f.w v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<8 x double> %x, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x float> @llvm.vp.merge.nxv2f32(<8 x i1> %m, <8 x float> %a, <8 x float> %passthru, i32 %vl)
+  ret <8 x float> %b
+}
+
+; Test load operation by vp.load.
+declare <8 x i32> @llvm.vp.load.nxv2i32.p0nxv2i32(<8 x i32> *, <8 x i1>, i32)
+define <8 x i32> @vpmerge_vpload(<8 x i32> %passthru, <8 x i32> * %p, <8 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpload:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i32> @llvm.vp.load.nxv2i32.p0nxv2i32(<8 x i32> * %p, <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i32> @llvm.vp.merge.nxv2i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
+  ret <8 x i32> %b
+}
+
+; Test that the result has both a chain and a glued node.
+define <8 x i32> @vpmerge_vpload2(<8 x i32> %passthru, <8 x i32> * %p, <8 x i32> %x, <8 x i32> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpload2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v11, (a0)
+; CHECK-NEXT:    vmseq.vv v0, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <8 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <8 x i1> %splat, <8 x i1> poison, <8 x i32> zeroinitializer
+  %a = call <8 x i32> @llvm.vp.load.nxv2i32.p0nxv2i32(<8 x i32> * %p, <8 x i1> %mask, i32 %vl)
+  %m = call <8 x i1> @llvm.vp.icmp.nxv2i32(<8 x i32> %x, <8 x i32> %y, metadata !"eq", <8 x i1> %mask, i32 %vl)
+  %b = call <8 x i32> @llvm.vp.merge.nxv2i32(<8 x i1> %m, <8 x i32> %a, <8 x i32> %passthru, i32 %vl)
+  ret <8 x i32> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
new file mode 100644
index 0000000..4268b1b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -0,0 +1,358 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
+
+declare <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
+declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
+declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
+declare <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
+
+; Test binary operator with vp.merge and vp.add.
+declare <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x i32> @vpmerge_vpadd(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpadd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test that the glued node of the merge is not deleted.
+declare <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, metadata, <vscale x 2 x i1>, i32)
+define <vscale x 2 x i32> @vpmerge_vpadd2(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpadd2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v11, v9, v10
+; CHECK-NEXT:    vmseq.vv v0, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 %vl)
+  %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test vp.merge with an all-ones mask.
+define <vscale x 2 x i32> @vpmerge_vpadd3(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpadd3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vmset.m v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i32> @llvm.vp.add.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %mask, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test float binary operator with vp.merge and vp.fadd.
+declare <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x float> @vpmerge_vpfadd(<vscale x 2 x float> %passthru, <vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfadd:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfadd.vv v9, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x float> @llvm.vp.fadd.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x float> %y, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
+  ret <vscale x 2 x float> %b
+}
+
+; Test binary operator with a specific EEW by riscv.vrgatherei16.
+declare <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i16>, i64)
+define <vscale x 2 x i32> @vpmerge_vrgatherei16(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vrgatherei16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vrgatherei16.vv v8, v9, v10
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %2 = tail call <vscale x 2 x i32> @llvm.riscv.vrgatherei16.vv.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32> %x, <vscale x 2 x i16> %y, i64 %1)
+  %3 = tail call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %2, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %2
+}
+
+; Test conversion by fptosi.
+declare <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x i16> @vpmerge_vpfptosi(<vscale x 2 x i16> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfptosi:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v9
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i16> @llvm.vp.fptosi.nxv2i16.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %a, <vscale x 2 x i16> %passthru, i32 %vl)
+  ret <vscale x 2 x i16> %b
+}
+
+; Test conversion by sitofp.
+declare <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x float> @vpmerge_vpsitofp(<vscale x 2 x float> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpsitofp:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfncvt.f.x.w v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x float> @llvm.vp.sitofp.nxv2f32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
+  ret <vscale x 2 x float> %b
+}
+
+; Test integer extension by vp.zext.
+declare <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x i32> @vpmerge_vpzext(<vscale x 2 x i32> %passthru, <vscale x 2 x i8> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpzext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vzext.vf4 v10, v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i32> @llvm.vp.zext.nxv2i32.nxv2i8(<vscale x 2 x i8> %x, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test integer truncation by vp.trunc.
+declare <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x i32> @vpmerge_vptrunc(<vscale x 2 x i32> %passthru, <vscale x 2 x i64> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vptrunc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vncvt.x.x.w v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i32> @llvm.vp.trunc.nxv2i32.nxv2i64(<vscale x 2 x i64> %x, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test floating-point extension by vp.fpext.
+declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x double> @vpmerge_vpfpext(<vscale x 2 x double> %passthru, <vscale x 2 x float> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfpext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwcvt.f.f.v v12, v10
+; CHECK-NEXT:    vsetvli zero, zero, e64, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2f32(<vscale x 2 x float> %x, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %a, <vscale x 2 x double> %passthru, i32 %vl)
+  ret <vscale x 2 x double> %b
+}
+
+; Test floating-point truncation by vp.fptrunc.
+declare <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
+define <vscale x 2 x float> @vpmerge_vpfptrunc(<vscale x 2 x float> %passthru, <vscale x 2 x double> %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpfptrunc:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfncvt.f.f.w v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x float> @llvm.vp.fptrunc.nxv2f32.nxv2f64(<vscale x 2 x double> %x, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
+  ret <vscale x 2 x float> %b
+}
+
+; Test load operation by vp.load.
+declare <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32> *, <vscale x 2 x i1>, i32)
+define <vscale x 2 x i32> @vpmerge_vpload(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> * %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpload:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32> * %p, <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test that the result has both a chain and a glued node.
+define <vscale x 2 x i32> @vpmerge_vpload2(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> * %p, <vscale x 2 x i32> %x, <vscale x 2 x i32> %y, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vpload2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v11, (a0)
+; CHECK-NEXT:    vmseq.vv v0, v9, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v11, v0
+; CHECK-NEXT:    ret
+  %splat = insertelement <vscale x 2 x i1> poison, i1 -1, i32 0
+  %mask = shufflevector <vscale x 2 x i1> %splat, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0nxv2i32(<vscale x 2 x i32> * %p, <vscale x 2 x i1> %mask, i32 %vl)
+  %m = call <vscale x 2 x i1> @llvm.vp.icmp.nxv2i32(<vscale x 2 x i32> %x, <vscale x 2 x i32> %y, metadata !"eq", <vscale x 2 x i1> %mask, i32 %vl)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; FIXME: Merge vmerge.vvm and vleffN.v
+declare { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>*, i64)
+define <vscale x 2 x i32> @vpmerge_vleff(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> * %p, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vleff:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vle32ff.v v9, (a0)
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32>* %p, i64 %1)
+  %b = extractvalue { <vscale x 2 x i32>, i64 } %a, 0
+  %c = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %b, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %c
+}
+
+; Test strided load by riscv.vlse
+declare <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>*, i64, i64)
+define <vscale x 2 x i32> @vpmerge_vlse(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> * %p, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vlse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vlse32.v v9, (a0), a1
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32>* %p, i64 %s, i64 %1)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test indexed load by riscv.vluxei
+declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32>, <vscale x 2 x i32>*, <vscale x 2 x i64>, i64)
+define <vscale x 2 x i32> @vpmerge_vluxei(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> * %p, <vscale x 2 x i64> %idx, <vscale x 2 x i1> %m, i64 %s, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vluxei:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vluxei64.v v9, (a0), v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* %p, <vscale x 2 x i64> %idx, i64 %1)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test vector index by riscv.vid
+declare <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32>, i64)
+define <vscale x 2 x i32> @vpmerge_vid(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vid:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vid.v v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(<vscale x 2 x i32> undef, i64 %1)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test riscv.viota
+declare <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i64)
+define <vscale x 2 x i32> @vpmerge_viota(<vscale x 2 x i32> %passthru, <vscale x 2 x i1> %m, <vscale x 2 x i1> %vm, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_viota:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    viota.m v10, v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i1> %vm, i64 %1)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test riscv.vfclass
+declare <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x float>, i64)
+define <vscale x 2 x i32> @vpmerge_vfclass(<vscale x 2 x i32> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vfclass:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfclass.v v9, v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x float> %vf, i64 %1)
+  %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
+  ret <vscale x 2 x i32> %b
+}
+
+; Test riscv.vfsqrt
+declare <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i64)
+define <vscale x 2 x float> @vpmerge_vfsqrt(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vfsqrt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsqrt.v v9, v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 %1)
+  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
+  ret <vscale x 2 x float> %b
+}
+
+; Test reciprocal operation by riscv.vfrec7
+declare <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, i64)
+define <vscale x 2 x float> @vpmerge_vfrec7(<vscale x 2 x float> %passthru, <vscale x 2 x float> %vf, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vpmerge_vfrec7:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfrec7.v v9, v9
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %1 = zext i32 %vl to i64
+  %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> %vf, i64 %1)
+  %b = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %a, <vscale x 2 x float> %passthru, i32 %vl)
+  ret <vscale x 2 x float> %b
+}
