Diffstat (limited to 'llvm/test/CodeGen'): 54 files changed, 21449 insertions(+), 19 deletions(-)
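The portion of the diff shown here adds six test files: an AMDGPU test covering 32-bit integer abs (via the llvm.abs.i32 intrinsic and the two equivalent sub/icmp/select idioms), and RISC-V RVV tests for the experimental Zvfbfa bfloat16 extension (the vfadd, vfclass, vfmacc, and vfmadd intrinsics, plus mixed half/bf16/integer arithmetic), whose CHECK lines verify that bf16 operations select the e16alt element-type encoding. All assertions are autogenerated, so rather than editing CHECK lines by hand you would rerun the generator named in each file's header. A minimal sketch of both workflows, assuming an LLVM build tree at build/ (the paths are illustrative, not part of the patch):

  # Check one configuration by hand, mirroring the first RUN line of abs_i32.ll:
  build/bin/llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 \
      < llvm/test/CodeGen/AMDGPU/abs_i32.ll \
      | build/bin/FileCheck -check-prefixes=GFX9 llvm/test/CodeGen/AMDGPU/abs_i32.ll

  # Regenerate the autogenerated CHECK lines after a codegen change:
  llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc \
      llvm/test/CodeGen/AMDGPU/abs_i32.ll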
diff --git a/llvm/test/CodeGen/AMDGPU/abs_i32.ll b/llvm/test/CodeGen/AMDGPU/abs_i32.ll new file mode 100644 index 0000000..b53047f --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/abs_i32.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s +; RUN: llc -mtriple=r600 -mcpu=cypress < %s | FileCheck -check-prefixes=R600 %s + +define amdgpu_kernel void @abs_v1(ptr addrspace(1) %out, i32 %arg) { +; GFX9-LABEL: abs_v1: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_abs_i32 s2, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +; +; R600-LABEL: abs_v1: +; R600: ; %bb.0: +; R600-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: ALU clause starting at 4: +; R600-NEXT: MOV * T0.W, KC0[2].Z, +; R600-NEXT: SUB_INT * T1.W, 0.0, PV.W, +; R600-NEXT: MAX_INT T0.X, T0.W, PV.W, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %res = call i32 @llvm.abs.i32(i32 %arg, i1 false) + store i32 %res, ptr addrspace(1) %out, align 4 + ret void +} + +define amdgpu_kernel void @abs_v2(ptr addrspace(1) %out, i32 %arg) { +; GFX9-LABEL: abs_v2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_abs_i32 s2, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +; +; R600-LABEL: abs_v2: +; R600: ; %bb.0: +; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: ALU clause starting at 4: +; R600-NEXT: SUB_INT * T0.W, 0.0, KC0[2].Z, +; R600-NEXT: MAX_INT T0.X, KC0[2].Z, PV.W, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %neg = sub i32 0, %arg + %cond = icmp sgt i32 %arg, %neg + %res = select i1 %cond, i32 %arg, i32 %neg + store i32 %res, ptr addrspace(1) %out, align 4 + ret void +} + +define amdgpu_kernel void @abs_v3(ptr addrspace(1) %out, i32 %arg) { +; GFX9-LABEL: abs_v3: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_abs_i32 s2, s2 +; GFX9-NEXT: v_mov_b32_e32 v1, s2 +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +; +; R600-LABEL: abs_v3: +; R600: ; %bb.0: +; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] +; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 +; R600-NEXT: CF_END +; R600-NEXT: PAD +; R600-NEXT: ALU clause starting at 4: +; R600-NEXT: SUB_INT * T0.W, 0.0, KC0[2].Z, +; R600-NEXT: MAX_INT T0.X, PV.W, KC0[2].Z, +; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, +; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) + %neg = sub i32 0, %arg + %cond = icmp sgt i32 %neg, %arg + %res = select i1 %cond, i32 %neg, i32 %arg + store i32 %res, ptr addrspace(1) %out, align 4 + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/mixed-float-bf16-arith.ll b/llvm/test/CodeGen/RISCV/rvv/mixed-float-bf16-arith.ll new file mode 100644 index 
0000000..489323b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/mixed-float-bf16-arith.ll @@ -0,0 +1,186 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfh,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfh,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16( + <vscale x 1 x half>, + <vscale x 1 x half>, + <vscale x 1 x half>, + iXLen, iXLen); + +declare <vscale x 1 x i32> @llvm.riscv.vadd.nxv1i32.nxv1i32( + <vscale x 1 x i32>, + <vscale x 1 x i32>, + <vscale x 1 x i32>, + iXLen); + +declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @test_half_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x half> %3, <vscale x 1 x half> %4, ptr %ptr) nounwind { +; CHECK-LABEL: test_half_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a2, 0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v10, v10, v11 +; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vse16.v v10, (a1) +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16( + <vscale x 1 x half> poison, + <vscale x 1 x half> %3, + <vscale x 1 x half> %4, + iXLen 0, iXLen %2) + + %b = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + call void @llvm.riscv.vse(<vscale x 1 x half> %a, ptr %ptr, iXLen %2) + + ret <vscale x 1 x bfloat> %b +} + +define <vscale x 1 x bfloat> @test_i32_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x i32> %3, <vscale x 1 x i32> %4, ptr %ptr) nounwind { +; CHECK-LABEL: test_i32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma +; CHECK-NEXT: vadd.vv v10, v10, v11 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: vse32.v v10, (a1) +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i32> @llvm.riscv.vadd.nxv1i32.nxv1i32( + <vscale x 1 x i32> poison, + <vscale x 1 x i32> %3, + <vscale x 1 x i32> %4, + iXLen %2) + + %b = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + call void @llvm.riscv.vse(<vscale x 1 x i32> %a, ptr %ptr, iXLen %2) + + ret <vscale x 1 x bfloat> %b +} + +define <vscale x 1 x bfloat> @test_half_bf16_half(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x half> %3, <vscale x 1 x half> %4, ptr %ptr) nounwind { +; CHECK-LABEL: test_half_bf16_half: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a2, 0 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v10, v10, v11 +; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v9, v10, v11 +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vse16.v 
v9, (a1) +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16( + <vscale x 1 x half> poison, + <vscale x 1 x half> %3, + <vscale x 1 x half> %4, + iXLen 0, iXLen %2) + + %b = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + %c = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16( + <vscale x 1 x half> poison, + <vscale x 1 x half> %a, + <vscale x 1 x half> %4, + iXLen 0, iXLen %2) + + store <vscale x 1 x half> %c, ptr %ptr + + ret <vscale x 1 x bfloat> %b +} + +define <vscale x 1 x bfloat> @test_bf16_half_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x half> %3, <vscale x 1 x half> %4, ptr %ptr) nounwind { +; CHECK-LABEL: test_bf16_half_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a2, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v10, v10, v11 +; CHECK-NEXT: vsetvli zero, zero, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; CHECK-NEXT: vse16.v v10, (a1) +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + %b = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16( + <vscale x 1 x half> poison, + <vscale x 1 x half> %3, + <vscale x 1 x half> %4, + iXLen 0, iXLen %2) + + %c = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %a, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + store <vscale x 1 x half> %b, ptr %ptr + + ret <vscale x 1 x bfloat> %c +} + +define <vscale x 1 x bfloat> @test_bf16_i16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2, <vscale x 1 x i16> %3, <vscale x 1 x i16> %4, ptr %ptr) nounwind { +; CHECK-LABEL: test_bf16_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a2, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: vadd.vv v9, v10, v11 +; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma +; CHECK-NEXT: vse16.v v9, (a1) +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + %b = call <vscale x 1 x i16> @llvm.riscv.vadd.nxv1i16.nxv1i16( + <vscale x 1 x i16> poison, + <vscale x 1 x i16> %3, + <vscale x 1 x i16> %4, + iXLen %2) + + store <vscale x 1 x i16> %b, ptr %ptr + + ret <vscale x 1 x bfloat> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-bf.ll new file mode 100644 index 0000000..db1b081 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-bf.ll @@ -0,0 +1,607 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x 
bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfadd_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfadd_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen); + +define <vscale x 4 x bfloat> 
@intrinsic_vfadd_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfadd_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfadd.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfadd.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfadd_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: 
intrinsic_vfadd_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfadd.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfadd_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfadd.vv v8, v8, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfadd_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu +; CHECK-NEXT: vfadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfadd_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfadd_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfadd.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfadd_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfadd.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfadd_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfadd.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfadd_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale 
x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfadd_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfadd.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfadd.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfadd_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfadd.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfadd.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfadd_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfadd.vf v8, v10, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfadd.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfadd_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfadd_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: 
intrinsic_vfadd_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfadd.vf v8, v12, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfadd.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfadd_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfadd_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfadd.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfadd_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfadd.vf v8, v16, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfadd.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll new file mode 100644 index 0000000..d7d49b3 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfclass-bf.ll @@ -0,0 +1,294 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x i16> @llvm.riscv.vfclass.nxv1i16.nxv1bf16( + <vscale x 1 x i16>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x i16> @intrinsic_vfclass_v_nxv1i16_nxv1bf16( +; CHECK-LABEL: intrinsic_vfclass_v_nxv1i16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfclass.v v8, v8 +; CHECK-NEXT: ret + <vscale x 1 x bfloat> %0, + iXLen %1) nounwind { +entry: + %a = call <vscale x 1 x i16> @llvm.riscv.vfclass.nxv1i16.nxv1bf16( + <vscale x 1 x i16> poison, + <vscale x 1 x bfloat> %0, + iXLen %1) + + ret <vscale x 1 x i16> %a +} + +declare <vscale x 1 x i16> @llvm.riscv.vfclass.mask.nxv1i16.nxv1bf16( + <vscale x 1 x i16>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen); + +define <vscale x 1 x i16> @intrinsic_vfclass_mask_v_nxv1i16_nxv1bf16( +; 
CHECK-LABEL: intrinsic_vfclass_mask_v_nxv1i16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfclass.v v8, v9, v0.t +; CHECK-NEXT: ret + <vscale x 1 x i16> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x i1> %2, + iXLen %3) nounwind { +entry: + %a = call <vscale x 1 x i16> @llvm.riscv.vfclass.mask.nxv1i16.nxv1bf16( + <vscale x 1 x i16> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x i1> %2, + iXLen %3, iXLen 0) + + ret <vscale x 1 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vfclass.nxv2i16.nxv2bf16( + <vscale x 2 x i16>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x i16> @intrinsic_vfclass_v_nxv2i16_nxv2bf16( +; CHECK-LABEL: intrinsic_vfclass_v_nxv2i16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfclass.v v8, v8 +; CHECK-NEXT: ret + <vscale x 2 x bfloat> %0, + iXLen %1) nounwind { +entry: + %a = call <vscale x 2 x i16> @llvm.riscv.vfclass.nxv2i16.nxv2bf16( + <vscale x 2 x i16> poison, + <vscale x 2 x bfloat> %0, + iXLen %1) + + ret <vscale x 2 x i16> %a +} + +declare <vscale x 2 x i16> @llvm.riscv.vfclass.mask.nxv2i16.nxv2bf16( + <vscale x 2 x i16>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen); + +define <vscale x 2 x i16> @intrinsic_vfclass_mask_v_nxv2i16_nxv2bf16( +; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfclass.v v8, v9, v0.t +; CHECK-NEXT: ret + <vscale x 2 x i16> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x i1> %2, + iXLen %3) nounwind { +entry: + %a = call <vscale x 2 x i16> @llvm.riscv.vfclass.mask.nxv2i16.nxv2bf16( + <vscale x 2 x i16> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x i1> %2, + iXLen %3, iXLen 0) + + ret <vscale x 2 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vfclass.nxv4i16.nxv4bf16( + <vscale x 4 x i16>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x i16> @intrinsic_vfclass_v_nxv4i16_nxv4bf16( +; CHECK-LABEL: intrinsic_vfclass_v_nxv4i16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfclass.v v8, v8 +; CHECK-NEXT: ret + <vscale x 4 x bfloat> %0, + iXLen %1) nounwind { +entry: + %a = call <vscale x 4 x i16> @llvm.riscv.vfclass.nxv4i16.nxv4bf16( + <vscale x 4 x i16> poison, + <vscale x 4 x bfloat> %0, + iXLen %1) + + ret <vscale x 4 x i16> %a +} + +declare <vscale x 4 x i16> @llvm.riscv.vfclass.mask.nxv4i16.nxv4bf16( + <vscale x 4 x i16>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen); + +define <vscale x 4 x i16> @intrinsic_vfclass_mask_v_nxv4i16_nxv4bf16( +; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv4i16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfclass.v v8, v9, v0.t +; CHECK-NEXT: ret + <vscale x 4 x i16> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x i1> %2, + iXLen %3) nounwind { +entry: + %a = call <vscale x 4 x i16> @llvm.riscv.vfclass.mask.nxv4i16.nxv4bf16( + <vscale x 4 x i16> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x i1> %2, + iXLen %3, iXLen 0) + + ret <vscale x 4 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vfclass.nxv8i16.nxv8bf16( + <vscale x 8 x i16>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x i16> @intrinsic_vfclass_v_nxv8i16_nxv8bf16( +; CHECK-LABEL: intrinsic_vfclass_v_nxv8i16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: 
vfclass.v v8, v8 +; CHECK-NEXT: ret + <vscale x 8 x bfloat> %0, + iXLen %1) nounwind { +entry: + %a = call <vscale x 8 x i16> @llvm.riscv.vfclass.nxv8i16.nxv8bf16( + <vscale x 8 x i16> poison, + <vscale x 8 x bfloat> %0, + iXLen %1) + + ret <vscale x 8 x i16> %a +} + +declare <vscale x 8 x i16> @llvm.riscv.vfclass.mask.nxv8i16.nxv8bf16( + <vscale x 8 x i16>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen); + +define <vscale x 8 x i16> @intrinsic_vfclass_mask_v_nxv8i16_nxv8bf16( +; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfclass.v v8, v10, v0.t +; CHECK-NEXT: ret + <vscale x 8 x i16> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x i1> %2, + iXLen %3) nounwind { +entry: + %a = call <vscale x 8 x i16> @llvm.riscv.vfclass.mask.nxv8i16.nxv8bf16( + <vscale x 8 x i16> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x i1> %2, + iXLen %3, iXLen 0) + + ret <vscale x 8 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vfclass.nxv16i16.nxv16bf16( + <vscale x 16 x i16>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x i16> @intrinsic_vfclass_v_nxv16i16_nxv16bf16( +; CHECK-LABEL: intrinsic_vfclass_v_nxv16i16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfclass.v v8, v8 +; CHECK-NEXT: ret + <vscale x 16 x bfloat> %0, + iXLen %1) nounwind { +entry: + %a = call <vscale x 16 x i16> @llvm.riscv.vfclass.nxv16i16.nxv16bf16( + <vscale x 16 x i16> poison, + <vscale x 16 x bfloat> %0, + iXLen %1) + + ret <vscale x 16 x i16> %a +} + +declare <vscale x 16 x i16> @llvm.riscv.vfclass.mask.nxv16i16.nxv16bf16( + <vscale x 16 x i16>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen); + +define <vscale x 16 x i16> @intrinsic_vfclass_mask_v_nxv16i16_nxv16bf16( +; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv16i16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfclass.v v8, v12, v0.t +; CHECK-NEXT: ret + <vscale x 16 x i16> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x i1> %2, + iXLen %3) nounwind { +entry: + %a = call <vscale x 16 x i16> @llvm.riscv.vfclass.mask.nxv16i16.nxv16bf16( + <vscale x 16 x i16> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x i1> %2, + iXLen %3, iXLen 0) + + ret <vscale x 16 x i16> %a +} + +declare <vscale x 32 x i16> @llvm.riscv.vfclass.nxv32i16.nxv32bf16( + <vscale x 32 x i16>, + <vscale x 32 x bfloat>, + iXLen); + +define <vscale x 32 x i16> @intrinsic_vfclass_v_nxv32i16_nxv32bf16( +; CHECK-LABEL: intrinsic_vfclass_v_nxv32i16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfclass.v v8, v8 +; CHECK-NEXT: ret + <vscale x 32 x bfloat> %0, + iXLen %1) nounwind { +entry: + %a = call <vscale x 32 x i16> @llvm.riscv.vfclass.nxv32i16.nxv32bf16( + <vscale x 32 x i16> poison, + <vscale x 32 x bfloat> %0, + iXLen %1) + + ret <vscale x 32 x i16> %a +} + +declare <vscale x 32 x i16> @llvm.riscv.vfclass.mask.nxv32i16.nxv32bf16( + <vscale x 32 x i16>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, iXLen); + +define <vscale x 32 x i16> @intrinsic_vfclass_mask_v_nxv32i16_nxv32bf16( +; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv32i16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, tu, mu +; CHECK-NEXT: vfclass.v v8, v16, v0.t +; CHECK-NEXT: ret + <vscale x 32 x i16> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x i1> %2, + iXLen %3) 
nounwind { +entry: + %a = call <vscale x 32 x i16> @llvm.riscv.vfclass.mask.nxv32i16.nxv32bf16( + <vscale x 32 x i16> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x i1> %2, + iXLen %3, iXLen 0) + + ret <vscale x 32 x i16> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-bf.ll new file mode 100644 index 0000000..13821d7 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-bf.ll @@ -0,0 +1,553 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfmacc.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfmacc.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 
2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmacc.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfmacc.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfmacc.vv v8, v10, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, 
iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfmacc.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfmacc.vv v8, v12, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfmacc.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> 
@intrinsic_vfmacc_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmacc_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmacc_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmacc.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmacc_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmacc.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmacc_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: 
vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmacc.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmacc_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmacc.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmacc_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmacc.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmacc.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmacc_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmacc.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmacc_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmacc.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmacc.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmadd.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmadd.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmacc.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-bf.ll
new file mode 100644
index 0000000..09fc199
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmadd.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmadd.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmadd.vv v8, v10, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmadd.vv v8, v10, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmadd.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmadd.vv v8, v12, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmadd.vv v8, v12, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1bf16_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmadd_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmadd_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2bf16_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmadd.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmadd_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmadd_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4bf16_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmadd.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmadd_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmadd.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmadd.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmadd_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8bf16_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmadd.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmadd_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfmadd.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ iXLen 0, iXLen %3, iXLen 0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmadd_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv16bf16_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfmadd.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmadd.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 0);
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmadd.vv v8, v9, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %2,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmacc.vv v8, v10, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1bf16_bf16_nxv1bf16_commute:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmacc.vf v8, fa0, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmadd.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %2,
+ bfloat %1,
+ <vscale x 1 x bfloat> %0,
+ iXLen 7, iXLen %3, iXLen 3)
+
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-bf.ll
new file mode 100644
index 0000000..a337d30
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-bf.ll
@@ -0,0 +1,571 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmax_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmax_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmax_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmax_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmax_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmax_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmax_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmax_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmax_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmax_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmax_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmax.vv v8, v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmax_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmax.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmax_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmax_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmax.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmax_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmax_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmax.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmax_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmax_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmax.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmax_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmax_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmax.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmax_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmax_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmax.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmax_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmax.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmax_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmax.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmax.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-bf.ll
new file mode 100644
index 0000000..86ba7c7
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-bf.ll
@@ -0,0 +1,258 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmerge.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmerge_vfm_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmerge.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmerge.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmerge_vfm_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmerge.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmerge.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmerge_vfm_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmerge.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmerge.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmerge_vfm_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmerge.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmerge.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmerge_vfm_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmerge.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmerge.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmerge_vfm_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmerge.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmerge_vzm_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmerge.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 1 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @intrinsic_vfmerge_vzm_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmerge.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 2 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @intrinsic_vfmerge_vzm_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmerge.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 4 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @intrinsic_vfmerge_vzm_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmerge.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 8 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @intrinsic_vfmerge_vzm_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmerge.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 16 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 32 x bfloat> @intrinsic_vfmerge_vzm_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fmv.h.x fa5, zero
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmerge.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat zeroinitializer,
+ <vscale x 32 x i1> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-bf.ll
new file mode 100644
index 0000000..37c0cf5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-bf.ll
@@ -0,0 +1,571 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmin_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmin_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmin_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmin.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmin_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmin_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmin_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v9, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.nxv4bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x bfloat> %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmin_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmin_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v10, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.nxv8bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x bfloat> %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmin_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmin_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v12, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.nxv16bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x bfloat> %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmin_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmin.vv v8, v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmin_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vl8re16.v v24, (a0)
+; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmin.vv v8, v16, v24, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.nxv32bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x bfloat> %2,
+ <vscale x 32 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmin_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ bfloat,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmin_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1bf16_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmin.mask.nxv1bf16.bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ bfloat %2,
+ <vscale x 1 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmin_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmin.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ bfloat,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmin_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmin.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmin_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmin_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmin.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmin_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmin_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmin.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmin_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmin_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmin.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmin.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmin_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmin_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmin.vf v8, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.bf16(
+ <vscale x 32
x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmin_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfmin.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmin.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-bf.ll new file mode 100644 index 0000000..948d219 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-bf.ll @@ -0,0 +1,553 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfmsac.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmsac.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; 
CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfmsac.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmsac.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmsac.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfmsac.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma 
+; CHECK-NEXT: vfmsac.vv v8, v10, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfmsac.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfmsac.vv v8, v12, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfmsac.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 
+; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmsac.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmsac_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsac_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsac_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsac.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsac_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> 
@llvm.riscv.vfmsac.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsac_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsac.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsac_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfmsac.vf v8, fa0, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsac_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsac.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsac_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfmsac.vf v8, fa0, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsac_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, 
<vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfmsac.vf v8, fa0, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsac.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmsub.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %2, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmsub.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1bf16_bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsac.nxv1bf16.bf16( + <vscale x 1 x bfloat> %2, + bfloat %1, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-bf.ll new file mode 100644 index 0000000..6838f37 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-bf.ll @@ -0,0 +1,553 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: 
vfmsub.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfmsub.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmsub.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; 
CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfmsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfmsub.vv v8, v10, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfmsub.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfmsub.vv v8, v12, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = 
call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfmsub.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmsub_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsub_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 
2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmsub_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsub_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmsub_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsub_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmsub_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x 
bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsub_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfmsub.vf v8, fa0, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmsub_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfmsub.vf v8, fa0, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmsub.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %2, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmsac.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> 
@intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1bf16_bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmsub.nxv1bf16.bf16( + <vscale x 1 x bfloat> %2, + bfloat %1, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-bf.ll new file mode 100644 index 0000000..44bce72 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-bf.ll @@ -0,0 +1,607 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmul_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmul.vv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmul_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmul_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfmul.vv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, 
+ <vscale x 2 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmul_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmul_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfmul.vv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfmul_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfmul.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmul_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfmul.vv v8, v8, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> 
@llvm.riscv.vfmul.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfmul_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfmul.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmul_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfmul.vv v8, v8, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfmul_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfmul.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmul_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfmul.vv v8, v8, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + 
<vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfmul_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu +; CHECK-NEXT: vfmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmul_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfmul.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfmul_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmul.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmul_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfmul_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfmul.vf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfmul_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; 
CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2bf16_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmul.mask.nxv2bf16.bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ bfloat %2,
+ <vscale x 2 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmul_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x bfloat>,
+ bfloat,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmul_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4bf16_nxv4bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmul.mask.nxv4bf16.bf16(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmul_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmul.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmul_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8bf16_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v10, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmul.mask.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmul_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmul_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16bf16_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmul.mask.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmul_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> poison,
+ <vscale x 32 x bfloat> %0,
+ bfloat %1,
+ iXLen 0, iXLen %2)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x bfloat>,
+ bfloat,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmul_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv32bf16_nxv32bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu
+; CHECK-NEXT: vfmul.vf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmul.mask.nxv32bf16.bf16(
+ <vscale x 32 x bfloat> %0,
+ <vscale x 32 x bfloat> %1,
+ bfloat %2,
+ <vscale x 32 x i1> %3,
+ iXLen 0, iXLen %4, iXLen 1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll
new file mode 100644
index 0000000..fbc73119
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv-bf-s.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+experimental-zvfbfa -target-abi lp64d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+experimental-zvfbfa -target-abi ilp32d -verify-machineinstrs < %s | FileCheck %s
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv1bf16(<vscale x 1 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv1bf16(<vscale x 1 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv1bf16(<vscale x 1 x bfloat> %0)
+ ret bfloat %a
+}
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv2bf16(<vscale x 2 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv2bf16(<vscale x 2 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv2bf16(<vscale x 2 x bfloat> %0)
+ ret bfloat %a
+}
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv4bf16(<vscale x 4 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv4bf16(<vscale x 4 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv4bf16(<vscale x 4 x bfloat> %0)
+ ret bfloat %a
+}
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv8bf16(<vscale x 8 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv8bf16(<vscale x 8 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv8bf16(<vscale x 8 x bfloat> %0)
+ ret bfloat %a
+}
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv16bf16(<vscale x 16 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv16bf16(<vscale x 16 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv16bf16(<vscale x 16 x bfloat> %0)
+ ret bfloat %a
+}
+
+declare bfloat @llvm.riscv.vfmv.f.s.nxv32bf16(<vscale x 32 x bfloat>)
+
+define bfloat @intrinsic_vfmv.f.s_s_nxv32bf16(<vscale x 32 x bfloat> %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.f.s_s_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: fmv.h.x fa0, a0
+; CHECK-NEXT: ret
+entry:
+ %a = call bfloat @llvm.riscv.vfmv.f.s.nxv32bf16(<vscale x 32 x bfloat> %0)
+ ret bfloat %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv-s-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv-s-bf.ll
new file mode 100644
index 0000000..a810809
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv-s-bf.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s --check-prefixes=CHECK
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat>, bfloat, iXLen)
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.s.f_f_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmv.s.f.nxv2bf16(<vscale x 2 x bfloat>, bfloat, iXLen)
+
+define <vscale x 2 x bfloat> @intrinsic_vfmv.s.f_f_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.s.f.nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmv.s.f.nxv4bf16(<vscale x 4 x bfloat>, bfloat, iXLen)
+
+define <vscale x 4 x bfloat> @intrinsic_vfmv.s.f_f_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.s.f.nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmv.s.f.nxv8bf16(<vscale x 8 x bfloat>, bfloat, iXLen)
+
+define <vscale x 8 x bfloat> @intrinsic_vfmv.s.f_f_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.s.f.nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmv.s.f.nxv16bf16(<vscale x 16 x bfloat>, bfloat, iXLen)
+
+define <vscale x 16 x bfloat> @intrinsic_vfmv.s.f_f_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.s.f.nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmv.s.f.nxv32bf16(<vscale x 32 x bfloat>, bfloat, iXLen)
+
+define <vscale x 32 x bfloat> @intrinsic_vfmv.s.f_f_nxv32bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfmv.s.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.s.f.nxv32bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2)
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.s.f.nxv2bf16(<vscale x 2 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.s.f.nxv4bf16(<vscale x 4 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.s.f.nxv8bf16(<vscale x 8 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.s.f.nxv16bf16(<vscale x 16 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 32 x bfloat> @intrinsic_vfmv.s.f_f_zero_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, zero
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.s.f.nxv32bf16(<vscale x 32 x bfloat> %0, bfloat 0.0, iXLen %1)
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.s.f_f_nxv1bf16_negzero(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1bf16_negzero:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: lui a1, 1048568
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vmv.s.x v8, a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.s.f.nxv1bf16(<vscale x 1 x bfloat> %0, bfloat -0.0, iXLen %1)
+ ret <vscale x 1 x bfloat> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv-v-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv-v-bf.ll
new file mode 100644
index 0000000..f3293dd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv-v-bf.ll
@@ -0,0 +1,216 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.v.f_f_nxv1bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfmv.v.f_f_nxv2bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16(
+ <vscale x 4 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfmv.v.f_f_nxv4bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfmv.v.f_f_nxv8bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfmv.v.f_f_nxv16bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16(
+ <vscale x 32 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfmv.v.f_f_nxv32bf16(bfloat %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; CHECK-NEXT: vfmv.v.f v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ bfloat %0,
+ iXLen %1)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
+define <vscale x 1 x bfloat> @intrinsic_vfmv.v.f_zero_nxv1bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vfmv.v.f_zero_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfmv.v.f.nxv1bf16(
+ <vscale x 1 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+define <vscale x 2 x bfloat> @intrinsic_vmv.v.i_zero_nxv2bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfmv.v.f.nxv2bf16(
+ <vscale x 2 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+define <vscale x 4 x bfloat> @intrinsic_vmv.v.i_zero_nxv4bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfmv.v.f.nxv4bf16(
+ <vscale x 4 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+define <vscale x 8 x bfloat> @intrinsic_vmv.v.i_zero_nxv8bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfmv.v.f.nxv8bf16(
+ <vscale x 8 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+define <vscale x 16 x bfloat> @intrinsic_vmv.v.i_zero_nxv16bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfmv.v.f.nxv16bf16(
+ <vscale x 16 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+define <vscale x 32 x bfloat> @intrinsic_vmv.v.i_zero_nxv32bf16(iXLen %0) nounwind {
+; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x bfloat> @llvm.riscv.vfmv.v.f.nxv32bf16(
+ <vscale x 32 x bfloat> poison,
+ bfloat 0.0,
+ iXLen %0)
+
+ ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-bf-f.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-bf-f.ll
new file mode 100644
index 0000000..7d587fd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-bf-f.ll
@@ -0,0 +1,226 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x float>,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv1bf16_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv1bf16_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat> poison,
+ <vscale x 1 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x float>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv1bf16_nxv1f32(<vscale x 1 x bfloat> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv1bf16_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1bf16.nxv1f32(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x float>,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv2bf16_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat> poison,
+ <vscale x 2 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x float>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv2bf16_nxv2f32(<vscale x 2 x bfloat> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2bf16.nxv2f32(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x float>,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv4bf16_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv4bf16_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat> poison,
+ <vscale x 4 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x float>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv4bf16_nxv4f32(<vscale x 4 x bfloat> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv4bf16_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4bf16.nxv4f32(
+ <vscale x 4 x bfloat> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x float>,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv8bf16_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv8bf16_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat> poison,
+ <vscale x 8 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x float>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv8bf16_nxv8f32(<vscale x 8 x bfloat> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv8bf16_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8bf16.nxv8f32(
+ <vscale x 8 x bfloat> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x float>,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfncvt_rod.f.f.w_nxv16bf16_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv16bf16_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat> poison,
+ <vscale x 16 x float> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x float>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfncvt_mask_rod.f.f.w_nxv16bf16_nxv16f32(<vscale x 16 x bfloat> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv16bf16_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.rod.f.f.w v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x bfloat> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv16bf16.nxv16f32(
+ <vscale x 16 x bfloat> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-bf.ll
new file mode 100644
index 0000000..ee9e3d1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-bf.ll
@@ -0,0 +1,270 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.x.f.w v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.x.f.w v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-bf.ll
new file mode 100644
index 0000000..521f727
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-bf.ll
@@ -0,0 +1,270 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen,
+ iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.rtz.xu.f.w v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-bf.ll
new file mode 100644
index 0000000..ab9ebad
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-bf.ll
@@ -0,0 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v10, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v12, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.x.f.w v16, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.x.f.w v8, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-bf.ll
new file mode 100644
index 0000000..61c6803
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-bf.ll
@@ -0,0 +1,288 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> poison,
+ <vscale x 1 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1bf16(<vscale x 1 x i8> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i8.nxv1bf16(
+ <vscale x 1 x i8> %0,
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> poison,
+ <vscale x 2 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2bf16(<vscale x 2 x i8> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i8.nxv2bf16(
+ <vscale x 2 x i8> %0,
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 2 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> poison,
+ <vscale x 4 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8>,
+ <vscale x 4 x bfloat>,
+ <vscale x 4 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4bf16(<vscale x 4 x i8> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i8.nxv4bf16(
+ <vscale x 4 x i8> %0,
+ <vscale x 4 x bfloat> %1,
+ <vscale x 4 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 4 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v10, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> poison,
+ <vscale x 8 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8>,
+ <vscale x 8 x bfloat>,
+ <vscale x 8 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8bf16(<vscale x 8 x i8> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i8.nxv8bf16(
+ <vscale x 8 x i8> %0,
+ <vscale x 8 x bfloat> %1,
+ <vscale x 8 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 8 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v12, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> poison,
+ <vscale x 16 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8>,
+ <vscale x 16 x bfloat>,
+ <vscale x 16 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16bf16(<vscale x 16 x i8> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i8.nxv16bf16(
+ <vscale x 16 x i8> %0,
+ <vscale x 16 x bfloat> %1,
+ <vscale x 16 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 16 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vfncvt.xu.f.w v16, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> poison,
+ <vscale x 32 x bfloat> %0,
+ iXLen 0, iXLen %1)
+
+ ret <vscale x 32 x i8> %a
+}
+
+declare <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8>,
+ <vscale x 32 x bfloat>,
+ <vscale x 32 x i1>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32bf16(<vscale x 32 x i8> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfncvt.xu.f.w v8, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv32i8.nxv32bf16(
+ <vscale x 32 x i8> %0,
+ <vscale x 32 x bfloat> %1,
+ <vscale x 32 x i1> %2,
+ iXLen 0, iXLen %3, iXLen 1)
+
+ ret <vscale x 32 x i8> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-bf.ll
new file mode 100644
index 0000000..4b4091b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-bf.ll
@@ -0,0 +1,553 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen, iXLen, iXLen);
+
+define <vscale x 1 x bfloat>
@intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfnmacc.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfnmacc.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> 
@intrinsic_vfnmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfnmacc.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfnmacc.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfnmacc.vv v8, v10, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfnmacc.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> 
@intrinsic_vfnmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfnmacc.vv v8, v12, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfnmacc.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmacc_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) 
nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmacc.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmacc_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmacc.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmacc_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a 
= call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmacc.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmacc_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfnmacc.vf v8, fa0, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmacc_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfnmacc.vf v8, fa0, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmacc.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmadd.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %2, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) 
nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmadd.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1bf16_bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmacc.nxv1bf16.bf16( + <vscale x 1 x bfloat> %2, + bfloat %1, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-bf.ll new file mode 100644 index 0000000..2bb6bf5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-bf.ll @@ -0,0 +1,553 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfnmadd.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x 
bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfnmadd.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfnmadd.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfnmadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, 
iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfnmadd.vv v8, v10, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfnmadd.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfnmadd.vv v8, v12, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfnmadd.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, 
iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmadd_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmadd.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmadd_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmadd.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmadd_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmadd.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmadd_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.nxv16bf16.bf16( + <vscale x 16 x bfloat> 
%0, + bfloat %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmadd_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfnmadd.vf v8, fa0, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmadd.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmadd.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %2, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmacc.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1bf16_bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmacc.vf v8, fa0, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmadd.nxv1bf16.bf16( + <vscale x 1 x bfloat> %2, + bfloat %1, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-bf.ll new file mode 100644 index 0000000..cfbaafa --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-bf.ll @@ -0,0 +1,553 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + 
<vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfnmsac.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfnmsac.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + 
<vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfnmsac.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfnmsac.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfnmsac.vv v8, v10, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfnmsac.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, 
+ iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfnmsac.vv v8, v12, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfnmsac.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsac_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> 
%0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsac.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsac_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsac.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsac_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10 +; 
CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsac.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsac_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsac_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfnmsac.vf v8, fa0, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsac.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmsub.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %2, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 
x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmsub.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1bf16_bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsac.nxv1bf16.bf16( + <vscale x 1 x bfloat> %2, + bfloat %1, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-bf.ll new file mode 100644 index 0000000..5ebbb90c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-bf.ll @@ -0,0 +1,553 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfnmsub.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.nxv2bf16( + <vscale x 2 x 
bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfnmsub.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfnmsub.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfnmsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x 
bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfnmsub.vv v8, v10, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfnmsub.vv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfnmsub.vv v8, v12, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfnmsub.vv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.bf16( + <vscale x 1 x 
bfloat>, + bfloat, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv1bf16_bf16_nxv1bf16(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1bf16_bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsub_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv2bf16_bf16_nxv2bf16(<vscale x 2 x bfloat> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2bf16_bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfnmsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsub_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: 
intrinsic_vfnmsub_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv4bf16_bf16_nxv4bf16(<vscale x 4 x bfloat> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4bf16_bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfnmsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsub_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv8bf16_bf16_nxv8bf16(<vscale x 8 x bfloat> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8bf16_bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfnmsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsub_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale 
x 16 x bfloat> @llvm.riscv.vfnmsub.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfnmsub_mask_vf_nxv16bf16_bf16_nxv16bf16(<vscale x 16 x bfloat> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv16bf16_bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu +; CHECK-NEXT: vfnmsub.vf v8, fa0, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfnmsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 0); + + ret <vscale x 16 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmsub.vv v8, v9, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %2, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1bf16_nxv1bf16_nxv1bf16_commute2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmsac.vv v8, v10, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} + +define <vscale x 1 x bfloat> @intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16_commute(<vscale x 1 x bfloat> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1bf16_bf16_nxv1bf16_commute: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfnmsac.vf v8, fa0, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfnmsub.nxv1bf16.bf16( + <vscale x 1 x bfloat> %2, + bfloat %1, + <vscale x 1 x bfloat> %0, + iXLen 7, iXLen %3, iXLen 3) + + ret <vscale x 1 x bfloat> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrec7-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfrec7-bf.ll new file mode 100644 index 0000000..1211415 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfrec7-bf.ll @@ -0,0 +1,282 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x 
bfloat> @llvm.riscv.vfrec7.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfrec7_v_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfrec7.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrec7.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + iXLen 0, iXLen %1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfrec7.mask.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfrec7_mask_v_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfrec7.v v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrec7.mask.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %0, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfrec7.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfrec7_v_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_v_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfrec7.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrec7.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + iXLen 0, iXLen %1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfrec7.mask.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfrec7_mask_v_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfrec7.v v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrec7.mask.nxv2bf16( + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %0, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfrec7.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfrec7_v_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfrec7.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrec7.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + iXLen 0, iXLen %1) + + ret 
<vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfrec7.mask.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfrec7_mask_v_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfrec7.v v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrec7.mask.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %0, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfrec7.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfrec7_v_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfrec7.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrec7.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + iXLen 0, iXLen %1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfrec7.mask.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfrec7_mask_v_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfrec7.v v8, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrec7.mask.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %0, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfrec7.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfrec7_v_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfrec7.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrec7.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + iXLen 0, iXLen %1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfrec7.mask.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfrec7_mask_v_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfrec7.v v8, v12, v0.t +; CHECK-NEXT: fsrm a1 +; 
CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrec7.mask.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %0, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfrec7.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfrec7_v_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfrec7.v v8, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrec7.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + iXLen 0, iXLen %1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfrec7.mask.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfrec7_mask_v_nxv32bf16_nxv32bf16(<vscale x 32 x i1> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfrec7.v v8, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrec7.mask.nxv32bf16( + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %0, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-bf16.ll new file mode 100644 index 0000000..4626b86 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-bf16.ll @@ -0,0 +1,264 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfrsqrt7.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfrsqrt7_v_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfrsqrt7.v v8, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrsqrt7.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + iXLen %1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> 
@llvm.riscv.vfrsqrt7.mask.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %0, + iXLen %3, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfrsqrt7_v_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfrsqrt7.v v8, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + iXLen %1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv2bf16( + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %0, + iXLen %3, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfrsqrt7_v_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfrsqrt7.v v8, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + iXLen %1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfrsqrt7.v v8, v9, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %0, + iXLen %3, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfrsqrt7.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfrsqrt7_v_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfrsqrt7.v v8, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsqrt7.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + iXLen %1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> 
@llvm.riscv.vfrsqrt7.mask.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfrsqrt7.v v8, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %0, + iXLen %3, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfrsqrt7_v_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfrsqrt7.v v8, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + iXLen %1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfrsqrt7.v v8, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %0, + iXLen %3, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfrsqrt7_v_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfrsqrt7.v v8, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + iXLen %1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfrsqrt7_mask_v_nxv32bf16_nxv32bf16(<vscale x 32 x i1> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfrsqrt7.v v8, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsqrt7.mask.nxv32bf16( + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %0, + iXLen %3, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-bf.ll 
b/llvm/test/CodeGen/RISCV/rvv/vfrsub-bf.ll new file mode 100644 index 0000000..54a6d48 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub-bf.ll @@ -0,0 +1,282 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfrsub.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfrsub_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrsub.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfrsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfrsub_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfrsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfrsub.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfrsub_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsub.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfrsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfrsub_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfrsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 
x bfloat> @llvm.riscv.vfrsub.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfrsub_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsub.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfrsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfrsub_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfrsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfrsub.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfrsub_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsub.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfrsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfrsub_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfrsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfrsub.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfrsub_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsub.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, 
+ <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfrsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfrsub_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfrsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfrsub.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfrsub_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfrsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsub.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfrsub.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfrsub_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfrsub.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfrsub.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-bf.ll new file mode 100644 index 0000000..2cd698d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-bf.ll @@ -0,0 +1,571 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnj_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; 
CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnj_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnj_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + 
<vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnj_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v10, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnj_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v12, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnj_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v16 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnj_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu +; CHECK-NEXT: vfsgnj.vv v8, v16, v24, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnj_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnj.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> 
@llvm.riscv.vfsgnj.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnj_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnj.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnj_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnj.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnj_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} 
+ +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnj.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnj_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnj.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnj_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsgnj.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnj_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfsgnj.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret 
+entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnj.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-bf.ll new file mode 100644 index 0000000..08340be --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-bf.ll @@ -0,0 +1,571 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnjn_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) 
nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsgnjn.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsgnjn.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vv v8, v10, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + 
<vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsgnjn.vv v8, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vv v8, v12, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsgnjn.vv v8, v8, v16 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu +; CHECK-NEXT: vfsgnjn.vv v8, v16, v24, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, 
+ iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnjn_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> 
@llvm.riscv.vfsgnjn.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> 
@llvm.riscv.vfsgnjn.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsgnjn.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjn_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfsgnjn.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjn.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-bf.ll new file mode 100644 index 0000000..e51a42e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-bf.ll @@ -0,0 +1,571 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnjx_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsgnjx.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsgnjx.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsgnjx.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare 
<vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsgnjx.vv v8, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vv v8, v10, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsgnjx.vv v8, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vv v8, v12, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind { +; 
CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsgnjx.vv v8, v8, v16 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vv v8, v16, v24, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnjx_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> 
@llvm.riscv.vfsgnjx.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> 
@llvm.riscv.vfsgnjx.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsgnjx.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsgnjx_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfsgnjx.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsgnjx.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-bf.ll new file mode 100644 index 0000000..c65719c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-bf.ll @@ -0,0 +1,288 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | 
llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfslide1down_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfslide1down.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfslide1down_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfslide1down.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfslide1down_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x 
bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1down.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfslide1down_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1down.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfslide1down.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfslide1down_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1down.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> 
@llvm.riscv.vfslide1down.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1down.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfslide1down_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfslide1down.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfslide1down_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfslide1down.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1down.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-bf.ll new file mode 100644 index 0000000..57a4898 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-bf.ll @@ -0,0 +1,294 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1up.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfslide1up_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> 
@llvm.riscv.vfslide1up.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfslide1up.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, + iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfslide1up.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfslide1up_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, + iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfslide1up.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfslide1up_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfslide1up.vf v9, v8, fa0 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, + iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, 
<vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfslide1up.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfslide1up_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfslide1up.vf v10, v8, fa0 +; CHECK-NEXT: vmv.v.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, + iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfslide1up.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfslide1up_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfslide1up.vf v12, v8, fa0 +; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, + iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfslide1up.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4, iXLen 
1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfslide1up_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfslide1up.vf v16, v8, fa0 +; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, + iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfslide1up_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfslide1up.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfslide1up.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-bf.ll new file mode 100644 index 0000000..aea7521 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-bf.ll @@ -0,0 +1,559 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsub_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsub.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 7, iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1bf16_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret 
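+; Trailing scalar operands of the masked vfsub intrinsic: frm (7 = use the current
+; dynamic rounding mode, hence no fsrmi/fsrm in the CHECK lines), vl, and the
+; policy (1 = tail agnostic; mask undisturbed, shown as "ta, mu" in the vsetvli).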
+entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsub_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsub.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen 7, iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsub.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2bf16_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.mask.nxv2bf16.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsub_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsub.vv v8, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen 7, iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4bf16_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsub.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.nxv8bf16( + <vscale x 8 x 
bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsub_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsub.vv v8, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen 7, iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8bf16_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsub.vv v8, v10, v12, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsub.mask.nxv8bf16.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsub_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsub.vv v8, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen 7, iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv16bf16_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsub.vv v8, v12, v16, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsub_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: 
intrinsic_vfsub_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsub.vv v8, v8, v16 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + iXLen 7, iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + <vscale x 32 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsub_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x bfloat> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv32bf16_nxv32bf16_nxv32bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl8re16.v v24, (a0) +; CHECK-NEXT: vsetvli zero, a1, e16alt, m8, ta, mu +; CHECK-NEXT: vfsub.vv v8, v16, v24, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.nxv32bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + <vscale x 32 x bfloat> %2, + <vscale x 32 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsub_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.nxv1bf16.bf16( + <vscale x 1 x bfloat> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x bfloat> @intrinsic_vfsub_mask_vf_nxv1bf16_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1bf16_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x bfloat> @llvm.riscv.vfsub.mask.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 1 x bfloat> %a +} + +declare <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsub_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.nxv2bf16.bf16( + <vscale x 2 x bfloat> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 2 x bfloat> 
@llvm.riscv.vfsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x bfloat> @intrinsic_vfsub_mask_vf_nxv2bf16_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2bf16_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x bfloat> @llvm.riscv.vfsub.mask.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 2 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsub_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.nxv4bf16.bf16( + <vscale x 4 x bfloat> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x bfloat> @intrinsic_vfsub_mask_vf_nxv4bf16_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4bf16_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfsub.vf v8, v9, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x bfloat> @llvm.riscv.vfsub.mask.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 4 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsub_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> @llvm.riscv.vfsub.nxv8bf16.bf16( + <vscale x 8 x bfloat> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 8 x bfloat> @llvm.riscv.vfsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x bfloat> @intrinsic_vfsub_mask_vf_nxv8bf16_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8bf16_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfsub.vf v8, v10, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x bfloat> 
@llvm.riscv.vfsub.mask.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 8 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsub_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.nxv16bf16.bf16( + <vscale x 16 x bfloat> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x bfloat> @intrinsic_vfsub_mask_vf_nxv16bf16_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16bf16_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfsub.vf v8, v12, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x bfloat> @llvm.riscv.vfsub.mask.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 16 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsub_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfsub_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma +; CHECK-NEXT: vfsub.vf v8, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.nxv32bf16.bf16( + <vscale x 32 x bfloat> poison, + <vscale x 32 x bfloat> %0, + bfloat %1, + iXLen 7, iXLen %2) + + ret <vscale x 32 x bfloat> %a +} + +declare <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat>, + <vscale x 32 x bfloat>, + bfloat, + <vscale x 32 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 32 x bfloat> @intrinsic_vfsub_mask_vf_nxv32bf16_nxv32bf16_bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, bfloat %2, <vscale x 32 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv32bf16_nxv32bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, mu +; CHECK-NEXT: vfsub.vf v8, v16, fa0, v0.t +; CHECK-NEXT: ret +entry: + %a = call <vscale x 32 x bfloat> @llvm.riscv.vfsub.mask.nxv32bf16.bf16( + <vscale x 32 x bfloat> %0, + <vscale x 32 x bfloat> %1, + bfloat %2, + <vscale x 32 x i1> %3, + iXLen 7, iXLen %4, iXLen 1) + + ret <vscale x 32 x bfloat> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-bf.ll new file mode 100644 index 0000000..62feac8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-bf.ll @@ -0,0 +1,519 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 
's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwadd.vv v10, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float> poison, + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwadd.vv v10, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float> poison, + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv2f32_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: 
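+; frm operand 0 (round-to-nearest-even) is programmed explicitly, which is why the
+; CHECK lines above bracket the widening add with fsrmi/fsrm.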
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.nxv2bf16( + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwadd_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmv1r.v v10, v9 +; CHECK-NEXT: vmv1r.v v11, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwadd.vv v8, v11, v10 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float> poison, + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwadd_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv4f32_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwadd.vv v8, v10, v11, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwadd_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmv2r.v v12, v10 +; CHECK-NEXT: vmv2r.v v14, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwadd.vv v8, v14, v12 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float> poison, + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwadd_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv8f32_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwadd.vv v8, v12, v14, v0.t +; CHECK-NEXT: fsrm 
a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.nxv8bf16( + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwadd_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmv4r.v v16, v12 +; CHECK-NEXT: vmv4r.v v20, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwadd.vv v8, v20, v16 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float> poison, + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwadd_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv16f32_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwadd.vv v8, v16, v20, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f32_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwadd.vf v9, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float> poison, + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd_mask_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f32_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 
1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1bf16.bf16( + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f32_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwadd.vf v9, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float> poison, + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd_mask_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f32_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwadd_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f32_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwadd.vf v8, v10, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwadd_mask_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f32_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v10, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> 
@llvm.riscv.vfwadd.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwadd_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f32_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmv2r.v v12, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwadd.vf v8, v12, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwadd_mask_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f32_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v12, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwadd_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_vf_nxv16f32_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmv4r.v v16, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwadd.vf v8, v16, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwadd_mask_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv16f32_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwadd.vf v8, v16, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-w-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-w-bf.ll new file mode 100644 index 0000000..c5417e8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-w-bf.ll @@ -0,0 +1,773 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwadd.wv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1bf16( + <vscale x 1 x float> poison, + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwadd.wv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x float> %1, + <vscale x 1 x bfloat> %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwadd.wv v8, v8, v9 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2bf16( + <vscale x 2 x float> poison, + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2bf16( + <vscale x 2 x float>, + <vscale x 2 x float>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwadd.wv v8, v9, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> 
@llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2bf16( + <vscale x 2 x float> %0, + <vscale x 2 x float> %1, + <vscale x 2 x bfloat> %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfwadd.wv v8, v8, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4bf16( + <vscale x 4 x float> poison, + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4bf16( + <vscale x 4 x float>, + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwadd.wv v8, v10, v12, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x float> %1, + <vscale x 4 x bfloat> %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfwadd.wv v8, v8, v12 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8bf16( + <vscale x 8 x float> poison, + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8bf16( + <vscale x 8 x float>, + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwadd.wv v8, v12, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8bf16( + <vscale x 8 x float> %0, + <vscale x 8 x float> %1, + <vscale x 8 x bfloat> %2, + <vscale x 8 x i1> %3, + 
iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfwadd.wv v8, v8, v16 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16bf16( + <vscale x 16 x float> poison, + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float>, + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vl4re16.v v24, (a0) +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vsetvli zero, a1, e16alt, m4, ta, mu +; CHECK-NEXT: vfwadd.wv v8, v16, v24, v0.t +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x float> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.bf16( + <vscale x 1 x float>, + <vscale x 1 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.bf16( + <vscale x 1 x float> poison, + <vscale x 1 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.bf16( + <vscale x 1 x float>, + <vscale x 1 x float>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.bf16( + <vscale x 1 x float> %0, + <vscale x 1 x float> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.bf16( + <vscale x 2 x float>, + <vscale x 2 x float>, + bfloat, + 
iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.bf16( + <vscale x 2 x float> poison, + <vscale x 2 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.bf16( + <vscale x 2 x float>, + <vscale x 2 x float>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.bf16( + <vscale x 2 x float> %0, + <vscale x 2 x float> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.bf16( + <vscale x 4 x float>, + <vscale x 4 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.bf16( + <vscale x 4 x float> poison, + <vscale x 4 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.bf16( + <vscale x 4 x float>, + <vscale x 4 x float>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwadd.wf v8, v10, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.bf16( + <vscale x 4 x float> %0, + <vscale x 4 x float> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.bf16( + <vscale x 8 x float>, + <vscale x 8 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfwadd.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret 
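+; For the .wf form both vd and vs2 use the wide (f32) register group, so vd may
+; equal vs2 and no register-group copy is emitted before vfwadd.wf.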
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.bf16(
+    <vscale x 8 x float> poison,
+    <vscale x 8 x float> %0,
+    bfloat %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.bf16(
+  <vscale x 8 x float>,
+  <vscale x 8 x float>,
+  bfloat,
+  <vscale x 8 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v12, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.bf16(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x float> %1,
+    bfloat %2,
+    <vscale x 8 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.bf16(
+  <vscale x 16 x float>,
+  <vscale x 16 x float>,
+  bfloat,
+  iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.bf16(
+    <vscale x 16 x float> poison,
+    <vscale x 16 x float> %0,
+    bfloat %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.bf16(
+  <vscale x 16 x float>,
+  <vscale x 16 x float>,
+  bfloat,
+  <vscale x 16 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v16, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.bf16(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x float> %1,
+    bfloat %2,
+    <vscale x 16 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 16 x float> %a
+}
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1bf16(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x float> %0,
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x i1> %2,
+    iXLen 0, iXLen %3, iXLen 1)
+
+  ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v8, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2bf16(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x float> %0,
+    <vscale x 2 x bfloat> %1,
+    <vscale x 2 x i1> %2,
+    iXLen 0, iXLen %3, iXLen 1)
+
+  ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v8, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4bf16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 4 x i1> %2,
+    iXLen 0, iXLen %3, iXLen 1)
+
+  ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v8, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8bf16(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x float> %0,
+    <vscale x 8 x bfloat> %1,
+    <vscale x 8 x i1> %2,
+    iXLen 0, iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x float> %a
+}
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwadd.wv v8, v8, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16bf16(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x float> %0,
+    <vscale x 16 x bfloat> %1,
+    <vscale x 16 x i1> %2,
+    iXLen 0, iXLen %3, iXLen 1)
+
+  ret <vscale x 16 x float> %a
+}
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.bf16(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x float> %0,
+    bfloat %1,
+    <vscale x 1 x i1> %2,
+    iXLen 0, iXLen %3, iXLen 1)
+
+  ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.bf16(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x float> %0,
+    bfloat %1,
+    <vscale x 2 x i1> %2,
+    iXLen 0, iXLen %3, iXLen 1)
+
+  ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.bf16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x float> %0,
+    bfloat %1,
+    <vscale x 4 x i1> %2,
+    iXLen 0, iXLen %3, iXLen 1)
+
+  ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.bf16(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x float> %0,
+    bfloat %1,
+    <vscale x 8 x i1> %2,
+    iXLen 0, iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x float> %a
+}
+
+define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwadd.wf v8, v8, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.bf16(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x float> %0,
+    bfloat %1,
+    <vscale x 16 x i1> %2,
+    iXLen 0, iXLen %3, iXLen 1)
+
+  ret <vscale x 16 x float> %a
+}
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwadd.wv v10, v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1bf16(
+    <vscale x 1 x float> poison,
+    <vscale x 1 x float> %1,
+    <vscale x 1 x bfloat> %0,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwadd.wv v10, v9, v8
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2bf16(
+    <vscale x 2 x float> poison,
+    <vscale x 2 x float> %1,
+    <vscale x 2 x bfloat> %0,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v12, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.wv v8, v10, v12
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4bf16(
+    <vscale x 4 x float> poison,
+    <vscale x 4 x float> %1,
+    <vscale x 4 x bfloat> %0,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v16, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwadd.wv v8, v12, v16
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8bf16(
+    <vscale x 8 x float> poison,
+    <vscale x 8 x float> %1,
+    <vscale x 8 x bfloat> %0,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 8 x float> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-x.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-x.ll
new file mode 100644
index 0000000..b7df45b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-x.ll
@@ -0,0 +1,264 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv1bf16.nxv1i8(
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i8>,
+  iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv1bf16_nxv1i8(<vscale x 1 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1bf16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfwcvt.f.x.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv1bf16.nxv1i8(
+    <vscale x 1 x bfloat> poison,
+    <vscale x 1 x i8> %0,
+    iXLen %1)
+
+  ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1bf16.nxv1i8(
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i8>,
+  <vscale x 1 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv1bf16_nxv1i8(<vscale x 1 x bfloat> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1bf16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1bf16.nxv1i8(
+    <vscale x 1 x bfloat> %0,
+    <vscale x 1 x i8> %1,
+    <vscale x 1 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv2bf16.nxv2i8(
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x i8>,
+  iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv2bf16_nxv2i8(<vscale x 2 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2bf16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.x.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv2bf16.nxv2i8(
+    <vscale x 2 x bfloat> poison,
+    <vscale x 2 x i8> %0,
+    iXLen %1)
+
+  ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2bf16.nxv2i8(
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x i8>,
+  <vscale x 2 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv2bf16_nxv2i8(<vscale x 2 x bfloat> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2bf16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2bf16.nxv2i8(
+    <vscale x 2 x bfloat> %0,
+    <vscale x 2 x i8> %1,
+    <vscale x 2 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv4bf16.nxv4i8(
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x i8>,
+  iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv4bf16_nxv4i8(<vscale x 4 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4bf16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.x.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv4bf16.nxv4i8(
+    <vscale x 4 x bfloat> poison,
+    <vscale x 4 x i8> %0,
+    iXLen %1)
+
+  ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4bf16.nxv4i8(
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x i8>,
+  <vscale x 4 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv4bf16_nxv4i8(<vscale x 4 x bfloat> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4bf16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4bf16.nxv4i8(
+    <vscale x 4 x bfloat> %0,
+    <vscale x 4 x i8> %1,
+    <vscale x 4 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv8bf16.nxv8i8(
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x i8>,
+  iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv8bf16_nxv8i8(<vscale x 8 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8bf16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv8bf16.nxv8i8(
+    <vscale x 8 x bfloat> poison,
+    <vscale x 8 x i8> %0,
+    iXLen %1)
+
+  ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8bf16.nxv8i8(
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x i8>,
+  <vscale x 8 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv8bf16_nxv8i8(<vscale x 8 x bfloat> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8bf16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8bf16.nxv8i8(
+    <vscale x 8 x bfloat> %0,
+    <vscale x 8 x i8> %1,
+    <vscale x 8 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv16bf16.nxv16i8(
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x i8>,
+  iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv16bf16_nxv16i8(<vscale x 16 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16bf16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv16bf16.nxv16i8(
+    <vscale x 16 x bfloat> poison,
+    <vscale x 16 x i8> %0,
+    iXLen %1)
+
+  ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv16bf16.nxv16i8(
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x i8>,
+  <vscale x 16 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv16bf16_nxv16i8(<vscale x 16 x bfloat> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv16bf16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv16bf16.nxv16i8(
+    <vscale x 16 x bfloat> %0,
+    <vscale x 16 x i8> %1,
+    <vscale x 16 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv32bf16.nxv32i8(
+  <vscale x 32 x bfloat>,
+  <vscale x 32 x i8>,
+  iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfwcvt_f.x.v_nxv32bf16_nxv32i8(<vscale x 32 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv32bf16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.x.v v8, v16
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.nxv32bf16.nxv32i8(
+    <vscale x 32 x bfloat> poison,
+    <vscale x 32 x i8> %0,
+    iXLen %1)
+
+  ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv32bf16.nxv32i8(
+  <vscale x 32 x bfloat>,
+  <vscale x 32 x i8>,
+  <vscale x 32 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfwcvt_mask_f.x.v_nxv32bf16_nxv32i8(<vscale x 32 x bfloat> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv32bf16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfwcvt.f.x.v v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.x.v.mask.nxv32bf16.nxv32i8(
+    <vscale x 32 x bfloat> %0,
+    <vscale x 32 x i8> %1,
+    <vscale x 32 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-xu.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-xu.ll
new file mode 100644
index 0000000..c370261
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-bf-xu.ll
@@ -0,0 +1,264 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv1bf16.nxv1i8(
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i8>,
+  iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv1bf16_nxv1i8(<vscale x 1 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1bf16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv1bf16.nxv1i8(
+    <vscale x 1 x bfloat> poison,
+    <vscale x 1 x i8> %0,
+    iXLen %1)
+
+  ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1bf16.nxv1i8(
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i8>,
+  <vscale x 1 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 1 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv1bf16_nxv1i8(<vscale x 1 x bfloat> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1bf16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf8, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 1 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1bf16.nxv1i8(
+    <vscale x 1 x bfloat> %0,
+    <vscale x 1 x i8> %1,
+    <vscale x 1 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 1 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv2bf16.nxv2i8(
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x i8>,
+  iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv2bf16_nxv2i8(<vscale x 2 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2bf16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv2bf16.nxv2i8(
+    <vscale x 2 x bfloat> poison,
+    <vscale x 2 x i8> %0,
+    iXLen %1)
+
+  ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2bf16.nxv2i8(
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x i8>,
+  <vscale x 2 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 2 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv2bf16_nxv2i8(<vscale x 2 x bfloat> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv2bf16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf4, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 2 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2bf16.nxv2i8(
+    <vscale x 2 x bfloat> %0,
+    <vscale x 2 x i8> %1,
+    <vscale x 2 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 2 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv4bf16.nxv4i8(
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x i8>,
+  iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv4bf16_nxv4i8(<vscale x 4 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4bf16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv4bf16.nxv4i8(
+    <vscale x 4 x bfloat> poison,
+    <vscale x 4 x i8> %0,
+    iXLen %1)
+
+  ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4bf16.nxv4i8(
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x i8>,
+  <vscale x 4 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 4 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv4bf16_nxv4i8(<vscale x 4 x bfloat> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv4bf16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, mf2, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v9, v0.t
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 4 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4bf16.nxv4i8(
+    <vscale x 4 x bfloat> %0,
+    <vscale x 4 x i8> %1,
+    <vscale x 4 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 4 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv8bf16.nxv8i8(
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x i8>,
+  iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv8bf16_nxv8i8(<vscale x 8 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8bf16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv8bf16.nxv8i8(
+    <vscale x 8 x bfloat> poison,
+    <vscale x 8 x i8> %0,
+    iXLen %1)
+
+  ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8bf16.nxv8i8(
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x i8>,
+  <vscale x 8 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 8 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv8bf16_nxv8i8(<vscale x 8 x bfloat> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv8bf16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m1, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v10, v0.t
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 8 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8bf16.nxv8i8(
+    <vscale x 8 x bfloat> %0,
+    <vscale x 8 x i8> %1,
+    <vscale x 8 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 8 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv16bf16.nxv16i8(
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x i8>,
+  iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv16bf16_nxv16i8(<vscale x 16 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16bf16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv16bf16.nxv16i8(
+    <vscale x 16 x bfloat> poison,
+    <vscale x 16 x i8> %0,
+    iXLen %1)
+
+  ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16bf16.nxv16i8(
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x i8>,
+  <vscale x 16 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 16 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv16bf16_nxv16i8(<vscale x 16 x bfloat> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv16bf16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m2, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v12, v0.t
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 16 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16bf16.nxv16i8(
+    <vscale x 16 x bfloat> %0,
+    <vscale x 16 x i8> %1,
+    <vscale x 16 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 16 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv32bf16.nxv32i8(
+  <vscale x 32 x bfloat>,
+  <vscale x 32 x i8>,
+  iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfwcvt_f.xu.v_nxv32bf16_nxv32i8(<vscale x 32 x i8> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv32bf16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v8
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.nxv32bf16.nxv32i8(
+    <vscale x 32 x bfloat> poison,
+    <vscale x 32 x i8> %0,
+    iXLen %1)
+
+  ret <vscale x 32 x bfloat> %a
+}
+
+declare <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv32bf16.nxv32i8(
+  <vscale x 32 x bfloat>,
+  <vscale x 32 x i8>,
+  <vscale x 32 x i1>,
+  iXLen, iXLen);
+
+define <vscale x 32 x bfloat> @intrinsic_vfwcvt_mask_f.xu.v_nxv32bf16_nxv32i8(<vscale x 32 x bfloat> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv32bf16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8alt, m4, ta, mu
+; CHECK-NEXT: vfwcvt.f.xu.v v8, v16, v0.t
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 32 x bfloat> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv32bf16.nxv32i8(
+    <vscale x 32 x bfloat> %0,
+    <vscale x 32 x i8> %1,
+    <vscale x 32 x i1> %2,
+    iXLen %3, iXLen 1)
+
+  ret <vscale x 32 x bfloat> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-bf.ll
new file mode 100644
index 0000000..a3f6678
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-bf.ll
@@ -0,0 +1,506 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmsac_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfwmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1bf16(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfwmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1bf16(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    <vscale x 1 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.nxv2bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmsac_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfwmsac.vv v8, v9, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.nxv2bf16(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x bfloat> %1,
+    <vscale x 2 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfwmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2bf16(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x bfloat> %1,
+    <vscale x 2 x bfloat> %2,
+    <vscale x 2 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.nxv4bf16(
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmsac_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfwmsac.vv v8, v10, v11
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.nxv4bf16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 4 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4bf16(
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfwmsac.vv v8, v10, v11, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4bf16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 4 x bfloat> %2,
+    <vscale x 4 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.nxv8bf16(
+  <vscale x 8 x float>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmsac_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfwmsac.vv v8, v12, v14
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.nxv8bf16(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x bfloat> %1,
+    <vscale x 8 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8bf16(
+  <vscale x 8 x float>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfwmsac.vv v8, v12, v14, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8bf16(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x bfloat> %1,
+    <vscale x 8 x bfloat> %2,
+    <vscale x 8 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.nxv16bf16(
+  <vscale x 16 x float>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmsac_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfwmsac.vv v8, v16, v20
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.nxv16bf16(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x bfloat> %1,
+    <vscale x 16 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16bf16(
+  <vscale x 16 x float>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfwmsac.vv v8, v16, v20, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16bf16(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x bfloat> %1,
+    <vscale x 16 x bfloat> %2,
+    <vscale x 16 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.bf16(
+  <vscale x 1 x float>,
+  bfloat,
+  <vscale x 1 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmsac_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.bf16(
+    <vscale x 1 x float> %0,
+    bfloat %1,
+    <vscale x 1 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.bf16(
+  <vscale x 1 x float>,
+  bfloat,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmsac_mask_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.bf16(
+    <vscale x 1 x float> %0,
+    bfloat %1,
+    <vscale x 1 x bfloat> %2,
+    <vscale x 1 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.bf16(
+  <vscale x 2 x float>,
+  bfloat,
+  <vscale x 2 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmsac_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.bf16(
+    <vscale x 2 x float> %0,
+    bfloat %1,
+    <vscale x 2 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.bf16(
+  <vscale x 2 x float>,
+  bfloat,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmsac_mask_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.bf16(
+    <vscale x 2 x float> %0,
+    bfloat %1,
+    <vscale x 2 x bfloat> %2,
+    <vscale x 2 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.bf16(
+  <vscale x 4 x float>,
+  bfloat,
+  <vscale x 4 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmsac_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v10
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.bf16(
+    <vscale x 4 x float> %0,
+    bfloat %1,
+    <vscale x 4 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.bf16(
+  <vscale x 4 x float>,
+  bfloat,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmsac_mask_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.bf16(
+    <vscale x 4 x float> %0,
+    bfloat %1,
+    <vscale x 4 x bfloat> %2,
+    <vscale x 4 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.bf16(
+  <vscale x 8 x float>,
+  bfloat,
+  <vscale x 8 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmsac_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v12
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.bf16(
+    <vscale x 8 x float> %0,
+    bfloat %1,
+    <vscale x 8 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.bf16(
+  <vscale x 8 x float>,
+  bfloat,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmsac_mask_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v12, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.bf16(
+    <vscale x 8 x float> %0,
+    bfloat %1,
+    <vscale x 8 x bfloat> %2,
+    <vscale x 8 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.bf16(
+  <vscale x 16 x float>,
+  bfloat,
+  <vscale x 16 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmsac_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v16
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.bf16(
+    <vscale x 16 x float> %0,
+    bfloat %1,
+    <vscale x 16 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.bf16(
+  <vscale x 16 x float>,
+  bfloat,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmsac_mask_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT: vfwmsac.vf v8, fa0, v16, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.bf16(
+    <vscale x 16 x float> %0,
+    bfloat %1,
+    <vscale x 16 x bfloat> %2,
+    <vscale x 16 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 16 x float> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-bf.ll
new file mode 100644
index 0000000..577b93a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-bf.ll
@@ -0,0 +1,519 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.nxv1bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmul_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwmul.vv v10, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.nxv1bf16(
+    <vscale x 1 x float> poison,
+    <vscale x 1 x bfloat> %0,
+    <vscale x 1 x bfloat> %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.nxv1bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmul_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwmul.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.nxv1bf16(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    <vscale x 1 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.nxv2bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmul_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwmul.vv v10, v8, v9
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.nxv2bf16(
+    <vscale x 2 x float> poison,
+    <vscale x 2 x bfloat> %0,
+    <vscale x 2 x bfloat> %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.nxv2bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmul_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vfwmul.vv v8, v9, v10, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.nxv2bf16(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x bfloat> %1,
+    <vscale x 2 x bfloat> %2,
+    <vscale x 2 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.nxv4bf16(
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmul_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT: vmv1r.v v10, v9
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vv v8, v11, v10
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.nxv4bf16(
+    <vscale x 4 x float> poison,
+    <vscale x 4 x bfloat> %0,
+    <vscale x 4 x bfloat> %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.nxv4bf16(
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmul_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT: vfwmul.vv v8, v10, v11, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.nxv4bf16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 4 x bfloat> %2,
+    <vscale x 4 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.nxv8bf16(
+  <vscale x 8 x float>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmul_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmv2r.v v12, v10
+; CHECK-NEXT: vmv2r.v v14, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vv v8, v14, v12
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.nxv8bf16(
+    <vscale x 8 x float> poison,
+    <vscale x 8 x bfloat> %0,
+    <vscale x 8 x bfloat> %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.nxv8bf16(
+  <vscale x 8 x float>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmul_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vfwmul.vv v8, v12, v14, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.nxv8bf16(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x bfloat> %1,
+    <vscale x 8 x bfloat> %2,
+    <vscale x 8 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.nxv16bf16(
+  <vscale x 16 x float>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmul_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmv4r.v v16, v12
+; CHECK-NEXT: vmv4r.v v20, v8
+; CHECK-NEXT: fsrmi a0, 0
+; CHECK-NEXT: vfwmul.vv v8, v20, v16
+; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.nxv16bf16(
+    <vscale x 16 x float> poison,
+    <vscale x 16 x bfloat> %0,
+    <vscale x 16 x bfloat> %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.nxv16bf16(
+  <vscale x 16 x float>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmul_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vfwmul.vv v8, v16, v20, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.nxv16bf16(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x bfloat> %1,
+    <vscale x 16 x bfloat> %2,
+    <vscale x 16 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  bfloat,
+  iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmul_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfwmul.vf v9, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.bf16(
+    <vscale x 1 x float> poison,
+    <vscale x 1 x bfloat> %0,
+    bfloat %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  bfloat,
+  <vscale x 1 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmul_mask_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1bf16.bf16(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x bfloat> %1,
+    bfloat %2,
+    <vscale x 1 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  bfloat,
+  iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmul_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f32_nxv2bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: fsrmi a1, 0
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfwmul.vf v9, v8, fa0
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2bf16.bf16(
+    <vscale x 2 x float> poison,
+    <vscale x 2 x bfloat> %0,
+    bfloat %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  bfloat,
+  <vscale x 2 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmul_mask_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
{ +; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f32_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2bf16.bf16( + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwmul_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f32_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmv1r.v v10, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwmul.vf v8, v10, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float> poison, + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwmul_mask_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f32_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v10, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4bf16.bf16( + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwmul_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f32_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmv2r.v v12, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwmul.vf v8, v12, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float> poison, + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwmul_mask_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f32_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v12, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = 
call <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8bf16.bf16( + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + bfloat, + iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwmul_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_vf_nxv16f32_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmv4r.v v16, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwmul.vf v8, v16, fa0 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float> poison, + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwmul_mask_vf_nxv16f32_nxv16bf16_bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv16f32_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwmul.vf v8, v16, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16bf16.bf16( + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll new file mode 100644 index 0000000..1e05e4c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll @@ -0,0 +1,506 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwnmacc_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv1f32_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, ma +; CHECK-NEXT: vfwnmacc.vv v8, v9, v10 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen 0, iXLen %3, iXLen 0) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define 
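[Editor's note: the unmasked vector-scalar tests above all share one shape, distilled in the minimal sketch below. The sketch is hypothetical (not part of this diff); it reuses the @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.bf16 signature declared above, a poison passthru, and a rounding-mode operand of iXLen 0, which matches the "fsrmi a1, 0" in the CHECK lines.]

; Hypothetical standalone test; RUN line copied from the files in this diff.
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s

declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.bf16(
  <vscale x 1 x float>, <vscale x 1 x bfloat>, bfloat, iXLen, iXLen);

define <vscale x 1 x float> @example_vfwmul_vf(<vscale x 1 x bfloat> %v, bfloat %s, iXLen %vl) {
  ; Operands: passthru (poison, tail-agnostic), bf16 vector, bf16 scalar,
  ; then static rounding mode (0) and the active vector length %vl.
  %r = call <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1bf16.bf16(
    <vscale x 1 x float> poison, <vscale x 1 x bfloat> %v, bfloat %s,
    iXLen 0, iXLen %vl)
  ret <vscale x 1 x float> %r
}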
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll
new file mode 100644
index 0000000..1e05e4c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-bf.ll
@@ -0,0 +1,506 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmacc_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT:    vfwnmacc.vv v8, v9, v10
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1bf16(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1bf16(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    <vscale x 1 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.nxv2bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmacc_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT:    vfwnmacc.vv v8, v9, v10
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.nxv2bf16(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x bfloat> %1,
+    <vscale x 2 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2bf16(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x bfloat> %1,
+    <vscale x 2 x bfloat> %2,
+    <vscale x 2 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.nxv4bf16(
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmacc_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT:    vfwnmacc.vv v8, v10, v11
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.nxv4bf16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 4 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4bf16(
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT:    vfwnmacc.vv v8, v10, v11, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4bf16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 4 x bfloat> %2,
+    <vscale x 4 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.nxv8bf16(
+  <vscale x 8 x float>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmacc_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT:    vfwnmacc.vv v8, v12, v14
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.nxv8bf16(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x bfloat> %1,
+    <vscale x 8 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8bf16(
+  <vscale x 8 x float>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vv v8, v12, v14, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8bf16(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x bfloat> %1,
+    <vscale x 8 x bfloat> %2,
+    <vscale x 8 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.nxv16bf16(
+  <vscale x 16 x float>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmacc_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT:    vfwnmacc.vv v8, v16, v20
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.nxv16bf16(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x bfloat> %1,
+    <vscale x 16 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16bf16(
+  <vscale x 16 x float>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vv v8, v16, v20, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16bf16(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x bfloat> %1,
+    <vscale x 16 x bfloat> %2,
+    <vscale x 16 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.bf16(
+  <vscale x 1 x float>,
+  bfloat,
+  <vscale x 1 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmacc_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.bf16(
+    <vscale x 1 x float> %0,
+    bfloat %1,
+    <vscale x 1 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.bf16(
+  <vscale x 1 x float>,
+  bfloat,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmacc_mask_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.bf16(
+    <vscale x 1 x float> %0,
+    bfloat %1,
+    <vscale x 1 x bfloat> %2,
+    <vscale x 1 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.bf16(
+  <vscale x 2 x float>,
+  bfloat,
+  <vscale x 2 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmacc_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.bf16(
+    <vscale x 2 x float> %0,
+    bfloat %1,
+    <vscale x 2 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.bf16(
+  <vscale x 2 x float>,
+  bfloat,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmacc_mask_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.bf16(
+    <vscale x 2 x float> %0,
+    bfloat %1,
+    <vscale x 2 x bfloat> %2,
+    <vscale x 2 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.bf16(
+  <vscale x 4 x float>,
+  bfloat,
+  <vscale x 4 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmacc_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v10
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.bf16(
+    <vscale x 4 x float> %0,
+    bfloat %1,
+    <vscale x 4 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.bf16(
+  <vscale x 4 x float>,
+  bfloat,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmacc_mask_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v10, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.bf16(
+    <vscale x 4 x float> %0,
+    bfloat %1,
+    <vscale x 4 x bfloat> %2,
+    <vscale x 4 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.bf16(
+  <vscale x 8 x float>,
+  bfloat,
+  <vscale x 8 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmacc_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v12
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.bf16(
+    <vscale x 8 x float> %0,
+    bfloat %1,
+    <vscale x 8 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.bf16(
+  <vscale x 8 x float>,
+  bfloat,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmacc_mask_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v12, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.bf16(
+    <vscale x 8 x float> %0,
+    bfloat %1,
+    <vscale x 8 x bfloat> %2,
+    <vscale x 8 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.bf16(
+  <vscale x 16 x float>,
+  bfloat,
+  <vscale x 16 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmacc_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v16
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.bf16(
+    <vscale x 16 x float> %0,
+    bfloat %1,
+    <vscale x 16 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.bf16(
+  <vscale x 16 x float>,
+  bfloat,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmacc_mask_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v16, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.bf16(
+    <vscale x 16 x float> %0,
+    bfloat %1,
+    <vscale x 16 x bfloat> %2,
+    <vscale x 16 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 16 x float> %a
+}
+
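[Editor's note: the vfwnmacc tests above differ from the binary vfwmul ones in one structural way: the destination is also a source. There is no separate passthru operand, the accumulator stays in v8 in every CHECK line, and the trailing policy operand is iXLen 0, which corresponds to the "tu" (tail-undisturbed) vsetvli the checks expect. The hypothetical wrapper below, written against the same iXLen placeholder and sed RUN preamble used by these files, just labels the operand roles; the intrinsic signature is the one declared above.]

declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1bf16(
  <vscale x 1 x float>, <vscale x 1 x bfloat>, <vscale x 1 x bfloat>,
  iXLen, iXLen, iXLen);

define <vscale x 1 x float> @example_vfwnmacc(<vscale x 1 x float> %acc, <vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b, iXLen %vl) {
  ; Operands: accumulator (input and result), multiplicand, multiplier,
  ; then rounding mode (0), vl, and policy (0, matching "tu" above).
  %r = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1bf16(
    <vscale x 1 x float> %acc, <vscale x 1 x bfloat> %a, <vscale x 1 x bfloat> %b,
    iXLen 0, iXLen %vl, iXLen 0)
  ret <vscale x 1 x float> %r
}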
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-bf.ll
new file mode 100644
index 0000000..223ad4f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-bf.ll
@@ -0,0 +1,506 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmsac_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT:    vfwnmsac.vv v8, v9, v10
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1bf16(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1bf16(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    <vscale x 1 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.nxv2bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmsac_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT:    vfwnmsac.vv v8, v9, v10
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.nxv2bf16(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x bfloat> %1,
+    <vscale x 2 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2bf16(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x bfloat> %1,
+    <vscale x 2 x bfloat> %2,
+    <vscale x 2 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.nxv4bf16(
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmsac_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT:    vfwnmsac.vv v8, v10, v11
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.nxv4bf16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 4 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4bf16(
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT:    vfwnmsac.vv v8, v10, v11, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4bf16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 4 x bfloat> %2,
+    <vscale x 4 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.nxv8bf16(
+  <vscale x 8 x float>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmsac_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT:    vfwnmsac.vv v8, v12, v14
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.nxv8bf16(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x bfloat> %1,
+    <vscale x 8 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8bf16(
+  <vscale x 8 x float>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vv v8, v12, v14, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8bf16(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x bfloat> %1,
+    <vscale x 8 x bfloat> %2,
+    <vscale x 8 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.nxv16bf16(
+  <vscale x 16 x float>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmsac_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT:    vfwnmsac.vv v8, v16, v20
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.nxv16bf16(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x bfloat> %1,
+    <vscale x 16 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16bf16(
+  <vscale x 16 x float>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vv v8, v16, v20, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16bf16(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x bfloat> %1,
+    <vscale x 16 x bfloat> %2,
+    <vscale x 16 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.bf16(
+  <vscale x 1 x float>,
+  bfloat,
+  <vscale x 1 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmsac_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, tu, ma
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.bf16(
+    <vscale x 1 x float> %0,
+    bfloat %1,
+    <vscale x 1 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.bf16(
+  <vscale x 1 x float>,
+  bfloat,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwnmsac_mask_vf_nxv1f32_bf16_nxv1bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f32_bf16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.bf16(
+    <vscale x 1 x float> %0,
+    bfloat %1,
+    <vscale x 1 x bfloat> %2,
+    <vscale x 1 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.bf16(
+  <vscale x 2 x float>,
+  bfloat,
+  <vscale x 2 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmsac_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, tu, ma
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.bf16(
+    <vscale x 2 x float> %0,
+    bfloat %1,
+    <vscale x 2 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.bf16(
+  <vscale x 2 x float>,
+  bfloat,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwnmsac_mask_vf_nxv2f32_bf16_nxv2bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f32_bf16_nxv2bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.bf16(
+    <vscale x 2 x float> %0,
+    bfloat %1,
+    <vscale x 2 x bfloat> %2,
+    <vscale x 2 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.bf16(
+  <vscale x 4 x float>,
+  bfloat,
+  <vscale x 4 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmsac_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m1, tu, ma
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v10
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.bf16(
+    <vscale x 4 x float> %0,
+    bfloat %1,
+    <vscale x 4 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.bf16(
+  <vscale x 4 x float>,
+  bfloat,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwnmsac_mask_vf_nxv4f32_bf16_nxv4bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f32_bf16_nxv4bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m1, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v10, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.bf16(
+    <vscale x 4 x float> %0,
+    bfloat %1,
+    <vscale x 4 x bfloat> %2,
+    <vscale x 4 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.bf16(
+  <vscale x 8 x float>,
+  bfloat,
+  <vscale x 8 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmsac_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m2, tu, ma
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v12
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.bf16(
+    <vscale x 8 x float> %0,
+    bfloat %1,
+    <vscale x 8 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.bf16(
+  <vscale x 8 x float>,
+  bfloat,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwnmsac_mask_vf_nxv8f32_bf16_nxv8bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f32_bf16_nxv8bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v12, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.bf16(
+    <vscale x 8 x float> %0,
+    bfloat %1,
+    <vscale x 8 x bfloat> %2,
+    <vscale x 8 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.bf16(
+  <vscale x 16 x float>,
+  bfloat,
+  <vscale x 16 x bfloat>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmsac_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m4, tu, ma
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v16
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.bf16(
+    <vscale x 16 x float> %0,
+    bfloat %1,
+    <vscale x 16 x bfloat> %2,
+    iXLen 0, iXLen %3, iXLen 0)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.bf16(
+  <vscale x 16 x float>,
+  bfloat,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwnmsac_mask_vf_nxv16f32_bf16_nxv16bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv16f32_bf16_nxv16bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v16, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.bf16(
+    <vscale x 16 x float> %0,
+    bfloat %1,
+    <vscale x 16 x bfloat> %2,
+    <vscale x 16 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 0)
+
+  ret <vscale x 16 x float> %a
+}
+
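[Editor's note: every file in this diff is written once against the placeholder type iXLen, and the RUN lines pipe the file through sed before llc so the same source exercises both riscv32 and riscv64. The miniature, hypothetical file below isolates just that mechanism; its RUN lines follow the ones used throughout this diff, and iXLen is not a real IR type until sed rewrites it.]

; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s

; After substitution this is plain i32 (rv32) or i64 (rv64) IR, so one file
; covers both ABIs with a single set of CHECK lines.
define iXLen @xlen_identity(iXLen %x) {
  ret iXLen %x
}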
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-bf.ll
new file mode 100644
index 0000000..d993e4e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-bf.ll
@@ -0,0 +1,519 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.nxv1bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT:    vfwsub.vv v10, v8, v9
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.nxv1bf16(
+    <vscale x 1 x float> poison,
+    <vscale x 1 x bfloat> %0,
+    <vscale x 1 x bfloat> %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.nxv1bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub_mask_vv_nxv1f32_nxv1bf16_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv1f32_nxv1bf16_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT:    vfwsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.nxv1bf16(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    <vscale x 1 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.nxv2bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT:    vfwsub.vv v10, v8, v9
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.nxv2bf16(
+    <vscale x 2 x float> poison,
+    <vscale x 2 x bfloat> %0,
+    <vscale x 2 x bfloat> %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.nxv2bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub_mask_vv_nxv2f32_nxv2bf16_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv2f32_nxv2bf16_nxv2bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.nxv2bf16(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x bfloat> %1,
+    <vscale x 2 x bfloat> %2,
+    <vscale x 2 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.nxv4bf16(
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v10, v9
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    fsrmi a0, 0
+; CHECK-NEXT:    vfwsub.vv v8, v11, v10
+; CHECK-NEXT:    fsrm a0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.nxv4bf16(
+    <vscale x 4 x float> poison,
+    <vscale x 4 x bfloat> %0,
+    <vscale x 4 x bfloat> %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.nxv4bf16(
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub_mask_vv_nxv4f32_nxv4bf16_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv4f32_nxv4bf16_nxv4bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT:    vfwsub.vv v8, v10, v11, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.nxv4bf16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 4 x bfloat> %2,
+    <vscale x 4 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.nxv8bf16(
+  <vscale x 8 x float>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT:    vmv2r.v v12, v10
+; CHECK-NEXT:    vmv2r.v v14, v8
+; CHECK-NEXT:    fsrmi a0, 0
+; CHECK-NEXT:    vfwsub.vv v8, v14, v12
+; CHECK-NEXT:    fsrm a0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.nxv8bf16(
+    <vscale x 8 x float> poison,
+    <vscale x 8 x bfloat> %0,
+    <vscale x 8 x bfloat> %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.nxv8bf16(
+  <vscale x 8 x float>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub_mask_vv_nxv8f32_nxv8bf16_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv8f32_nxv8bf16_nxv8bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT:    vfwsub.vv v8, v12, v14, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.nxv8bf16(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x bfloat> %1,
+    <vscale x 8 x bfloat> %2,
+    <vscale x 8 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16bf16.nxv16bf16(
+  <vscale x 16 x float>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT:    vmv4r.v v16, v12
+; CHECK-NEXT:    vmv4r.v v20, v8
+; CHECK-NEXT:    fsrmi a0, 0
+; CHECK-NEXT:    vfwsub.vv v8, v20, v16
+; CHECK-NEXT:    fsrm a0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16bf16.nxv16bf16(
+    <vscale x 16 x float> poison,
+    <vscale x 16 x bfloat> %0,
+    <vscale x 16 x bfloat> %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16bf16.nxv16bf16(
+  <vscale x 16 x float>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub_mask_vv_nxv16f32_nxv16bf16_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv16f32_nxv16bf16_nxv16bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT:    vfwsub.vv v8, v16, v20, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16bf16.nxv16bf16(
+    <vscale x 16 x float> %0,
+    <vscale x 16 x bfloat> %1,
+    <vscale x 16 x bfloat> %2,
+    <vscale x 16 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  bfloat,
+  iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT:    vfwsub.vf v9, v8, fa0
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1bf16.bf16(
+    <vscale x 1 x float> poison,
+    <vscale x 1 x bfloat> %0,
+    bfloat %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  bfloat,
+  <vscale x 1 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub_mask_vf_nxv1f32_nxv1bf16_bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f32_nxv1bf16_bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.bf16(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x bfloat> %1,
+    bfloat %2,
+    <vscale x 1 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  bfloat,
+  iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f32_nxv2bf16_bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT:    vfwsub.vf v9, v8, fa0
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2bf16.bf16(
+    <vscale x 2 x float> poison,
+    <vscale x 2 x bfloat> %0,
+    bfloat %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  bfloat,
+  <vscale x 2 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub_mask_vf_nxv2f32_nxv2bf16_bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f32_nxv2bf16_bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v9, fa0, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2bf16.bf16(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x bfloat> %1,
+    bfloat %2,
+    <vscale x 2 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.bf16(
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  bfloat,
+  iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f32_nxv4bf16_bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    fsrmi a0, 0
+; CHECK-NEXT:    vfwsub.vf v8, v10, fa0
+; CHECK-NEXT:    fsrm a0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4bf16.bf16(
+    <vscale x 4 x float> poison,
+    <vscale x 4 x bfloat> %0,
+    bfloat %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.bf16(
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  bfloat,
+  <vscale x 4 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub_mask_vf_nxv4f32_nxv4bf16_bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f32_nxv4bf16_bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v10, fa0, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4bf16.bf16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    bfloat %2,
+    <vscale x 4 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.bf16(
+  <vscale x 8 x float>,
+  <vscale x 8 x bfloat>,
+  bfloat,
+  iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f32_nxv8bf16_bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT:    vmv2r.v v12, v8
+; CHECK-NEXT:    fsrmi a0, 0
+; CHECK-NEXT:    vfwsub.vf v8, v12, fa0
+; CHECK-NEXT:    fsrm a0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8bf16.bf16(
+    <vscale x 8 x float> poison,
+    <vscale x 8 x bfloat> %0,
+    bfloat %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.bf16(
+  <vscale x 8 x float>,
+  <vscale x 8 x bfloat>,
+  bfloat,
+  <vscale x 8 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub_mask_vf_nxv8f32_nxv8bf16_bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f32_nxv8bf16_bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v12, fa0, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8bf16.bf16(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x bfloat> %1,
+    bfloat %2,
+    <vscale x 8 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 8 x float> %a
+}
+
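[Editor's note: in every masked test above, the mask rides in v0 ("v0.t" in the CHECK lines), the vsetvli uses "ta, mu", and the trailing policy operand is iXLen 1, so masked-off lanes keep the passthru value. The hypothetical wrapper below, in the same iXLen convention as these files and with the signature declared above, labels those operands explicitly.]

declare <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.bf16(
  <vscale x 1 x float>, <vscale x 1 x bfloat>, bfloat, <vscale x 1 x i1>,
  iXLen, iXLen, iXLen);

define <vscale x 1 x float> @example_masked_vfwsub(<vscale x 1 x float> %pt, <vscale x 1 x bfloat> %v, bfloat %s, <vscale x 1 x i1> %m, iXLen %vl) {
  ; Operands: passthru, bf16 vector, bf16 scalar, mask, then rounding mode (0),
  ; vl, and policy (1, matching the "ta, mu" vsetvli the tests check for).
  %r = call <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1bf16.bf16(
    <vscale x 1 x float> %pt, <vscale x 1 x bfloat> %v, bfloat %s,
    <vscale x 1 x i1> %m, iXLen 0, iXLen %vl, iXLen 1)
  ret <vscale x 1 x float> %r
}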
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-w-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-w-bf.ll
new file mode 100644
index 0000000..b22899a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-w-bf.ll
@@ -0,0 +1,773 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT:    vfwsub.wv v8, v8, v9
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1bf16(
+    <vscale x 1 x float> poison,
+    <vscale x 1 x float> %0,
+    <vscale x 1 x bfloat> %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1bf16(
+  <vscale x 1 x float>,
+  <vscale x 1 x float>,
+  <vscale x 1 x bfloat>,
+  <vscale x 1 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT:    vfwsub.wv v8, v9, v10, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1bf16(
+    <vscale x 1 x float> %0,
+    <vscale x 1 x float> %1,
+    <vscale x 1 x bfloat> %2,
+    <vscale x 1 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT:    vfwsub.wv v8, v8, v9
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2bf16(
+    <vscale x 2 x float> poison,
+    <vscale x 2 x float> %0,
+    <vscale x 2 x bfloat> %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2bf16(
+  <vscale x 2 x float>,
+  <vscale x 2 x float>,
+  <vscale x 2 x bfloat>,
+  <vscale x 2 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.wv v8, v9, v10, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2bf16(
+    <vscale x 2 x float> %0,
+    <vscale x 2 x float> %1,
+    <vscale x 2 x bfloat> %2,
+    <vscale x 2 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4bf16(
+  <vscale x 4 x float>,
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m1, ta, ma
+; CHECK-NEXT:    vfwsub.wv v8, v8, v10
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4bf16(
+    <vscale x 4 x float> poison,
+    <vscale x 4 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4bf16(
+  <vscale x 4 x float>,
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m1, ta, mu
+; CHECK-NEXT:    vfwsub.wv v8, v10, v12, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4bf16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x float> %1,
+    <vscale x 4 x bfloat> %2,
+    <vscale x 4 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8bf16(
+  <vscale x 8 x float>,
+  <vscale x 8 x float>,
+  <vscale x 8 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT:    vfwsub.wv v8, v8, v12
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8bf16(
+    <vscale x 8 x float> poison,
+    <vscale x 8 x float> %0,
+    <vscale x 8 x bfloat> %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8bf16(
+  <vscale x 8 x float>,
+  <vscale x 8 x float>,
+  <vscale x 8 x bfloat>,
+  <vscale x 8 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT:    vfwsub.wv v8, v12, v16, v0.t
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8bf16(
+    <vscale x 8 x float> %0,
+    <vscale x 8 x float> %1,
+    <vscale x 8 x bfloat> %2,
+    <vscale x 8 x i1> %3,
+    iXLen 0, iXLen %4, iXLen 1)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16bf16(
+  <vscale x 16 x float>,
+  <vscale x 16 x float>,
+  <vscale x 16 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fsrmi a1, 0
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT:    vfwsub.wv v8, v8, v16
+; CHECK-NEXT:    fsrm a1
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16bf16(
+    <vscale x 16 x float> poison,
+    <vscale x 16 x float> %0,
+    <vscale x 16 x bfloat> %1,
+    iXLen 0, iXLen %2)
+
+  ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16bf16(
+  <vscale x 16 x float>,
+  <vscale x 16 x float>,
+  <vscale x 16 x bfloat>,
+  <vscale x 16 x i1>,
+  iXLen, iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16bf16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vl4re16.v v24, (a0)
+; CHECK-NEXT:    fsrmi a0, 0
+; CHECK-NEXT:    vsetvli zero, a1, e16alt, m4, ta, mu
+; CHECK-NEXT:    vfwsub.wv v8, v16, v24, v0.t
+; CHECK-NEXT:    fsrm a0
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float>
@llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x float> %1, + <vscale x 16 x bfloat> %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.bf16( + <vscale x 1 x float>, + <vscale x 1 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.bf16( + <vscale x 1 x float> poison, + <vscale x 1 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.bf16( + <vscale x 1 x float>, + <vscale x 1 x float>, + bfloat, + <vscale x 1 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.bf16( + <vscale x 1 x float> %0, + <vscale x 1 x float> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 1 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.bf16( + <vscale x 2 x float>, + <vscale x 2 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.bf16( + <vscale x 2 x float> poison, + <vscale x 2 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.bf16( + <vscale x 2 x float>, + <vscale x 2 x float>, + bfloat, + <vscale x 2 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v9, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.bf16( + <vscale x 2 x float> %0, + <vscale x 2 x float> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.bf16( + <vscale x 4 x float>, + <vscale x 4 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 4 x float> 
@intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.bf16( + <vscale x 4 x float> poison, + <vscale x 4 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.bf16( + <vscale x 4 x float>, + <vscale x 4 x float>, + bfloat, + <vscale x 4 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v10, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.bf16( + <vscale x 4 x float> %0, + <vscale x 4 x float> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 4 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.bf16( + <vscale x 8 x float>, + <vscale x 8 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.bf16( + <vscale x 8 x float> poison, + <vscale x 8 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.bf16( + <vscale x 8 x float>, + <vscale x 8 x float>, + bfloat, + <vscale x 8 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v12, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.bf16( + <vscale x 8 x float> %0, + <vscale x 8 x float> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 8 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.bf16( + <vscale x 16 x float>, + <vscale x 16 x float>, + bfloat, + iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vfwsub.wf v8, v8, fa0 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> 
@llvm.riscv.vfwsub.w.nxv16f32.bf16( + <vscale x 16 x float> poison, + <vscale x 16 x float> %0, + bfloat %1, + iXLen 0, iXLen %2) + + ret <vscale x 16 x float> %a +} + +declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.bf16( + <vscale x 16 x float>, + <vscale x 16 x float>, + bfloat, + <vscale x 16 x i1>, + iXLen, iXLen, iXLen); + +define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v16, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.bf16( + <vscale x 16 x float> %0, + <vscale x 16 x float> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen 0, iXLen %4, iXLen 1) + + ret <vscale x 16 x float> %a +} + +define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x float> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwsub.wv v8, v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1bf16( + <vscale x 1 x float> %0, + <vscale x 1 x float> %0, + <vscale x 1 x bfloat> %1, + <vscale x 1 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 1 x float> %a +} + +define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x float> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwsub.wv v8, v8, v9, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2bf16( + <vscale x 2 x float> %0, + <vscale x 2 x float> %0, + <vscale x 2 x bfloat> %1, + <vscale x 2 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 2 x float> %a +} + +define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwsub.wv v8, v8, v10, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4bf16( + <vscale x 4 x float> %0, + <vscale x 4 x float> %0, + <vscale x 4 x bfloat> %1, + <vscale x 4 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 4 x float> %a +} + +define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x float> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwsub.wv v8, v8, v12, v0.t 
+; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8bf16( + <vscale x 8 x float> %0, + <vscale x 8 x float> %0, + <vscale x 8 x bfloat> %1, + <vscale x 8 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 8 x float> %a +} + +define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16(<vscale x 16 x float> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwsub.wv v8, v8, v16, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16bf16( + <vscale x 16 x float> %0, + <vscale x 16 x float> %0, + <vscale x 16 x bfloat> %1, + <vscale x 16 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 16 x float> %a +} + +define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_bf16(<vscale x 1 x float> %0, bfloat %1, <vscale x 1 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.bf16( + <vscale x 1 x float> %0, + <vscale x 1 x float> %0, + bfloat %1, + <vscale x 1 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 1 x float> %a +} + +define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_bf16(<vscale x 2 x float> %0, bfloat %1, <vscale x 2 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.bf16( + <vscale x 2 x float> %0, + <vscale x 2 x float> %0, + bfloat %1, + <vscale x 2 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 2 x float> %a +} + +define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_bf16(<vscale x 4 x float> %0, bfloat %1, <vscale x 4 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.bf16( + <vscale x 4 x float> %0, + <vscale x 4 x float> %0, + bfloat %1, + <vscale x 4 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 4 x float> %a +} + +define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_bf16(<vscale x 8 x float> %0, bfloat %1, <vscale x 8 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.bf16( + <vscale x 8 x float> %0, + <vscale x 8 x 
float> %0, + bfloat %1, + <vscale x 8 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 8 x float> %a +} + +define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_bf16(<vscale x 16 x float> %0, bfloat %1, <vscale x 16 x i1> %2, iXLen %3) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vfwsub.wf v8, v8, fa0, v0.t +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.bf16( + <vscale x 16 x float> %0, + <vscale x 16 x float> %0, + bfloat %1, + <vscale x 16 x i1> %2, + iXLen 0, iXLen %3, iXLen 1) + + ret <vscale x 16 x float> %a +} + +define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x float> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vfwsub.wv v10, v9, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1bf16( + <vscale x 1 x float> poison, + <vscale x 1 x float> %1, + <vscale x 1 x bfloat> %0, + iXLen 0, iXLen %2) + + ret <vscale x 1 x float> %a +} + +define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x float> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: fsrmi a1, 0 +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vfwsub.wv v10, v9, v8 +; CHECK-NEXT: fsrm a1 +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2bf16( + <vscale x 2 x float> poison, + <vscale x 2 x float> %1, + <vscale x 2 x bfloat> %0, + iXLen 0, iXLen %2) + + ret <vscale x 2 x float> %a +} + +define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x float> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmv1r.v v12, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwsub.wv v8, v10, v12 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4bf16( + <vscale x 4 x float> poison, + <vscale x 4 x float> %1, + <vscale x 4 x bfloat> %0, + iXLen 0, iXLen %2) + + ret <vscale x 4 x float> %a +} + +define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x float> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmv2r.v v16, v8 +; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vfwsub.wv v8, v12, v16 +; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8bf16( + <vscale x 8 x float> poison, + <vscale x 8 x float> %1, + <vscale x 8 x bfloat> %0, + iXLen 0, iXLen %2) + + ret <vscale x 8 x float> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq-bf.ll 
new file mode 100644
index 0000000..9bd859b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq-bf.ll
@@ -0,0 +1,496 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfeq_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> %2,
+ <vscale x 1 x bfloat> %3,
+ <vscale x 1 x i1> %mask,
+ iXLen %4)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16(
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfeq_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16(
+ <vscale x 2 x bfloat> %0,
+ <vscale x 2 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 2 x i1> %a
+}
+
+declare <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2bf16(
+ <vscale x 2 x i1>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x bfloat>,
+ <vscale x 2 x i1>,
+ iXLen);
+
+define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv2bf16_nxv2bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfeq.vv v0, v8, v9
+; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16(
+ <vscale x 2 x bfloat> %1,
+ <vscale x 2 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 2 x i1> 
@llvm.riscv.vmfeq.mask.nxv2bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %2, + <vscale x 2 x bfloat> %3, + <vscale x 2 x i1> %mask, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfeq_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v9 +; CHECK-NEXT: vmfeq.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv.v.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen %4) + %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %2, + <vscale x 4 x bfloat> %3, + <vscale x 4 x i1> %mask, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfeq_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v10 +; CHECK-NEXT: vmfeq.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen %4) + %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %2, + <vscale x 8 x bfloat> %3, + <vscale x 8 x i1> %mask, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define 
<vscale x 16 x i1> @intrinsic_vmfeq_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmfeq.vv v0, v8, v12 +; CHECK-NEXT: vmfeq.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen %4) + %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %2, + <vscale x 16 x bfloat> %3, + <vscale x 16 x i1> %mask, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfeq_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfeq.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16.bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfeq_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfeq.mask.nxv1bf16.bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfeq_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfeq.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> 
@llvm.riscv.vmfeq.mask.nxv2bf16.bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfeq_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfeq.mask.nxv2bf16.bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfeq_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfeq.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16.bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfeq_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfeq.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfeq.mask.nxv4bf16.bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfeq_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfeq.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16.bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfeq_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmfeq.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfeq.mask.nxv8bf16.bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4) + + ret 
<vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfeq_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfeq.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfeq_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfeq_mask_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v13, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmfeq.vf v13, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v13
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfeq.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge-bf.ll
new file mode 100644
index 0000000..73946dc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfge-bf.ll
@@ -0,0 +1,496 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16(
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfge_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16(
+ <vscale x 1 x bfloat> %0,
+ <vscale x 1 x bfloat> %1,
+ iXLen %2)
+
+ ret <vscale x 1 x i1> %a
+}
+
+declare <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16(
+ <vscale x 1 x i1>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x bfloat>,
+ <vscale x 1 x i1>,
+ iXLen);
+
+define <vscale x 1 x i1> @intrinsic_vmfge_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv1bf16_nxv1bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmfle.vv v0, v9, v8
+; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %mask = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16(
+ <vscale x 1 x bfloat> %1,
+ <vscale x 1 x bfloat> %2,
+ iXLen %4)
+ %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16(
+ <vscale x 1 x i1> %0,
+ <vscale x 1 x bfloat> 
%2, + <vscale x 1 x bfloat> %3, + <vscale x 1 x i1> %mask, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfge_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfge_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16( + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen %4) + %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %2, + <vscale x 2 x bfloat> %3, + <vscale x 2 x i1> %mask, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfge_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfge_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfle.vv v0, v9, v8 +; CHECK-NEXT: vmfle.vv v11, v10, v9, v0.t +; CHECK-NEXT: vmv.v.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen %4) + %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %2, + <vscale x 4 x bfloat> %3, + <vscale x 4 x i1> %mask, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfge_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 
8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfge_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmfle.vv v0, v10, v8 +; CHECK-NEXT: vmfle.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen %4) + %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %2, + <vscale x 8 x bfloat> %3, + <vscale x 8 x i1> %mask, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfge_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfge_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmfle.vv v0, v12, v8 +; CHECK-NEXT: vmfle.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen %4) + %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %2, + <vscale x 16 x bfloat> %3, + <vscale x 16 x i1> %mask, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfge_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1bf16.bf16( + <vscale x 
1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16.bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfge_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfge.mask.nxv1bf16.bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfge_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16.bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfge_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfge.mask.nxv2bf16.bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfge_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfge_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfge.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16.bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfge_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfge.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfge.mask.nxv4bf16.bf16( + <vscale x 
4 x i1> %0,
+ <vscale x 4 x bfloat> %1,
+ bfloat %2,
+ <vscale x 4 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 4 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16.bf16(
+ <vscale x 8 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfge_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.nxv8bf16.bf16(
+ <vscale x 8 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1>,
+ <vscale x 8 x bfloat>,
+ bfloat,
+ <vscale x 8 x i1>,
+ iXLen);
+
+define <vscale x 8 x i1> @intrinsic_vmfge_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv8bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfge.vf v11, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfge.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfge_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfge.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfge_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfge_mask_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v13, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmfge.vf v13, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v13
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfge.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt-bf.ll
new file mode 100644
index 0000000..fac324c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt-bf.ll
@@ -0,0 +1,496 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
+
+declare <vscale x 1 x 
i1> @llvm.riscv.vmfgt.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfgt_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen %4) + %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %3, + <vscale x 1 x i1> %mask, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfgt_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16( + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen %4) + %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %2, + <vscale x 2 x bfloat> %3, + <vscale x 2 x i1> %mask, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfgt_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vv v0, v9, v8 +; CHECK-NEXT: vmflt.vv v11, v10, v9, v0.t +; CHECK-NEXT: vmv.v.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen %4) + %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %2, + <vscale x 4 x bfloat> %3, + <vscale x 4 x i1> %mask, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfgt_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmflt.vv v0, v10, v8 +; CHECK-NEXT: vmflt.vv v14, v12, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen %4) + %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %2, + <vscale x 8 x bfloat> %3, + <vscale x 8 x i1> %mask, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfgt_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> 
@llvm.riscv.vmfgt.mask.nxv16bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmflt.vv v0, v12, v8 +; CHECK-NEXT: vmflt.vv v20, v16, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen %4) + %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %2, + <vscale x 16 x bfloat> %3, + <vscale x 16 x i1> %mask, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfgt_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16.bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfgt_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfgt.mask.nxv1bf16.bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfgt_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16.bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfgt_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, 
v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfgt.mask.nxv2bf16.bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfgt_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16.bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfgt_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfgt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfgt.mask.nxv4bf16.bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfgt_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16.bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfgt_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmfgt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfgt.mask.nxv8bf16.bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfgt_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmfgt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen 
%2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16bf16.bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfgt_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfgt_mask_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfgt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfgt.mask.nxv16bf16.bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle-bf.ll new file mode 100644 index 0000000..8356b7b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmfle-bf.ll @@ -0,0 +1,496 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfle_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfle_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen %4) + %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %3, + <vscale x 1 x i1> %mask, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfle_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = 
call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfle_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16( + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen %4) + %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %2, + <vscale x 2 x bfloat> %3, + <vscale x 2 x i1> %mask, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfle_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfle_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v9 +; CHECK-NEXT: vmfle.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv.v.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen %4) + %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %2, + <vscale x 4 x bfloat> %3, + <vscale x 4 x i1> %mask, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfle_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen); + 
+define <vscale x 8 x i1> @intrinsic_vmfle_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v10 +; CHECK-NEXT: vmfle.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen %4) + %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %2, + <vscale x 8 x bfloat> %3, + <vscale x 8 x i1> %mask, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfle_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfle_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmfle.vv v0, v8, v12 +; CHECK-NEXT: vmfle.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen %4) + %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %2, + <vscale x 16 x bfloat> %3, + <vscale x 16 x i1> %mask, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfle_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16.bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfle_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v 
v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfle.mask.nxv1bf16.bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfle_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16.bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfle_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfle.mask.nxv2bf16.bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfle_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16.bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfle_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfle.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfle.mask.nxv4bf16.bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfle_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call 
<vscale x 8 x i1> @llvm.riscv.vmfle.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16.bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfle_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmfle.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfle.mask.nxv8bf16.bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfle_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfle_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmfle.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16.bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfle_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfle_mask_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: vmv1r.v v0, v12 +; CHECK-NEXT: vmfle.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfle.mask.nxv16bf16.bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt-bf.ll new file mode 100644 index 0000000..2e1bcc5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmflt-bf.ll @@ -0,0 +1,496 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmflt_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) 
+ + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmflt_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen %4) + %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %3, + <vscale x 1 x i1> %mask, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmflt_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmflt_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16( + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen %4) + %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %2, + <vscale x 2 x bfloat> %3, + <vscale x 2 x i1> %mask, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmflt_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmflt_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, 
<vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v9 +; CHECK-NEXT: vmflt.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv.v.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen %4) + %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %2, + <vscale x 4 x bfloat> %3, + <vscale x 4 x i1> %mask, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmflt_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmflt_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v10 +; CHECK-NEXT: vmflt.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen %4) + %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %2, + <vscale x 8 x bfloat> %3, + <vscale x 8 x i1> %mask, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmflt_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmflt_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmflt.vv v0, v8, v12 +; 
CHECK-NEXT: vmflt.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen %4) + %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %2, + <vscale x 16 x bfloat> %3, + <vscale x 16 x i1> %mask, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmflt_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16.bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmflt_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmflt.mask.nxv1bf16.bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmflt_vf_nxv2bf16_bf16(<vscale x 2 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16.bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmflt_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmflt.mask.nxv2bf16.bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmflt_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16.bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmflt_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmflt.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmflt.mask.nxv4bf16.bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmflt_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16.bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmflt_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: vmflt.vf v11, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmflt.mask.nxv8bf16.bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %1, + bfloat %2, + <vscale x 8 x i1> %3, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16.bf16( + <vscale x 16 x bfloat>, + bfloat, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmflt_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmflt_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmflt.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.nxv16bf16.bf16( + <vscale x 16 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16.bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + bfloat, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmflt_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmflt_mask_vf_nxv16bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v13, v0 +; CHECK-NEXT: 
vmv1r.v v0, v12 +; CHECK-NEXT: vmflt.vf v13, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v13 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmflt.mask.nxv16bf16.bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %1, + bfloat %2, + <vscale x 16 x i1> %3, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne-bf.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne-bf.ll new file mode 100644 index 0000000..283ffc5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vmfne-bf.ll @@ -0,0 +1,496 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \ +; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s + +declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16( + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfne_vv_nxv1bf16_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16( + <vscale x 1 x bfloat> %0, + <vscale x 1 x bfloat> %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + <vscale x 1 x bfloat>, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfne_mask_vv_nxv1bf16_nxv1bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x bfloat> %2, <vscale x 1 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv1bf16_nxv1bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16( + <vscale x 1 x bfloat> %1, + <vscale x 1 x bfloat> %2, + iXLen %4) + %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %2, + <vscale x 1 x bfloat> %3, + <vscale x 1 x i1> %mask, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16( + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfne_vv_nxv2bf16_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16( + <vscale x 2 x bfloat> %0, + <vscale x 2 x bfloat> %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + <vscale x 2 x bfloat>, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfne_mask_vv_nxv2bf16_nxv2bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x bfloat> %2, <vscale x 2 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: 
intrinsic_vmfne_mask_vv_nxv2bf16_nxv2bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv1r.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16( + <vscale x 2 x bfloat> %1, + <vscale x 2 x bfloat> %2, + iXLen %4) + %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %2, + <vscale x 2 x bfloat> %3, + <vscale x 2 x i1> %mask, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16( + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfne_vv_nxv4bf16_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16( + <vscale x 4 x bfloat> %0, + <vscale x 4 x bfloat> %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + <vscale x 4 x bfloat>, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfne_mask_vv_nxv4bf16_nxv4bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, <vscale x 4 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv4bf16_nxv4bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v11, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v9 +; CHECK-NEXT: vmfne.vv v11, v9, v10, v0.t +; CHECK-NEXT: vmv.v.v v0, v11 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16( + <vscale x 4 x bfloat> %1, + <vscale x 4 x bfloat> %2, + iXLen %4) + %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %2, + <vscale x 4 x bfloat> %3, + <vscale x 4 x i1> %mask, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16( + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfne_vv_nxv8bf16_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16( + <vscale x 8 x bfloat> %0, + <vscale x 8 x bfloat> %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + <vscale x 8 x bfloat>, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfne_mask_vv_nxv8bf16_nxv8bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> %2, <vscale x 8 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv8bf16_nxv8bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu +; CHECK-NEXT: vmv1r.v v14, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v10 +; CHECK-NEXT: vmfne.vv v14, v10, v12, v0.t +; CHECK-NEXT: vmv1r.v v0, v14 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 8 x 
i1> @llvm.riscv.vmfne.nxv8bf16( + <vscale x 8 x bfloat> %1, + <vscale x 8 x bfloat> %2, + iXLen %4) + %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16( + <vscale x 8 x i1> %0, + <vscale x 8 x bfloat> %2, + <vscale x 8 x bfloat> %3, + <vscale x 8 x i1> %mask, + iXLen %4) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16( + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfne_vv_nxv16bf16_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma +; CHECK-NEXT: vmfne.vv v0, v8, v12 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16( + <vscale x 16 x bfloat> %0, + <vscale x 16 x bfloat> %1, + iXLen %2) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16( + <vscale x 16 x i1>, + <vscale x 16 x bfloat>, + <vscale x 16 x bfloat>, + <vscale x 16 x i1>, + iXLen); + +define <vscale x 16 x i1> @intrinsic_vmfne_mask_vv_nxv16bf16_nxv16bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x bfloat> %2, <vscale x 16 x bfloat> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vv_nxv16bf16_nxv16bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu +; CHECK-NEXT: vmv1r.v v20, v0 +; CHECK-NEXT: vmfne.vv v0, v8, v12 +; CHECK-NEXT: vmfne.vv v20, v12, v16, v0.t +; CHECK-NEXT: vmv1r.v v0, v20 +; CHECK-NEXT: ret +entry: + %mask = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16( + <vscale x 16 x bfloat> %1, + <vscale x 16 x bfloat> %2, + iXLen %4) + %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16( + <vscale x 16 x i1> %0, + <vscale x 16 x bfloat> %2, + <vscale x 16 x bfloat> %3, + <vscale x 16 x i1> %mask, + iXLen %4) + + ret <vscale x 16 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16.bf16( + <vscale x 1 x bfloat>, + bfloat, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfne_vf_nxv1bf16_bf16(<vscale x 1 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma +; CHECK-NEXT: vmfne.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1bf16.bf16( + <vscale x 1 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16.bf16( + <vscale x 1 x i1>, + <vscale x 1 x bfloat>, + bfloat, + <vscale x 1 x i1>, + iXLen); + +define <vscale x 1 x i1> @intrinsic_vmfne_mask_vf_nxv1bf16_bf16(<vscale x 1 x i1> %0, <vscale x 1 x bfloat> %1, bfloat %2, <vscale x 1 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv1bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 1 x i1> @llvm.riscv.vmfne.mask.nxv1bf16.bf16( + <vscale x 1 x i1> %0, + <vscale x 1 x bfloat> %1, + bfloat %2, + <vscale x 1 x i1> %3, + iXLen %4) + + ret <vscale x 1 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16.bf16( + <vscale x 2 x bfloat>, + bfloat, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfne_vf_nxv2bf16_bf16(<vscale x 2 x 
bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma +; CHECK-NEXT: vmfne.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.nxv2bf16.bf16( + <vscale x 2 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16.bf16( + <vscale x 2 x i1>, + <vscale x 2 x bfloat>, + bfloat, + <vscale x 2 x i1>, + iXLen); + +define <vscale x 2 x i1> @intrinsic_vmfne_mask_vf_nxv2bf16_bf16(<vscale x 2 x i1> %0, <vscale x 2 x bfloat> %1, bfloat %2, <vscale x 2 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv2bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv1r.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 2 x i1> @llvm.riscv.vmfne.mask.nxv2bf16.bf16( + <vscale x 2 x i1> %0, + <vscale x 2 x bfloat> %1, + bfloat %2, + <vscale x 2 x i1> %3, + iXLen %4) + + ret <vscale x 2 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16.bf16( + <vscale x 4 x bfloat>, + bfloat, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfne_vf_nxv4bf16_bf16(<vscale x 4 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma +; CHECK-NEXT: vmfne.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.nxv4bf16.bf16( + <vscale x 4 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16.bf16( + <vscale x 4 x i1>, + <vscale x 4 x bfloat>, + bfloat, + <vscale x 4 x i1>, + iXLen); + +define <vscale x 4 x i1> @intrinsic_vmfne_mask_vf_nxv4bf16_bf16(<vscale x 4 x i1> %0, <vscale x 4 x bfloat> %1, bfloat %2, <vscale x 4 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv4bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, mu +; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmfne.vf v10, v8, fa0, v0.t +; CHECK-NEXT: vmv.v.v v0, v10 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 4 x i1> @llvm.riscv.vmfne.mask.nxv4bf16.bf16( + <vscale x 4 x i1> %0, + <vscale x 4 x bfloat> %1, + bfloat %2, + <vscale x 4 x i1> %3, + iXLen %4) + + ret <vscale x 4 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16.bf16( + <vscale x 8 x bfloat>, + bfloat, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfne_vf_nxv8bf16_bf16(<vscale x 8 x bfloat> %0, bfloat %1, iXLen %2) nounwind { +; CHECK-LABEL: intrinsic_vmfne_vf_nxv8bf16_bf16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma +; CHECK-NEXT: vmfne.vf v0, v8, fa0 +; CHECK-NEXT: ret +entry: + %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.nxv8bf16.bf16( + <vscale x 8 x bfloat> %0, + bfloat %1, + iXLen %2) + + ret <vscale x 8 x i1> %a +} + +declare <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16.bf16( + <vscale x 8 x i1>, + <vscale x 8 x bfloat>, + bfloat, + <vscale x 8 x i1>, + iXLen); + +define <vscale x 8 x i1> @intrinsic_vmfne_mask_vf_nxv8bf16_bf16(<vscale x 8 x i1> %0, <vscale x 8 x bfloat> %1, bfloat %2, <vscale x 8 x i1> %3, iXLen %4) nounwind { +; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv8bf16_bf16: +; CHECK: # 
%bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, mu
+; CHECK-NEXT: vmv1r.v v11, v0
+; CHECK-NEXT: vmv1r.v v0, v10
+; CHECK-NEXT: vmfne.vf v11, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v11
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i1> @llvm.riscv.vmfne.mask.nxv8bf16.bf16(
+ <vscale x 8 x i1> %0,
+ <vscale x 8 x bfloat> %1,
+ bfloat %2,
+ <vscale x 8 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 8 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16.bf16(
+ <vscale x 16 x bfloat>,
+ bfloat,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfne_vf_nxv16bf16_bf16(<vscale x 16 x bfloat> %0, bfloat %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vmfne.vf v0, v8, fa0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.nxv16bf16.bf16(
+ <vscale x 16 x bfloat> %0,
+ bfloat %1,
+ iXLen %2)
+
+ ret <vscale x 16 x i1> %a
+}
+
+declare <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1>,
+ <vscale x 16 x bfloat>,
+ bfloat,
+ <vscale x 16 x i1>,
+ iXLen);
+
+define <vscale x 16 x i1> @intrinsic_vmfne_mask_vf_nxv16bf16_bf16(<vscale x 16 x i1> %0, <vscale x 16 x bfloat> %1, bfloat %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
+; CHECK-LABEL: intrinsic_vmfne_mask_vf_nxv16bf16_bf16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, mu
+; CHECK-NEXT: vmv1r.v v13, v0
+; CHECK-NEXT: vmv1r.v v0, v12
+; CHECK-NEXT: vmfne.vf v13, v8, fa0, v0.t
+; CHECK-NEXT: vmv1r.v v0, v13
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i1> @llvm.riscv.vmfne.mask.nxv16bf16.bf16(
+ <vscale x 16 x i1> %0,
+ <vscale x 16 x bfloat> %1,
+ bfloat %2,
+ <vscale x 16 x i1> %3,
+ iXLen %4)
+
+ ret <vscale x 16 x i1> %a
+}
+
diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir
index 470a30fd..bd4e9a4 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir
+++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir
@@ -37,9 +37,9 @@ body: |
 ; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]]
 ; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32)
+ ; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
 ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
 ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64)
- ; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
 ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV2]], [[UV4]]
 ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV3]], [[UV5]]
 ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32)
@@ -111,9 +111,9 @@ body: |
 ; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
 ; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]]
 ; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32)
+ ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
 ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64)
 ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64)
- ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
 ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV2]], [[UV4]]
X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV3]], [[UV5]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir index a7cbb35..6ab424e 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir @@ -33,9 +33,9 @@ body: | ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF + ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1) ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) - ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1) ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV]], [[UV2]] ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV1]], [[UV3]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) @@ -115,9 +115,9 @@ body: | ; X64: [[DEF:%[0-9]+]]:_(s8) = IMPLICIT_DEF ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s8) = IMPLICIT_DEF ; X64-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF + ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1) ; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF1]](s8) ; X64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF]](s8) - ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1) ; X64-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ZEXT]](s32), [[ANYEXT]], [[ANYEXT1]] ; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s16) ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) diff --git a/llvm/test/CodeGen/X86/fcmove.ll b/llvm/test/CodeGen/X86/fcmove.ll deleted file mode 100644 index 6bb0148..0000000 --- a/llvm/test/CodeGen/X86/fcmove.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: llc %s -o - -verify-machineinstrs | FileCheck %s - -target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-unknown" - -; Test that we can generate an fcmove, and also that it passes verification. 
- -; CHECK-LABEL: cmove_f -; CHECK: fcmove %st({{[0-7]}}), %st -define x86_fp80 @cmove_f(x86_fp80 %a, x86_fp80 %b, i32 %c) { - %test = icmp eq i32 %c, 0 - %add = fadd x86_fp80 %a, %b - %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b - ret x86_fp80 %ret -} diff --git a/llvm/test/CodeGen/X86/isel-select-fcmov.ll b/llvm/test/CodeGen/X86/isel-select-fcmov.ll new file mode 100644 index 0000000..cb441b8 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-select-fcmov.ll @@ -0,0 +1,175 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=X86-GISEL +; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=X64-GISEL +; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefix=X64 + +; Test that we can generate an fcmove, and also that it passes verification. + +define x86_fp80 @cmove_cmp(x86_fp80 %a, x86_fp80 %b, i32 %c) { +; X86-LABEL: cmove_cmp: +; X86: # %bb.0: +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; X86-NEXT: fadd %st(1), %st +; X86-NEXT: fxch %st(1) +; X86-NEXT: fcmove %st(1), %st +; X86-NEXT: fstp %st(1) +; X86-NEXT: retl +; +; X86-GISEL-LABEL: cmove_cmp: +; X86-GISEL: # %bb.0: +; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-GISEL-NEXT: xorl %eax, %eax +; X86-GISEL-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; X86-GISEL-NEXT: sete %al +; X86-GISEL-NEXT: fadd %st, %st(1) +; X86-GISEL-NEXT: andl $1, %eax +; X86-GISEL-NEXT: testl %eax, %eax +; X86-GISEL-NEXT: fxch %st(1) +; X86-GISEL-NEXT: fcmove %st(1), %st +; X86-GISEL-NEXT: fstp %st(1) +; X86-GISEL-NEXT: retl +; +; X64-LABEL: cmove_cmp: +; X64: # %bb.0: +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: testl %edi, %edi +; X64-NEXT: fadd %st(1), %st +; X64-NEXT: fxch %st(1) +; X64-NEXT: fcmove %st(1), %st +; X64-NEXT: fstp %st(1) +; X64-NEXT: retq +; +; X64-GISEL-LABEL: cmove_cmp: +; X64-GISEL: # %bb.0: +; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-GISEL-NEXT: xorl %eax, %eax +; X64-GISEL-NEXT: cmpl $0, %edi +; X64-GISEL-NEXT: sete %al +; X64-GISEL-NEXT: fadd %st, %st(1) +; X64-GISEL-NEXT: andl $1, %eax +; X64-GISEL-NEXT: testl %eax, %eax +; X64-GISEL-NEXT: fxch %st(1) +; X64-GISEL-NEXT: fcmove %st(1), %st +; X64-GISEL-NEXT: fstp %st(1) +; X64-GISEL-NEXT: retq + %test = icmp eq i32 %c, 0 + %add = fadd x86_fp80 %a, %b + %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b + ret x86_fp80 %ret +} + +define x86_fp80 @cmove_arg(x86_fp80 %a, x86_fp80 %b, i1 %test) { +; X86-LABEL: cmove_arg: +; X86: # %bb.0: +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fadd %st(1), %st +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: fxch %st(1) +; X86-NEXT: fcmovne %st(1), %st +; X86-NEXT: fstp %st(1) +; X86-NEXT: retl +; +; X86-GISEL-LABEL: cmove_arg: +; X86-GISEL: # %bb.0: +; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) +; 
X86-GISEL-NEXT: fadd %st, %st(1) +; X86-GISEL-NEXT: movl $1, %eax +; X86-GISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: testl %eax, %eax +; X86-GISEL-NEXT: fxch %st(1) +; X86-GISEL-NEXT: fcmove %st(1), %st +; X86-GISEL-NEXT: fstp %st(1) +; X86-GISEL-NEXT: retl +; +; X64-LABEL: cmove_arg: +; X64: # %bb.0: +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fadd %st(1), %st +; X64-NEXT: testb $1, %dil +; X64-NEXT: fxch %st(1) +; X64-NEXT: fcmovne %st(1), %st +; X64-NEXT: fstp %st(1) +; X64-NEXT: retq +; +; X64-GISEL-LABEL: cmove_arg: +; X64-GISEL: # %bb.0: +; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-GISEL-NEXT: fadd %st, %st(1) +; X64-GISEL-NEXT: andl $1, %edi +; X64-GISEL-NEXT: testl %edi, %edi +; X64-GISEL-NEXT: fxch %st(1) +; X64-GISEL-NEXT: fcmove %st(1), %st +; X64-GISEL-NEXT: fstp %st(1) +; X64-GISEL-NEXT: retq + %add = fadd x86_fp80 %a, %b + %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b + ret x86_fp80 %ret +} + +define x86_fp80 @cmove_load(x86_fp80 %a, x86_fp80 %b, ptr %p) { +; X86-LABEL: cmove_load: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fadd %st(1), %st +; X86-NEXT: cmpb $0, (%eax) +; X86-NEXT: fxch %st(1) +; X86-NEXT: fcmovne %st(1), %st +; X86-NEXT: fstp %st(1) +; X86-NEXT: retl +; +; X86-GISEL-LABEL: cmove_load: +; X86-GISEL: # %bb.0: +; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: fadd %st, %st(1) +; X86-GISEL-NEXT: movzbl (%eax), %eax +; X86-GISEL-NEXT: andl $1, %eax +; X86-GISEL-NEXT: testl %eax, %eax +; X86-GISEL-NEXT: fxch %st(1) +; X86-GISEL-NEXT: fcmove %st(1), %st +; X86-GISEL-NEXT: fstp %st(1) +; X86-GISEL-NEXT: retl +; +; X64-LABEL: cmove_load: +; X64: # %bb.0: +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fadd %st(1), %st +; X64-NEXT: cmpb $0, (%rdi) +; X64-NEXT: fxch %st(1) +; X64-NEXT: fcmovne %st(1), %st +; X64-NEXT: fstp %st(1) +; X64-NEXT: retq +; +; X64-GISEL-LABEL: cmove_load: +; X64-GISEL: # %bb.0: +; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-GISEL-NEXT: fadd %st, %st(1) +; X64-GISEL-NEXT: movzbl (%rdi), %eax +; X64-GISEL-NEXT: andl $1, %eax +; X64-GISEL-NEXT: testl %eax, %eax +; X64-GISEL-NEXT: fxch %st(1) +; X64-GISEL-NEXT: fcmove %st(1), %st +; X64-GISEL-NEXT: fstp %st(1) +; X64-GISEL-NEXT: retq + %test = load i1, ptr %p + %add = fadd x86_fp80 %a, %b + %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b + ret x86_fp80 %ret +} diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll index 3279a50..7a08f3e 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll @@ -624,6 +624,52 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, ptr n ret void } +define i32 @PR164107(<16 x i1> %0) { +; AVX1-LABEL: PR164107: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0 +; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 +; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 +; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX2-LABEL: PR164107: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0 +; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0 +; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0 +; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, 
%xmm0 +; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: PR164107: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1 +; AVX512-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1 +; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm0 +; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512-NEXT: vpmovsxwq %xmm0, %zmm0 +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: ret{{[l|q]}} + %cmp = shufflevector <16 x i1> %0, <16 x i1> zeroinitializer, <16 x i32> zeroinitializer + %sext = sext <16 x i1> %cmp to <16 x i64> + %bc.1 = bitcast <16 x i64> %sext to <64 x i16> + %vecinit15.i = shufflevector <64 x i16> %bc.1, <64 x i16> zeroinitializer, <16 x i32> <i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56> + %conv16.i = sext <16 x i16> %vecinit15.i to <16 x i64> + %bc.2 = bitcast <16 x i64> %conv16.i to <32 x i32> + %conv22.i = extractelement <32 x i32> %bc.2, i64 4 + ret i32 %conv22.i +} + define <4 x i64> @concat_self_v4i64(<2 x i64> %x) { ; AVX1-LABEL: concat_self_v4i64: ; AVX1: # %bb.0: |