diff options
Diffstat (limited to 'llvm/test')
46 files changed, 3232 insertions, 96 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/sched.group.classification.mir b/llvm/test/CodeGen/AMDGPU/sched.group.classification.mir new file mode 100644 index 0000000..a4aad57 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sched.group.classification.mir @@ -0,0 +1,59 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck %s + +--- +name: buffer_load_lds_not_valu +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: buffer_load_lds_not_valu + ; CHECK: liveins: $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $exec = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[V_ADD_U32_e32_]], implicit $exec + ; CHECK-NEXT: $m0 = S_MOV_B32 0 + ; CHECK-NEXT: BUFFER_LOAD_DWORDX4_LDS_OFFEN [[DEF]], [[DEF1]], 0, 0, 0, 0, implicit $exec, implicit $m0 + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]], implicit $exec + ; CHECK-NEXT: $m0 = S_MOV_B32 1 + ; CHECK-NEXT: BUFFER_LOAD_DWORDX4_LDS_OFFEN [[DEF]], [[DEF1]], 0, 0, 0, 0, implicit $exec, implicit $m0 + ; CHECK-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_3]], [[V_ADD_U32_e32_4]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_4]], [[V_ADD_U32_e32_5]], implicit $exec + ; CHECK-NEXT: dead [[V_ADD_U32_e32_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_5]], [[V_ADD_U32_e32_6]], implicit $exec + ; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 2, 0 + ; CHECK-NEXT: SCHED_GROUP_BARRIER 4, 1, 0 + ; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 2, 0 + ; CHECK-NEXT: SCHED_GROUP_BARRIER 4, 1, 0 + ; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 4, 0 + ; CHECK-NEXT: S_ENDPGM 0 + $exec = IMPLICIT_DEF + %0:vgpr_32 = IMPLICIT_DEF + %1:sgpr_128 = IMPLICIT_DEF + %2:vgpr_32 = IMPLICIT_DEF + %3:vgpr_32 = IMPLICIT_DEF + %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec + %5:vgpr_32 = V_ADD_U32_e32 %3, %4, implicit $exec + $m0 = S_MOV_B32 0 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %0, %1, 0, 0, 0, 0, implicit $exec, implicit $m0 + $m0 = S_MOV_B32 1 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %0, %1, 0, 0, 0, 0, implicit $exec, implicit $m0 + %6:vgpr_32 = V_ADD_U32_e32 %4, %5, implicit $exec + %7:vgpr_32 = V_ADD_U32_e32 %5, %6, implicit $exec + %8:vgpr_32 = V_ADD_U32_e32 %6, %7, implicit $exec + %9:vgpr_32 = V_ADD_U32_e32 %7, %8, implicit $exec + %10:vgpr_32 = V_ADD_U32_e32 %8, %9, implicit $exec + %11:vgpr_32 = V_ADD_U32_e32 %9, %10, implicit $exec + SCHED_GROUP_BARRIER 2, 2, 0 + SCHED_GROUP_BARRIER 4, 1 ,0 + SCHED_GROUP_BARRIER 2, 2, 0 + SCHED_GROUP_BARRIER 4, 1 ,0 + SCHED_GROUP_BARRIER 2, 4, 0 + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir b/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir index 9553fcc..f11fe4a 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir @@ -59,6 +59,15 @@ body: | ... --- +name: src_shared_base_to_vcc +body: | + bb.0: + ; GFX9-LABEL: name: src_shared_base_to_vcc + ; GFX9: $vcc = S_MOV_B64 $src_shared_base + $vcc = COPY $src_shared_base +... + +--- name: sgpr96_aligned_src_dst body: | bb.0: diff --git a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir index 1030917..302f70f 100644 --- a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir +++ b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=aarch64 -run-pass=prologepilog -run-pass=aarch64-ptrauth -o - %s 2>&1 | FileCheck %s +# RUN: llc -mtriple=aarch64 -run-pass=prologepilog -run-pass=aarch64-ptrauth -o - %s 2>&1 | FileCheck --strict-whitespace %s --- | target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64" diff --git a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll b/llvm/test/CodeGen/PowerPC/compare-vector-with-zero.ll index 0f7e0c7..1325abf 100644 --- a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll +++ b/llvm/test/CodeGen/PowerPC/compare-vector-with-zero.ll @@ -95,3 +95,80 @@ declare i4 @llvm.ctpop.i4(i4) #1 !6 = !{!"short", !7, i64 0} !7 = !{!"omnipotent char", !8, i64 0} !8 = !{!"Simple C/C++ TBAA"} + +; Function to lockdown changes for floating point vector comparisons +define range(i32 0, 5) i32 @cols_needed(ptr %colauths){ +; POWERPC_64LE-LABEL: cols_needed: +; POWERPC_64LE: # %bb.0: # %entry +; POWERPC_64LE-NEXT: lxv vs0, 0(r3) +; POWERPC_64LE-NEXT: xxlxor vs1, vs1, vs1 +; POWERPC_64LE-NEXT: li r4, 4 +; POWERPC_64LE-NEXT: li r3, 0 +; POWERPC_64LE-NEXT: xvcmpeqsp vs0, vs0, vs1 +; POWERPC_64LE-NEXT: xxlnor v2, vs0, vs0 +; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2 +; POWERPC_64LE-NEXT: vextuwrx r3, r3, v2 +; POWERPC_64LE-NEXT: rlwinm r4, r4, 1, 30, 30 +; POWERPC_64LE-NEXT: sub r3, r4, r3 +; POWERPC_64LE-NEXT: mfvsrwz r4, v2 +; POWERPC_64LE-NEXT: rlwinm r4, r4, 2, 29, 29 +; POWERPC_64LE-NEXT: or r3, r3, r4 +; POWERPC_64LE-NEXT: li r4, 12 +; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2 +; POWERPC_64LE-NEXT: slwi r4, r4, 3 +; POWERPC_64LE-NEXT: or r3, r3, r4 +; POWERPC_64LE-NEXT: clrlwi r3, r3, 28 +; POWERPC_64LE-NEXT: stb r3, -1(r1) +; POWERPC_64LE-NEXT: lbz r3, -1(r1) +; POWERPC_64LE-NEXT: popcntd r3, r3 +; POWERPC_64LE-NEXT: blr +; +; POWERPC_64-LABEL: cols_needed: +; POWERPC_64: # %bb.0: # %entry +; POWERPC_64-NEXT: lxv vs0, 0(r3) +; POWERPC_64-NEXT: xxlxor vs1, vs1, vs1 +; POWERPC_64-NEXT: li r4, 8 +; POWERPC_64-NEXT: xvcmpeqsp vs0, vs0, vs1 +; POWERPC_64-NEXT: xxlnor v2, vs0, vs0 +; POWERPC_64-NEXT: vextuwlx r4, r4, v2 +; POWERPC_64-NEXT: mfvsrwz r3, v2 +; POWERPC_64-NEXT: rlwinm r4, r4, 1, 30, 30 +; POWERPC_64-NEXT: rlwimi r4, r3, 2, 29, 29 +; POWERPC_64-NEXT: li r3, 0 +; POWERPC_64-NEXT: vextuwlx r3, r3, v2 +; POWERPC_64-NEXT: rlwimi r4, r3, 3, 0, 28 +; POWERPC_64-NEXT: li r3, 12 +; POWERPC_64-NEXT: vextuwlx r3, r3, v2 +; POWERPC_64-NEXT: sub r3, r4, r3 +; POWERPC_64-NEXT: clrlwi r3, r3, 28 +; POWERPC_64-NEXT: stb r3, -1(r1) +; POWERPC_64-NEXT: lbz r3, -1(r1) +; POWERPC_64-NEXT: popcntd r3, r3 +; POWERPC_64-NEXT: blr +; +; POWERPC_32-LABEL: cols_needed: +; POWERPC_32: # %bb.0: # %entry +; POWERPC_32-NEXT: lxv vs0, 0(r3) +; POWERPC_32-NEXT: xxlxor vs1, vs1, vs1 +; POWERPC_32-NEXT: xvcmpeqsp vs0, vs0, vs1 +; POWERPC_32-NEXT: xxlnor vs0, vs0, vs0 +; POWERPC_32-NEXT: stxv vs0, -32(r1) +; POWERPC_32-NEXT: lwz r3, -24(r1) +; POWERPC_32-NEXT: lwz r4, -28(r1) +; POWERPC_32-NEXT: rlwinm r3, r3, 1, 30, 30 +; POWERPC_32-NEXT: rlwimi r3, r4, 2, 29, 29 +; POWERPC_32-NEXT: lwz r4, -32(r1) +; POWERPC_32-NEXT: rlwimi r3, r4, 3, 0, 28 +; POWERPC_32-NEXT: lwz r4, -20(r1) +; POWERPC_32-NEXT: sub r3, r3, r4 +; POWERPC_32-NEXT: clrlwi r3, r3, 28 +; POWERPC_32-NEXT: popcntw r3, r3 +; POWERPC_32-NEXT: blr +entry: + %0 = load <4 x float>, ptr %colauths, align 4, !tbaa !5 + %1 = fcmp une <4 x float> %0, zeroinitializer + %2 = bitcast <4 x i1> %1 to i4 + %3 = tail call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 %2) + %4 = zext nneg i4 %3 to i32 + ret i32 %4 +} diff --git a/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll index 2a46a59..4f036d3 100644 --- a/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll +++ b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll @@ -221,8 +221,8 @@ define i64 @test12(i64 %0) #0 { ; ; RV64-LABEL: test12: ; RV64: # %bb.0: # %entry -; RV64-NEXT: addiw a0, a0, -16 -; RV64-NEXT: addi a0, a0, 13 +; RV64-NEXT: addi a0, a0, -16 +; RV64-NEXT: addiw a0, a0, 13 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/i64-icmp.ll b/llvm/test/CodeGen/RISCV/i64-icmp.ll index 88d989d..2742b9a 100644 --- a/llvm/test/CodeGen/RISCV/i64-icmp.ll +++ b/llvm/test/CodeGen/RISCV/i64-icmp.ll @@ -708,8 +708,7 @@ define i64 @icmp_sle_constant_neg_2050(i64 %a) nounwind { define i64 @icmp_eq_zext_inreg_small_constant(i64 %a) nounwind { ; RV64I-LABEL: icmp_eq_zext_inreg_small_constant: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: addi a0, a0, -123 +; RV64I-NEXT: addiw a0, a0, -123 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret %1 = and i64 %a, 4294967295 @@ -748,8 +747,7 @@ define i64 @icmp_ne_zext_inreg_small_constant(i64 %a) nounwind { define i64 @icmp_ne_zext_inreg_large_constant(i64 %a) nounwind { ; RV64I-LABEL: icmp_ne_zext_inreg_large_constant: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: addi a0, a0, 2 +; RV64I-NEXT: addiw a0, a0, 2 ; RV64I-NEXT: snez a0, a0 ; RV64I-NEXT: ret %1 = and i64 %a, 4294967295 diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive-O0-ATM-ATK.ll b/llvm/test/CodeGen/RISCV/rvv/sifive-O0-ATM-ATK.ll new file mode 100644 index 0000000..d9a49a1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive-O0-ATM-ATK.ll @@ -0,0 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+v -O0 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-RV64 + +define void @matmul() { +; CHECK-RV64-LABEL: matmul: +; CHECK-RV64: # %bb.0: # %entry +; CHECK-RV64-NEXT: li a0, 0 +; CHECK-RV64-NEXT: vsetvli zero, a0, 512 +; CHECK-RV64-NEXT: sf.vsettm zero, a0 +; CHECK-RV64-NEXT: sf.vtzero.t mt0 +; CHECK-RV64-NEXT: ret +entry: + call void @llvm.riscv.sf.vtzero.t.i64(i64 0, i64 0, i64 0, i64 3, i64 1) + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare void @llvm.riscv.sf.vtzero.t.i64(i64 immarg, i64, i64, i64 immarg, i64 immarg) #0 diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive-xsfmm-vset-insert.mir b/llvm/test/CodeGen/RISCV/rvv/sifive-xsfmm-vset-insert.mir new file mode 100644 index 0000000..389283a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive-xsfmm-vset-insert.mir @@ -0,0 +1,523 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v \ +# RUN: -run-pass=phi-node-elimination,register-coalescer,riscv-insert-vsetvli | FileCheck %s + +--- | + define void @xsfmm_same_state(<vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 noundef %tm, i64 noundef %tn, i64 noundef %tk) { + entry: + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2) + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2) + ret void + } + + define void @xsfmm_different_state(<vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk) { + entry: + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2) + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 4) + ret void + } + + define void @xsfmm_different_state_bf(<vscale x 32 x half> %tile1, <vscale x 32 x bfloat> %tile2, i64 %tm, i64 %tn, i64 %tk) { + entry: + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile1, i64 %tm, i64 %tn, i64 %tk, i64 2) + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32bf16(i64 2, <vscale x 32 x bfloat> %tile2, <vscale x 32 x bfloat> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2) + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile1, i64 %tm, i64 %tn, i64 %tk, i64 2) + ret void + } + + define <vscale x 64 x i8> @interleave_rvv_and_xsfmm(<vscale x 64 x i8> %tile, i64 %vl, ptr %base) { + entry: + %0 = call <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.i64(i64 1, i64 %vl) + %1 = call <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> %tile, <vscale x 64 x i8> %0, i64 %vl) + call void @llvm.riscv.sf.vste16.i64(i64 1, ptr %base, i64 %vl) + ret <vscale x 64 x i8> %1 + } + + define <vscale x 64 x i8> @interleave_rvv_and_xsfmm2(<vscale x 64 x i8> %tile, i64 %vl, ptr %base) { + entry: + %0 = call <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> %tile, <vscale x 64 x i8> %tile, i64 %vl) + %1 = call <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.i64(i64 1, i64 %vl) + %2 = call <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> %tile, <vscale x 64 x i8> %0, i64 %vl) + call void @llvm.riscv.sf.vste16.i64(i64 1, ptr %base, i64 %vl) + ret <vscale x 64 x i8> %2 + } + + define void @consecutive_xsfmm(<vscale x 32 x half> %tile, i64 %tm, i64 %tn, i64 %tk, ptr %base) { + entry: + tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 0, <vscale x 32 x half> %tile, <vscale x 32 x half> %tile, i64 %tm, i64 %tn, i64 %tk, i64 2) + call void @llvm.riscv.sf.vste16.i64(i64 0, ptr %base, i64 %tn) + ret void + } + + define i64 @vsettnt_max(i64 %vl) { + entry: + %0 = call i64 @llvm.riscv.sf.vsettm.i64(i64 %vl, i64 1, i64 2) + %1 = call i64 @llvm.riscv.sf.vsettnt_max.i64(i64 1, i64 2) + ret i64 %0 + } + + define i64 @single_vsettm(i64 %vl) { + entry: + %0 = call i64 @llvm.riscv.sf.vsettm.i64(i64 %vl, i64 1, i64 2) + ret i64 %0 + } + + define i64 @single_vsettn(i64 %vl) { + entry: + %0 = call i64 @llvm.riscv.sf.vsettn.i64(i64 %vl, i64 1, i64 2) + ret i64 %0 + } + + define i64 @single_vsettk(i64 %vl) { + entry: + %0 = call i64 @llvm.riscv.sf.vsettk.i64(i64 %vl, i64 1, i64 2) + ret i64 %0 + } + + define void @sf_vtzero(i64 %tm, i64 %tn) { + entry: + call void @llvm.riscv.sf.vtzero.i64(i64 1, i64 %tm, i64 %tn, i64 3, i64 4) + ret void + } + + declare void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64, <vscale x 32 x half>, <vscale x 32 x half>, i64, i64, i64, i64) + declare void @llvm.riscv.sf.mm.f.f.i64.nxv32bf16(i64, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>, i64, i64, i64, i64) + declare <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.i64(i64, i64) + declare <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i8>, i64) + declare void @llvm.riscv.sf.vste16.i64(i64, ptr, i64) + declare i64 @llvm.riscv.sf.vsettnt_max.i64(i64, i64) + declare i64 @llvm.riscv.sf.vsettm.i64(i64, i64, i64) + declare i64 @llvm.riscv.sf.vsettn.i64(i64, i64, i64) + declare i64 @llvm.riscv.sf.vsettk.i64(i64, i64, i64) + declare void @llvm.riscv.sf.vtzero.i64(i64, i64, i64, i64, i64) +... +--- +name: xsfmm_same_state +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vrm8 } + - { id: 1, class: vrm8 } + - { id: 2, class: gprnox0 } + - { id: 3, class: gprnox0 } + - { id: 4, class: gprnox0 } +liveins: + - { reg: '$v8m8', virtual-reg: '%0' } + - { reg: '$v8m8', virtual-reg: '%1' } + - { reg: '$x10', virtual-reg: '%2' } + - { reg: '$x11', virtual-reg: '%3' } + - { reg: '$x12', virtual-reg: '%4' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8m8, $v16m8, $x10, $x11, $x12 + ; CHECK-LABEL: name: xsfmm_same_state + ; CHECK: liveins: $v8m8, $v16m8, $x10, $x11, $x12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm8 = COPY $v16m8 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vrm8 = COPY $v8m8 + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032 /* e16, w2 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoRET + %4:gprnox0 = COPY $x12 + %3:gprnox0 = COPY $x11 + %2:gprnox0 = COPY $x10 + %1:vrm8 = COPY $v16m8 + %0:vrm8 = COPY $v8m8 + PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm + PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm + PseudoRET +... +--- +name: xsfmm_different_state +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vrm8 } + - { id: 1, class: vrm8 } + - { id: 2, class: gprnox0 } + - { id: 3, class: gprnox0 } + - { id: 4, class: gprnox0 } +liveins: + - { reg: '$v8m8', virtual-reg: '%0' } + - { reg: '$v8m8', virtual-reg: '%1' } + - { reg: '$x10', virtual-reg: '%2' } + - { reg: '$x11', virtual-reg: '%3' } + - { reg: '$x12', virtual-reg: '%4' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8m8, $v16m8, $x10, $x11, $x12 + ; CHECK-LABEL: name: xsfmm_different_state + ; CHECK: liveins: $v8m8, $v16m8, $x10, $x11, $x12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm8 = COPY $v16m8 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vrm8 = COPY $v8m8 + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032 /* e16, w2 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1544 /* e16, w4 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 3, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 3, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 4, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoRET + %4:gprnox0 = COPY $x12 + %3:gprnox0 = COPY $x11 + %2:gprnox0 = COPY $x10 + %1:vrm8 = COPY $v16m8 + %0:vrm8 = COPY $v8m8 + PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm + PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 4, implicit $frm + PseudoRET +... +--- +name: xsfmm_different_state_bf +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vrm8 } + - { id: 1, class: vrm8 } + - { id: 2, class: gprnox0 } + - { id: 3, class: gprnox0 } + - { id: 4, class: gprnox0 } +liveins: + - { reg: '$v8m8', virtual-reg: '%0' } + - { reg: '$v8m8', virtual-reg: '%1' } + - { reg: '$x10', virtual-reg: '%2' } + - { reg: '$x11', virtual-reg: '%3' } + - { reg: '$x12', virtual-reg: '%4' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8m8, $v16m8, $x10, $x11, $x12 + ; CHECK-LABEL: name: xsfmm_different_state_bf + ; CHECK: liveins: $v8m8, $v16m8, $x10, $x11, $x12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x12 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm8 = COPY $v16m8 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vrm8 = COPY $v8m8 + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032 /* e16, w2 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY4]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1288 /* e16, w2 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F_ALT $t2, [[COPY3]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032 /* e16, w2 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY4]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoRET + %4:gprnox0 = COPY $x12 + %3:gprnox0 = COPY $x11 + %2:gprnox0 = COPY $x10 + %1:vrm8 = COPY $v16m8 + %0:vrm8 = COPY $v8m8 + PseudoSF_MM_F_F $t2, %0:vrm8, %0:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm + PseudoSF_MM_F_F_ALT $t2, %1:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm + PseudoSF_MM_F_F $t2, %0:vrm8, %0:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm + PseudoRET +... +--- +name: interleave_rvv_and_xsfmm +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vrm8 } + - { id: 1, class: gprnox0 } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: vrm8 } + - { id: 5, class: vrm8 } +liveins: + - { reg: '$v8m8', virtual-reg: '%0' } + - { reg: '$x10', virtual-reg: '%1' } + - { reg: '$x11', virtual-reg: '%2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8m8, $x10, $x11 + ; CHECK-LABEL: name: interleave_rvv_and_xsfmm + ; CHECK: liveins: $v8m8, $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vrm8 = COPY $v8m8 + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 512 /* e8, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: [[PseudoSF_VTMV_V_T:%[0-9]+]]:vrm8 = PseudoSF_VTMV_V_T [[ADDI]], $noreg, 3, 1, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY1]], 195 /* e8, m8, ta, ma */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: [[PseudoVADD_VV_M8_:%[0-9]+]]:vrm8 = PseudoVADD_VV_M8 $noreg, [[COPY2]], [[PseudoSF_VTMV_V_T]], $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: PseudoSF_VSTE16 [[ADDI]], [[COPY]], $noreg, 4, 1, implicit $vl, implicit $vtype + ; CHECK-NEXT: $v8m8 = COPY [[PseudoVADD_VV_M8_]], implicit $vtype + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %2:gpr = COPY $x11 + %1:gprnox0 = COPY $x10 + %0:vrm8 = COPY $v8m8 + %3:gpr = ADDI $x0, 1 + %4:vrm8 = PseudoSF_VTMV_V_T %3:gpr, %1:gprnox0, 3, 1 + %5:vrm8 = PseudoVADD_VV_M8 $noreg, %0:vrm8, killed %4:vrm8, %1:gprnox0, 3, 0 + PseudoSF_VSTE16 %3:gpr, %2:gpr, %1:gprnox0, 4, 1 + $v8m8 = COPY %5:vrm8 + PseudoRET implicit $v8m8 +... +--- +name: interleave_rvv_and_xsfmm2 +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vrm8 } + - { id: 1, class: gprnox0 } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: vrm8 } + - { id: 5, class: vrm8 } +liveins: + - { reg: '$v8m8', virtual-reg: '%0' } + - { reg: '$x10', virtual-reg: '%1' } + - { reg: '$x11', virtual-reg: '%2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8m8, $x10, $x11 + ; CHECK-LABEL: name: interleave_rvv_and_xsfmm2 + ; CHECK: liveins: $v8m8, $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vrm8 = COPY $v8m8 + ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1 + ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY1]], 195 /* e8, m8, ta, ma */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: [[PseudoVADD_VV_M8_:%[0-9]+]]:vrm8 = PseudoVADD_VV_M8 $noreg, [[COPY2]], [[COPY2]], $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 512 /* e8, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead [[PseudoSF_VTMV_V_T:%[0-9]+]]:vrm8 = PseudoSF_VTMV_V_T [[ADDI]], $noreg, 3, 1, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY1]], 195 /* e8, m8, ta, ma */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: [[PseudoVADD_VV_M8_1:%[0-9]+]]:vrm8 = PseudoVADD_VV_M8 $noreg, [[PseudoVADD_VV_M8_]], [[PseudoVADD_VV_M8_]], $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: PseudoSF_VSTE16 [[ADDI]], [[COPY]], $noreg, 4, 1, implicit $vl, implicit $vtype + ; CHECK-NEXT: $v8m8 = COPY [[PseudoVADD_VV_M8_1]], implicit $vtype + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %2:gpr = COPY $x11 + %1:gprnox0 = COPY $x10 + %0:vrm8 = COPY $v8m8 + %3:gpr = ADDI $x0, 1 + %4:vrm8 = PseudoVADD_VV_M8 $noreg, %0:vrm8, killed %0:vrm8, %1:gprnox0, 3, 0 + %5:vrm8 = PseudoSF_VTMV_V_T %3:gpr, %1:gprnox0, 3, 1 + %6:vrm8 = PseudoVADD_VV_M8 $noreg, %4:vrm8, killed %4:vrm8, %1:gprnox0, 3, 0 + PseudoSF_VSTE16 %3:gpr, %2:gpr, %1:gprnox0, 4, 1 + $v8m8 = COPY %6:vrm8 + PseudoRET implicit $v8m8 +... +--- +name: consecutive_xsfmm +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: vrm8 } + - { id: 1, class: gprnox0 } + - { id: 2, class: gprnox0 } + - { id: 3, class: gprnox0 } + - { id: 4, class: gprnox0 } +liveins: + - { reg: '$v8m8', virtual-reg: '%0' } + - { reg: '$x10', virtual-reg: '%1' } + - { reg: '$x11', virtual-reg: '%2' } + - { reg: '$x12', virtual-reg: '%3' } + - { reg: '$x13', virtual-reg: '%4' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $v8m8, $x10, $x11, $x12, $x13 + ; CHECK-LABEL: name: consecutive_xsfmm + ; CHECK: liveins: $v8m8, $x10, $x11, $x12, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vrm8 = COPY $v8m8 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x11 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprnox0 = COPY $x12 + ; CHECK-NEXT: dead [[COPY4:%[0-9]+]]:gprnox0 = COPY $x13 + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY2]], 1032 /* e16, w2 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY1]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY3]], 4, 2, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY]], [[COPY]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY3]], 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: PseudoSF_VSTE16 [[COPY1]], [[COPY2]], $noreg, 4, 1, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoRET + %0:vrm8 = COPY $v8m8 + %1:gprnox0 = COPY $x10 + %2:gprnox0 = COPY $x11 + %3:gprnox0 = COPY $x12 + %4:gprnox0 = COPY $x13 + PseudoSF_MM_F_F $t2, %0:vrm8, %0:vrm8, 7, %1:gprnox0, %2:gprnox0, %3:gprnox0, 4, 2, implicit $frm + PseudoSF_VSTE16 %1:gprnox0, %2:gprnox0, %3:gprnox0, 4, 1 + PseudoRET +... +--- +name: vsettnt_max +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gprnox0 } +liveins: + - { reg: '$x10', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x10 + ; CHECK-LABEL: name: vsettnt_max + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNTX0 killed $x0, 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead [[PseudoSF_VSETTK:%[0-9]+]]:gprnox0 = PseudoSF_VSETTK [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype + ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_1:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNTX0 $x0, 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: [[PseudoSF_VSETTM:%[0-9]+]]:gprnox0 = PseudoSF_VSETTM [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype + ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTM]] + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:gprnox0 = COPY $x10 + %1:gprnox0 = PseudoSF_VSETTK %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype + %2:gprnox0 = PseudoSF_VSETTNTX0 $x0, 520, implicit-def $vl, implicit-def $vtype, implicit $vtype + %3:gprnox0 = PseudoSF_VSETTM %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype + $x10 = COPY %3:gprnox0 + PseudoRET implicit $x10 +... +--- +name: single_vsettm +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gprnox0 } +liveins: + - { reg: '$x10', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x10 + ; CHECK-LABEL: name: single_vsettm + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNTX0 killed $x0, 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: [[PseudoSF_VSETTM:%[0-9]+]]:gprnox0 = PseudoSF_VSETTM [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype + ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTM]] + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:gprnox0 = COPY $x10 + %1:gprnox0 = PseudoSF_VSETTM %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype + $x10 = COPY %1:gprnox0 + PseudoRET implicit $x10 +... +--- +name: single_vsettn +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gprnox0 } +liveins: + - { reg: '$x10', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x10 + ; CHECK-LABEL: name: single_vsettn + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[PseudoSF_VSETTNT:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNT [[COPY]], 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTNT]] + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:gprnox0 = COPY $x10 + %1:gprnox0 = PseudoSF_VSETTNT %0:gprnox0, 520, implicit-def $vl, implicit-def $vtype, implicit $vtype + $x10 = COPY %1:gprnox0 + PseudoRET implicit $x10 +... +--- +name: single_vsettk +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gprnox0 } +liveins: + - { reg: '$x10', virtual-reg: '%0' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x10 + ; CHECK-LABEL: name: single_vsettk + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNTX0 killed $x0, 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: [[PseudoSF_VSETTK:%[0-9]+]]:gprnox0 = PseudoSF_VSETTK [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype + ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTK]] + ; CHECK-NEXT: PseudoRET implicit $x10 + %0:gprnox0 = COPY $x10 + %1:gprnox0 = PseudoSF_VSETTK %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype + $x10 = COPY %1:gprnox0 + PseudoRET implicit $x10 +... +--- +name: sf_vtzero +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: gprnox0 } + - { id: 1, class: gprnox0 } +liveins: + - { reg: '$x10', virtual-reg: '%0' } + - { reg: '$x11', virtual-reg: '%1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $x10, $x11 + ; CHECK-LABEL: name: sf_vtzero + ; CHECK: liveins: $x10, $x11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11 + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1536 /* e8, w4 */, implicit-def $vl, implicit-def $vtype + ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY]], 3, 3, implicit-def $vtype, implicit $vtype + ; CHECK-NEXT: PseudoSF_VTZERO_T $t1, $noreg, $noreg, 3, 4, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoRET + %0:gprnox0 = COPY $x10 + %1:gprnox0 = COPY $x11 + PseudoSF_VTZERO_T $t1, %0:gprnox0, %1:gprnox0, 3, 4 + PseudoRET +... diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e4m3.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e4m3.ll new file mode 100644 index 0000000..9b9a849 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e4m3.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.e4m3.e4m3.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_e4m3_e4m3_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_e4m3_e4m3_w4_u8m8_u8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.e4m3.e4m3 mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.e4m3.e4m3.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e5m2.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e5m2.ll new file mode 100644 index 0000000..b63974f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e5m2.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.e4m3.e5m2.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.e4m3.e5m2 mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.e4m3.e5m2.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e4m3.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e4m3.ll new file mode 100644 index 0000000..62d629b1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e4m3.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.e5m2.e4m3.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_e5m2_e5m2_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_e5m2_e5m2_w4_u8m8_u8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.e5m2.e4m3 mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.e5m2.e4m3.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e5m2.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e5m2.ll new file mode 100644 index 0000000..7a90c97 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e5m2.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.e5m2.e5m2.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.e5m2.e5m2 mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.e5m2.e5m2.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_f_f.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_f_f.ll new file mode 100644 index 0000000..29451c6 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_f_f.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+xsfmm32a32f -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+xsfmm32a32f -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.f.f.iXLen.nxv32f16(iXLen, <vscale x 32 x half>, <vscale x 32 x half>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_f_f_w2_f16m8(iXLen %mtd, <vscale x 32 x half> %v1, <vscale x 32 x half> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_f_f_w2_f16m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e16, w2 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.f.f mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.f.f.iXLen.nxv32f16(iXLen 0, <vscale x 32 x half> %v1, <vscale x 32 x half> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 2) + ret void +} + +declare void @llvm.riscv.sf.mm.f.f.iXLen.nxv16f32(iXLen, <vscale x 16 x float>, <vscale x 16 x float>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_f_f_w1_f32m8(iXLen %mtd, <vscale x 16 x float> %v1, <vscale x 16 x float> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_f_f_w1_f32m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e32, w1 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.f.f mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.f.f.iXLen.nxv16f32(iXLen 0, <vscale x 16 x float> %v1, <vscale x 16 x float> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 1) + ret void +} + +declare void @llvm.riscv.sf.mm.f.f.iXLen.nxv8f64(iXLen, <vscale x 8 x double>, <vscale x 8 x double>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_f_f_w1_f64m8(iXLen %mtd, <vscale x 8 x double> %v1, <vscale x 8 x double> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_f_f_w1_f64m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e64, w1 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.f.f mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.f.f.iXLen.nxv8f64(iXLen 0, <vscale x 8 x double> %v1, <vscale x 8 x double> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 1) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_s.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_s.ll new file mode 100644 index 0000000..6a4b29f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_s.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.s.s.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_s_s_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_s_s_w4_i8m8_i8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.s.s mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.s.s.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_u.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_u.ll new file mode 100644 index 0000000..79239b0 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_u.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.s.u.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_s_u_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_s_u_w4_i8m8_i8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.s.u mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.s.u.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_s.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_s.ll new file mode 100644 index 0000000..b0d039b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_s.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.u.s.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_u_s_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_u_s_w4_i8m8_i8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.u.s mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.u.s.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_u.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_u.ll new file mode 100644 index 0000000..913c277 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_u.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.u.u.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_u_u_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_u_u_w4_i8m8_i8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.u.u mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.u.u.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte16.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte16.ll new file mode 100644 index 0000000..8048dec --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte16.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vlte16.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vlte16(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vlte16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e16, w1 +; CHECK-NEXT: sf.vlte16 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vlte16.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte32.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte32.ll new file mode 100644 index 0000000..a526dc8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte32.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vlte32.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vlte32(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vlte32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e32, w1 +; CHECK-NEXT: sf.vlte32 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vlte32.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte64.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte64.ll new file mode 100644 index 0000000..ed0c48a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte64.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vlte64.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vlte64(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vlte64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e64, w1 +; CHECK-NEXT: sf.vlte64 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vlte64.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte8.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte8.ll new file mode 100644 index 0000000..67b3ed2 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte8.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vlte8.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vlte8(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vlte8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w1 +; CHECK-NEXT: sf.vlte8 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vlte8.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettk.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettk.ll new file mode 100644 index 0000000..4da37fa --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettk.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare iXLen @llvm.riscv.sf.vsettk.iXLen(iXLen, iXLen, iXLen) + +define iXLen @test_sf_vsettk(iXLen %tk) { +; CHECK-LABEL: test_sf_vsettk: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a1, zero, e16, w2 +; CHECK-NEXT: sf.vsettk a0, a0 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettk.iXLen(iXLen %tk, iXLen 1, iXLen 2) + ret iXLen %0 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettm.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettm.ll new file mode 100644 index 0000000..143c26c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettm.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare iXLen @llvm.riscv.sf.vsettm.iXLen(iXLen, iXLen, iXLen) + +define iXLen @test_sf_vsettm(iXLen %tm) { +; CHECK-LABEL: test_sf_vsettm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a1, zero, e8, w4 +; CHECK-NEXT: sf.vsettm a0, a0 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettm.iXLen(iXLen %tm, iXLen 0, iXLen 3) + ret iXLen %0 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettnt.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettnt.ll new file mode 100644 index 0000000..48fa1bc8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettnt.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen, iXLen, iXLen) + +define iXLen @test_sf_vsettnt_e8w1(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e8w1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e8, w1 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 0, iXLen 1) + ret iXLen %0 +} + +define iXLen @test_sf_vsettnt_e8w2(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e8w2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e8, w2 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 0, iXLen 2) + ret iXLen %0 +} + +define iXLen @test_sf_vsettnt_e8w4(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e8w4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e8, w4 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 0, iXLen 3) + ret iXLen %0 +} + +define iXLen @test_sf_vsettnt_e16w1(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e16w1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e16, w1 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 1, iXLen 1) + ret iXLen %0 +} + +define iXLen @test_sf_vsettnt_e16w2(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e16w2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e16, w2 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 1, iXLen 2) + ret iXLen %0 +} + +define iXLen @test_sf_vsettnt_e16w4(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e16w4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e16, w4 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 1, iXLen 3) + ret iXLen %0 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste16.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste16.ll new file mode 100644 index 0000000..7a76151 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste16.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vste16.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vste16(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vste16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e16, w1 +; CHECK-NEXT: sf.vste16 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vste16.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste32.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste32.ll new file mode 100644 index 0000000..8ff6e6a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste32.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vste32.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vste32(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vste32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e32, w1 +; CHECK-NEXT: sf.vste32 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vste32.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste64.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste64.ll new file mode 100644 index 0000000..53990e4 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste64.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vste64.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vste64(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vste64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e64, w1 +; CHECK-NEXT: sf.vste64 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vste64.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste8.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste8.ll new file mode 100644 index 0000000..09b7259 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste8.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vste8.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vste8(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vste8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w1 +; CHECK-NEXT: sf.vste8 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vste8.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtdiscard.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtdiscard.ll new file mode 100644 index 0000000..394eb60 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtdiscard.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vtdiscard() + +define dso_local void @test_sf_vtdiscard() { +; CHECK-LABEL: test_sf_vtdiscard: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vtdiscard +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtdiscard() + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_t_v.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_t_v.ll new file mode 100644 index 0000000..66c9d26 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_t_v.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vtmv.t.v.nxv32bf16.iXLen(iXLen, <vscale x 32 x bfloat>, iXLen) + +define void @test_sf_vtmv_t_v_bf16m8(iXLen %tss, <vscale x 32 x bfloat> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_bf16m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv32bf16.iXLen(iXLen %tss, <vscale x 32 x bfloat> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv32f16.iXLen(iXLen, <vscale x 32 x half>, iXLen) + +define void @test_sf_vtmv_t_v_f16(iXLen %tss, <vscale x 32 x half> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv32f16.iXLen(iXLen %tss, <vscale x 32 x half> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv16f32.iXLen(iXLen, <vscale x 16 x float>, iXLen) + +define void @test_sf_vtmv_t_v_f32(iXLen %tss, <vscale x 16 x float> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv16f32.iXLen(iXLen %tss, <vscale x 16 x float> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv8f64.iXLen(iXLen, <vscale x 8 x double>, iXLen) + +define void @test_sf_vtmv_t_v_f64(iXLen %tss, <vscale x 8 x double> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv8f64.iXLen(iXLen %tss, <vscale x 8 x double> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv64i8.iXLen(iXLen, <vscale x 64 x i8>, iXLen) + +define void @test_sf_vtmv_t_v_i8(iXLen %tss, <vscale x 64 x i8> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e8, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv64i8.iXLen(iXLen %tss, <vscale x 64 x i8> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv32i16.iXLen(iXLen, <vscale x 32 x i16>, iXLen) + +define void @test_sf_vtmv_t_v_i16(iXLen %tss, <vscale x 32 x i16> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv32i16.iXLen(iXLen %tss, <vscale x 32 x i16> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv16i32.iXLen(iXLen, <vscale x 16 x i32>, iXLen) + +define void @test_sf_vtmv_t_v_i32(iXLen %tss, <vscale x 16 x i32> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv16i32.iXLen(iXLen %tss, <vscale x 16 x i32> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv8i64.iXLen(iXLen, <vscale x 8 x i64>, iXLen) + +define void @test_sf_vtmv_t_v_i64(iXLen %tss, <vscale x 8 x i64> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv8i64.iXLen(iXLen %tss, <vscale x 8 x i64> %src, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_v_t.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_v_t.ll new file mode 100644 index 0000000..0dcc2ab --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_v_t.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare <vscale x 32 x bfloat> @llvm.riscv.sf.vtmv.v.t.nxv32bf16.iXLen(iXLen, iXLen) + +define <vscale x 32 x bfloat> @test_sf_vtmv_v_t_bf16m8(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_bf16m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 32 x bfloat> @llvm.riscv.sf.vtmv.v.t.nxv32bf16.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 32 x bfloat> %0 +} + +declare <vscale x 32 x half> @llvm.riscv.sf.vtmv.v.t.nxv32f16.iXLen(iXLen, iXLen) + +define <vscale x 32 x half> @test_sf_vtmv_v_t_f16(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 32 x half> @llvm.riscv.sf.vtmv.v.t.nxv32f16.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 32 x half> %0 +} + +declare <vscale x 16 x float> @llvm.riscv.sf.vtmv.v.t.nxv16f32.iXLen(iXLen, iXLen) + +define <vscale x 16 x float> @test_sf_vtmv_v_t_f32(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 16 x float> @llvm.riscv.sf.vtmv.v.t.nxv16f32.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 16 x float> %0 +} + +declare <vscale x 8 x double> @llvm.riscv.sf.vtmv.v.t.nxv8f64.iXLen(iXLen, iXLen) + +define <vscale x 8 x double> @test_sf_vtmv_v_t_f64(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 8 x double> @llvm.riscv.sf.vtmv.v.t.nxv8f64.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 8 x double> %0 +} + +declare <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.iXLen(iXLen, iXLen) + +define <vscale x 64 x i8> @test_sf_vtmv_v_t_i8(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e8, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 64 x i8> %0 +} + +declare <vscale x 32 x i16> @llvm.riscv.sf.vtmv.v.t.nxv32i16.iXLen(iXLen, iXLen) + +define <vscale x 32 x i16> @test_sf_vtmv_v_t_i16(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 32 x i16> @llvm.riscv.sf.vtmv.v.t.nxv32i16.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 32 x i16> %0 +} + +declare <vscale x 16 x i32> @llvm.riscv.sf.vtmv.v.t.nxv16i32.iXLen(iXLen, iXLen) + +define <vscale x 16 x i32> @test_sf_vtmv_v_t_i32(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 16 x i32> @llvm.riscv.sf.vtmv.v.t.nxv16i32.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 16 x i32> %0 +} + +declare <vscale x 8 x i64> @llvm.riscv.sf.vtmv.v.t.nxv8i64.iXLen(iXLen, iXLen) + +define <vscale x 8 x i64> @test_sf_vtmv_v_t_i64(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 8 x i64> @llvm.riscv.sf.vtmv.v.t.nxv8i64.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 8 x i64> %0 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtzero_t.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtzero_t.ll new file mode 100644 index 0000000..bbccb02 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtzero_t.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vtzero.t.iXLen(iXLen, iXLen, iXLen, iXLen, iXLen) +define void @test_sf_vtzero_t(iXLen %tm, iXLen %tn) { +; CHECK-LABEL: test_sf_vtzero_t: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a0 +; CHECK-NEXT: sf.vtzero.t mt0 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtzero.t.iXLen(iXLen 0, iXLen %tm, iXLen %tn, iXLen 3, iXLen 4) + ret void +} + diff --git a/llvm/test/CodeGen/RISCV/select-to-and-zext.ll b/llvm/test/CodeGen/RISCV/select-to-and-zext.ll index 2f03ff9..318268a 100644 --- a/llvm/test/CodeGen/RISCV/select-to-and-zext.ll +++ b/llvm/test/CodeGen/RISCV/select-to-and-zext.ll @@ -15,8 +15,7 @@ define i32 @from_cmpeq(i32 %xx, i32 %y) { ; ; RV64I-LABEL: from_cmpeq: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: addi a0, a0, -9 +; RV64I-NEXT: addiw a0, a0, -9 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: ret @@ -39,8 +38,7 @@ define i32 @from_cmpeq_fail_bad_andmask(i32 %xx, i32 %y) { ; ; RV64I-LABEL: from_cmpeq_fail_bad_andmask: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: addi a0, a0, -9 +; RV64I-NEXT: addiw a0, a0, -9 ; RV64I-NEXT: snez a0, a0 ; RV64I-NEXT: addi a0, a0, -1 ; RV64I-NEXT: and a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/setcc-logic.ll b/llvm/test/CodeGen/RISCV/setcc-logic.ll index fabb573..4e14893 100644 --- a/llvm/test/CodeGen/RISCV/setcc-logic.ll +++ b/llvm/test/CodeGen/RISCV/setcc-logic.ll @@ -104,9 +104,8 @@ define i1 @and_icmps_const_not1bit_diff(i32 %x) nounwind { ; ; RV64I-LABEL: and_icmps_const_not1bit_diff: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: addi a1, a0, -44 -; RV64I-NEXT: addi a0, a0, -92 +; RV64I-NEXT: addiw a1, a0, -44 +; RV64I-NEXT: addiw a0, a0, -92 ; RV64I-NEXT: snez a1, a1 ; RV64I-NEXT: snez a0, a0 ; RV64I-NEXT: and a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll index bdbe4ed..07bfbe6 100644 --- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll +++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll @@ -674,8 +674,7 @@ define i32 @sext_of_not_cmp_i32(i32 %x) { ; ; RV64-LABEL: sext_of_not_cmp_i32: ; RV64: # %bb.0: -; RV64-NEXT: sext.w a0, a0 -; RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: addiw a0, a0, -7 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: ret @@ -718,8 +717,7 @@ define i32 @dec_of_zexted_cmp_i32(i32 %x) { ; ; RV64-LABEL: dec_of_zexted_cmp_i32: ; RV64: # %bb.0: -; RV64-NEXT: sext.w a0, a0 -; RV64-NEXT: addi a0, a0, -7 +; RV64-NEXT: addiw a0, a0, -7 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: addi a0, a0, -1 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll index 2751332c..bf6802d 100644 --- a/llvm/test/CodeGen/RISCV/xaluo.ll +++ b/llvm/test/CodeGen/RISCV/xaluo.ll @@ -1047,8 +1047,8 @@ define zeroext i1 @usubo.i32.constant.lhs(i32 signext %v1, ptr %res) { ; RV64-LABEL: usubo.i32.constant.lhs: ; RV64: # %bb.0: # %entry ; RV64-NEXT: li a2, -2 -; RV64-NEXT: subw a2, a2, a0 -; RV64-NEXT: addi a0, a2, 1 +; RV64-NEXT: sub a2, a2, a0 +; RV64-NEXT: addiw a0, a2, 1 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: sw a2, 0(a1) ; RV64-NEXT: ret @@ -1065,8 +1065,8 @@ define zeroext i1 @usubo.i32.constant.lhs(i32 signext %v1, ptr %res) { ; RV64ZBA-LABEL: usubo.i32.constant.lhs: ; RV64ZBA: # %bb.0: # %entry ; RV64ZBA-NEXT: li a2, -2 -; RV64ZBA-NEXT: subw a2, a2, a0 -; RV64ZBA-NEXT: addi a0, a2, 1 +; RV64ZBA-NEXT: sub a2, a2, a0 +; RV64ZBA-NEXT: addiw a0, a2, 1 ; RV64ZBA-NEXT: seqz a0, a0 ; RV64ZBA-NEXT: sw a2, 0(a1) ; RV64ZBA-NEXT: ret @@ -1083,8 +1083,8 @@ define zeroext i1 @usubo.i32.constant.lhs(i32 signext %v1, ptr %res) { ; RV64ZICOND-LABEL: usubo.i32.constant.lhs: ; RV64ZICOND: # %bb.0: # %entry ; RV64ZICOND-NEXT: li a2, -2 -; RV64ZICOND-NEXT: subw a2, a2, a0 -; RV64ZICOND-NEXT: addi a0, a2, 1 +; RV64ZICOND-NEXT: sub a2, a2, a0 +; RV64ZICOND-NEXT: addiw a0, a2, 1 ; RV64ZICOND-NEXT: seqz a0, a0 ; RV64ZICOND-NEXT: sw a2, 0(a1) ; RV64ZICOND-NEXT: ret diff --git a/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll new file mode 100644 index 0000000..abbd953 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mcpu=mvp -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s + +; This test ensures that loads and stores generated for small memcpy et al use +; constant offset folding. + + +target triple = "wasm32-unknown-unknown" + +define void @call_memset(ptr) #0 { +; CHECK-LABEL: call_memset: +; CHECK: .functype call_memset (i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.const $push0=, 0 +; CHECK-NEXT: i64.store 8($0):p2align=0, $pop0 +; CHECK-NEXT: i64.const $push1=, 0 +; CHECK-NEXT: i64.store 0($0):p2align=0, $pop1 +; CHECK-NEXT: # fallthrough-return + call void @llvm.memset.p0.i32(ptr align 1 %0, i8 0, i32 16, i1 false) + ret void +} + +define void @call_memcpy(ptr %dst, ptr %src) #0 { +; CHECK-LABEL: call_memcpy: +; CHECK: .functype call_memcpy (i32, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.load $push0=, 8($1):p2align=0 +; CHECK-NEXT: i64.store 8($0):p2align=0, $pop0 +; CHECK-NEXT: i64.load $push1=, 0($1):p2align=0 +; CHECK-NEXT: i64.store 0($0):p2align=0, $pop1 +; CHECK-NEXT: # fallthrough-return + call void @llvm.memcpy.p0.p0.i32(ptr align 1 %dst, ptr align 1 %src, i32 16, i1 false) + ret void +} + + +define void @call_memmove(ptr %dst, ptr %src) #0 { +; CHECK-LABEL: call_memmove: +; CHECK: .functype call_memmove (i32, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i64.load $2=, 0($1):p2align=0 +; CHECK-NEXT: i64.load $push0=, 8($1):p2align=0 +; CHECK-NEXT: i64.store 8($0):p2align=0, $pop0 +; CHECK-NEXT: i64.store 0($0):p2align=0, $2 +; CHECK-NEXT: # fallthrough-return + call void @llvm.memmove.p0.p0.i32(ptr align 1 %dst, ptr align 1 %src, i32 16, i1 false) + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/simd-dot-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-dot-reductions.ll new file mode 100644 index 0000000..3654aae --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-dot-reductions.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mattr=+simd128 | FileCheck %s + +target triple = "wasm32-unknown-unknown" + +define <4 x i32> @dot_sext_1(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: dot_sext_1: +; CHECK: .functype dot_sext_1 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.dot_i16x8_s +; CHECK-NEXT: # fallthrough-return + %sext1 = sext <8 x i16> %a to <8 x i32> + %sext2 = sext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %sext1, %sext2 + %shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %res = add <4 x i32> %shuffle1, %shuffle2 + ret <4 x i32> %res +} + + +define <4 x i32> @dot_sext_2(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: dot_sext_2: +; CHECK: .functype dot_sext_2 (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.dot_i16x8_s +; CHECK-NEXT: # fallthrough-return + %sext1 = sext <8 x i16> %a to <8 x i32> + %sext2 = sext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %sext1, %sext2 + %shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %res = add <4 x i32> %shuffle2, %shuffle1 + ret <4 x i32> %res +} + +define <4 x i32> @dot_sext_self(<8 x i16> %v) { +; CHECK-LABEL: dot_sext_self: +; CHECK: .functype dot_sext_self (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32x4.dot_i16x8_s +; CHECK-NEXT: # fallthrough-return + %sext = sext <8 x i16> %v to <8 x i32> + %mul = mul <8 x i32> %sext, %sext + %shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %res = add <4 x i32> %shuffle1, %shuffle2 + ret <4 x i32> %res +} + +; INFO: Negative test +define <4 x i32> @dot_zext(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: dot_zext: +; CHECK: .functype dot_zext (v128, v128) -> (v128) +; CHECK-NEXT: .local v128 +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.extmul_low_i16x8_u +; CHECK-NEXT: local.tee 2 +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.extmul_high_i16x8_u +; CHECK-NEXT: local.tee 1 +; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK-NEXT: local.get 2 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 +; CHECK-NEXT: i32x4.add +; CHECK-NEXT: # fallthrough-return + %zext1 = zext <8 x i16> %a to <8 x i32> + %zext2 = zext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %zext1, %zext2 + %shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> + %shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> + %res = add <4 x i32> %shuffle1, %shuffle2 + ret <4 x i32> %res +} + +; INFO: Negative test +define <4 x i32> @dot_wrong_shuffle(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: dot_wrong_shuffle: +; CHECK: .functype dot_wrong_shuffle (v128, v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.extmul_low_i16x8_s +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: i32x4.extmul_high_i16x8_s +; CHECK-NEXT: i32x4.add +; CHECK-NEXT: # fallthrough-return + %sext1 = sext <8 x i16> %a to <8 x i32> + %sext2 = sext <8 x i16> %b to <8 x i32> + %mul = mul <8 x i32> %sext1, %sext2 + %shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7> + %res = add <4 x i32> %shuffle1, %shuffle2 + ret <4 x i32> %res +} diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll index e065de3..600241a 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll @@ -2,9 +2,278 @@ ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s --check-prefix=RELAXED ; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128, | FileCheck %s --check-prefix=STRICT +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefix=NOFP16 +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefix=NOSIMD target triple = "wasm32" +define half @fadd_fmul_contract_f16(half %a, half %b, half %c) { +; RELAXED-LABEL: fadd_fmul_contract_f16: +; RELAXED: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: call $push0=, __truncsfhf2, $0 +; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0 +; RELAXED-NEXT: call $push2=, __truncsfhf2, $1 +; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2 +; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3 +; RELAXED-NEXT: call $push5=, __truncsfhf2, $2 +; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5 +; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6 +; RELAXED-NEXT: return $pop7 +; +; STRICT-LABEL: fadd_fmul_contract_f16: +; STRICT: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: call $push0=, __truncsfhf2, $0 +; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0 +; STRICT-NEXT: call $push2=, __truncsfhf2, $1 +; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2 +; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3 +; STRICT-NEXT: call $push5=, __truncsfhf2, $2 +; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5 +; STRICT-NEXT: f32.add $push7=, $pop4, $pop6 +; STRICT-NEXT: return $pop7 +; +; NOFP16-LABEL: fadd_fmul_contract_f16: +; NOFP16: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, __truncsfhf2, $0 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOFP16-NEXT: call $push2=, __truncsfhf2, $1 +; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOFP16-NEXT: call $push5=, __truncsfhf2, $2 +; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 +; NOFP16-NEXT: return $pop7 +; +; NOSIMD-LABEL: fadd_fmul_contract_f16: +; NOSIMD: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, __truncsfhf2, $0 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOSIMD-NEXT: call $push2=, __truncsfhf2, $1 +; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2 +; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 +; NOSIMD-NEXT: return $pop7 + %mul = fmul contract half %b, %a + %add = fadd contract half %mul, %c + ret half %add +} + +define half @fmuladd_contract_f16(half %a, half %b, half %c) { +; RELAXED-LABEL: fmuladd_contract_f16: +; RELAXED: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: call $push0=, __truncsfhf2, $1 +; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0 +; RELAXED-NEXT: call $push2=, __truncsfhf2, $0 +; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2 +; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3 +; RELAXED-NEXT: call $push5=, __truncsfhf2, $2 +; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5 +; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6 +; RELAXED-NEXT: return $pop7 +; +; STRICT-LABEL: fmuladd_contract_f16: +; STRICT: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: call $push0=, __truncsfhf2, $1 +; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0 +; STRICT-NEXT: call $push2=, __truncsfhf2, $0 +; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2 +; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3 +; STRICT-NEXT: call $push5=, __truncsfhf2, $2 +; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5 +; STRICT-NEXT: f32.add $push7=, $pop4, $pop6 +; STRICT-NEXT: return $pop7 +; +; NOFP16-LABEL: fmuladd_contract_f16: +; NOFP16: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, __truncsfhf2, $1 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOFP16-NEXT: call $push2=, __truncsfhf2, $0 +; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOFP16-NEXT: call $push5=, __truncsfhf2, $2 +; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 +; NOFP16-NEXT: return $pop7 +; +; NOSIMD-LABEL: fmuladd_contract_f16: +; NOSIMD: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, __truncsfhf2, $1 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOSIMD-NEXT: call $push2=, __truncsfhf2, $0 +; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2 +; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 +; NOSIMD-NEXT: return $pop7 + %fma = call contract half @llvm.fmuladd(half %a, half %b, half %c) + ret half %fma +} + +define half @fmuladd_f16(half %a, half %b, half %c) { +; RELAXED-LABEL: fmuladd_f16: +; RELAXED: .functype fmuladd_f16 (f32, f32, f32) -> (f32) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: call $push0=, __truncsfhf2, $1 +; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0 +; RELAXED-NEXT: call $push2=, __truncsfhf2, $0 +; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2 +; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3 +; RELAXED-NEXT: call $push5=, __truncsfhf2, $2 +; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5 +; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6 +; RELAXED-NEXT: return $pop7 +; +; STRICT-LABEL: fmuladd_f16: +; STRICT: .functype fmuladd_f16 (f32, f32, f32) -> (f32) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: call $push0=, __truncsfhf2, $1 +; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0 +; STRICT-NEXT: call $push2=, __truncsfhf2, $0 +; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2 +; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3 +; STRICT-NEXT: call $push5=, __truncsfhf2, $2 +; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5 +; STRICT-NEXT: f32.add $push7=, $pop4, $pop6 +; STRICT-NEXT: return $pop7 +; +; NOFP16-LABEL: fmuladd_f16: +; NOFP16: .functype fmuladd_f16 (f32, f32, f32) -> (f32) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, __truncsfhf2, $1 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOFP16-NEXT: call $push2=, __truncsfhf2, $0 +; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOFP16-NEXT: call $push5=, __truncsfhf2, $2 +; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 +; NOFP16-NEXT: return $pop7 +; +; NOSIMD-LABEL: fmuladd_f16: +; NOSIMD: .functype fmuladd_f16 (f32, f32, f32) -> (f32) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, __truncsfhf2, $1 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOSIMD-NEXT: call $push2=, __truncsfhf2, $0 +; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2 +; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 +; NOSIMD-NEXT: return $pop7 + %fma = call half @llvm.fmuladd(half %a, half %b, half %c) + ret half %fma +} + + +define float @fadd_fmul_contract_f32(float %a, float %b, float %c) { +; RELAXED-LABEL: fadd_fmul_contract_f32: +; RELAXED: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32.mul $push0=, $1, $0 +; RELAXED-NEXT: f32.add $push1=, $pop0, $2 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fadd_fmul_contract_f32: +; STRICT: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32.mul $push0=, $1, $0 +; STRICT-NEXT: f32.add $push1=, $pop0, $2 +; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fadd_fmul_contract_f32: +; NOFP16: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32.mul $push0=, $1, $0 +; NOFP16-NEXT: f32.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fadd_fmul_contract_f32: +; NOSIMD: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $1, $0 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $2 +; NOSIMD-NEXT: return $pop1 + %mul = fmul contract float %b, %a + %add = fadd contract float %mul, %c + ret float %add +} + +define float @fmuladd_contract_f32(float %a, float %b, float %c) { +; RELAXED-LABEL: fmuladd_contract_f32: +; RELAXED: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32.mul $push0=, $0, $1 +; RELAXED-NEXT: f32.add $push1=, $pop0, $2 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fmuladd_contract_f32: +; STRICT: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32.mul $push0=, $0, $1 +; STRICT-NEXT: f32.add $push1=, $pop0, $2 +; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_contract_f32: +; NOFP16: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32.mul $push0=, $0, $1 +; NOFP16-NEXT: f32.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_contract_f32: +; NOSIMD: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $0, $1 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $2 +; NOSIMD-NEXT: return $pop1 + %fma = call contract float @llvm.fmuladd(float %a, float %b, float %c) + ret float %fma +} + +define float @fmuladd_f32(float %a, float %b, float %c) { +; RELAXED-LABEL: fmuladd_f32: +; RELAXED: .functype fmuladd_f32 (f32, f32, f32) -> (f32) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32.mul $push0=, $0, $1 +; RELAXED-NEXT: f32.add $push1=, $pop0, $2 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fmuladd_f32: +; STRICT: .functype fmuladd_f32 (f32, f32, f32) -> (f32) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32.mul $push0=, $0, $1 +; STRICT-NEXT: f32.add $push1=, $pop0, $2 +; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_f32: +; NOFP16: .functype fmuladd_f32 (f32, f32, f32) -> (f32) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32.mul $push0=, $0, $1 +; NOFP16-NEXT: f32.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_f32: +; NOSIMD: .functype fmuladd_f32 (f32, f32, f32) -> (f32) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $0, $1 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $2 +; NOSIMD-NEXT: return $pop1 + %fma = call float @llvm.fmuladd(float %a, float %b, float %c) + ret float %fma +} + define double @fadd_fmul_contract_f64(double %a, double %b, double %c) { ; RELAXED-LABEL: fadd_fmul_contract_f64: ; RELAXED: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64) @@ -19,16 +288,94 @@ define double @fadd_fmul_contract_f64(double %a, double %b, double %c) { ; STRICT-NEXT: f64.mul $push0=, $1, $0 ; STRICT-NEXT: f64.add $push1=, $pop0, $2 ; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fadd_fmul_contract_f64: +; NOFP16: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f64.mul $push0=, $1, $0 +; NOFP16-NEXT: f64.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fadd_fmul_contract_f64: +; NOSIMD: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f64.mul $push0=, $1, $0 +; NOSIMD-NEXT: f64.add $push1=, $pop0, $2 +; NOSIMD-NEXT: return $pop1 %mul = fmul contract double %b, %a %add = fadd contract double %mul, %c ret double %add } +define double @fmuladd_f64(double %a, double %b, double %c) { +; RELAXED-LABEL: fmuladd_f64: +; RELAXED: .functype fmuladd_f64 (f64, f64, f64) -> (f64) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64.mul $push0=, $0, $1 +; RELAXED-NEXT: f64.add $push1=, $pop0, $2 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fmuladd_f64: +; STRICT: .functype fmuladd_f64 (f64, f64, f64) -> (f64) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64.mul $push0=, $0, $1 +; STRICT-NEXT: f64.add $push1=, $pop0, $2 +; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_f64: +; NOFP16: .functype fmuladd_f64 (f64, f64, f64) -> (f64) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f64.mul $push0=, $0, $1 +; NOFP16-NEXT: f64.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_f64: +; NOSIMD: .functype fmuladd_f64 (f64, f64, f64) -> (f64) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f64.mul $push0=, $0, $1 +; NOSIMD-NEXT: f64.add $push1=, $pop0, $2 +; NOSIMD-NEXT: return $pop1 + %fma = call double @llvm.fmuladd(double %a, double %b, double %c) + ret double %fma +} + +define double @fmuladd_contract_f64(double %a, double %b, double %c) { +; RELAXED-LABEL: fmuladd_contract_f64: +; RELAXED: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64.mul $push0=, $0, $1 +; RELAXED-NEXT: f64.add $push1=, $pop0, $2 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fmuladd_contract_f64: +; STRICT: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64.mul $push0=, $0, $1 +; STRICT-NEXT: f64.add $push1=, $pop0, $2 +; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_contract_f64: +; NOFP16: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f64.mul $push0=, $0, $1 +; NOFP16-NEXT: f64.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_contract_f64: +; NOSIMD: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f64.mul $push0=, $0, $1 +; NOSIMD-NEXT: f64.add $push1=, $pop0, $2 +; NOSIMD-NEXT: return $pop1 + %fma = call contract double @llvm.fmuladd(double %a, double %b, double %c) + ret double %fma +} + define <4 x float> @fadd_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; RELAXED-LABEL: fadd_fmul_contract_4xf32: ; RELAXED: .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $1, $0 +; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $1, $0, $2 ; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fadd_fmul_contract_4xf32: @@ -37,31 +384,222 @@ define <4 x float> @fadd_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 ; STRICT-NEXT: f32x4.mul $push0=, $1, $0 ; STRICT-NEXT: f32x4.add $push1=, $pop0, $2 ; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fadd_fmul_contract_4xf32: +; NOFP16: .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32x4.mul $push0=, $1, $0 +; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fadd_fmul_contract_4xf32: +; NOSIMD: .functype fadd_fmul_contract_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $8, $4 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $12 +; NOSIMD-NEXT: f32.store 12($0), $pop1 +; NOSIMD-NEXT: f32.mul $push2=, $7, $3 +; NOSIMD-NEXT: f32.add $push3=, $pop2, $11 +; NOSIMD-NEXT: f32.store 8($0), $pop3 +; NOSIMD-NEXT: f32.mul $push4=, $6, $2 +; NOSIMD-NEXT: f32.add $push5=, $pop4, $10 +; NOSIMD-NEXT: f32.store 4($0), $pop5 +; NOSIMD-NEXT: f32.mul $push6=, $5, $1 +; NOSIMD-NEXT: f32.add $push7=, $pop6, $9 +; NOSIMD-NEXT: f32.store 0($0), $pop7 +; NOSIMD-NEXT: return %mul = fmul contract <4 x float> %b, %a %add = fadd contract <4 x float> %mul, %c ret <4 x float> %add } - define <8 x half> @fadd_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { ; RELAXED-LABEL: fadd_fmul_contract_8xf16: ; RELAXED: .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $2, $1, $0 +; RELAXED-NEXT: f16x8.madd $push0=, $1, $0, $2 ; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fadd_fmul_contract_8xf16: ; STRICT: .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128) ; STRICT-NEXT: # %bb.0: -; STRICT-NEXT: f16x8.mul $push0=, $1, $0 -; STRICT-NEXT: f16x8.add $push1=, $pop0, $2 -; STRICT-NEXT: return $pop1 +; STRICT-NEXT: f16x8.madd $push0=, $1, $0, $2 +; STRICT-NEXT: return $pop0 +; +; NOFP16-LABEL: fadd_fmul_contract_8xf16: +; NOFP16: .functype fadd_fmul_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, __truncsfhf2, $8 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOFP16-NEXT: call $push2=, __truncsfhf2, $16 +; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOFP16-NEXT: call $push5=, __truncsfhf2, $24 +; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 +; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7 +; NOFP16-NEXT: i32.store16 14($0), $pop8 +; NOFP16-NEXT: call $push9=, __truncsfhf2, $7 +; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9 +; NOFP16-NEXT: call $push11=, __truncsfhf2, $15 +; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11 +; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12 +; NOFP16-NEXT: call $push14=, __truncsfhf2, $23 +; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14 +; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15 +; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16 +; NOFP16-NEXT: i32.store16 12($0), $pop17 +; NOFP16-NEXT: call $push18=, __truncsfhf2, $6 +; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18 +; NOFP16-NEXT: call $push20=, __truncsfhf2, $14 +; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20 +; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21 +; NOFP16-NEXT: call $push23=, __truncsfhf2, $22 +; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23 +; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24 +; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25 +; NOFP16-NEXT: i32.store16 10($0), $pop26 +; NOFP16-NEXT: call $push27=, __truncsfhf2, $5 +; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27 +; NOFP16-NEXT: call $push29=, __truncsfhf2, $13 +; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29 +; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30 +; NOFP16-NEXT: call $push32=, __truncsfhf2, $21 +; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32 +; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33 +; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34 +; NOFP16-NEXT: i32.store16 8($0), $pop35 +; NOFP16-NEXT: call $push36=, __truncsfhf2, $4 +; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36 +; NOFP16-NEXT: call $push38=, __truncsfhf2, $12 +; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38 +; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39 +; NOFP16-NEXT: call $push41=, __truncsfhf2, $20 +; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41 +; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42 +; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43 +; NOFP16-NEXT: i32.store16 6($0), $pop44 +; NOFP16-NEXT: call $push45=, __truncsfhf2, $3 +; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45 +; NOFP16-NEXT: call $push47=, __truncsfhf2, $11 +; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47 +; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48 +; NOFP16-NEXT: call $push50=, __truncsfhf2, $19 +; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50 +; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51 +; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52 +; NOFP16-NEXT: i32.store16 4($0), $pop53 +; NOFP16-NEXT: call $push54=, __truncsfhf2, $2 +; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54 +; NOFP16-NEXT: call $push56=, __truncsfhf2, $10 +; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56 +; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57 +; NOFP16-NEXT: call $push59=, __truncsfhf2, $18 +; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59 +; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60 +; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61 +; NOFP16-NEXT: i32.store16 2($0), $pop62 +; NOFP16-NEXT: call $push63=, __truncsfhf2, $1 +; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63 +; NOFP16-NEXT: call $push65=, __truncsfhf2, $9 +; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65 +; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66 +; NOFP16-NEXT: call $push68=, __truncsfhf2, $17 +; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68 +; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69 +; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70 +; NOFP16-NEXT: i32.store16 0($0), $pop71 +; NOFP16-NEXT: return +; +; NOSIMD-LABEL: fadd_fmul_contract_8xf16: +; NOSIMD: .functype fadd_fmul_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, __truncsfhf2, $8 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOSIMD-NEXT: call $push2=, __truncsfhf2, $16 +; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24 +; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 +; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7 +; NOSIMD-NEXT: i32.store16 14($0), $pop8 +; NOSIMD-NEXT: call $push9=, __truncsfhf2, $7 +; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9 +; NOSIMD-NEXT: call $push11=, __truncsfhf2, $15 +; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11 +; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12 +; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23 +; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14 +; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15 +; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16 +; NOSIMD-NEXT: i32.store16 12($0), $pop17 +; NOSIMD-NEXT: call $push18=, __truncsfhf2, $6 +; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18 +; NOSIMD-NEXT: call $push20=, __truncsfhf2, $14 +; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20 +; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21 +; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22 +; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23 +; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24 +; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25 +; NOSIMD-NEXT: i32.store16 10($0), $pop26 +; NOSIMD-NEXT: call $push27=, __truncsfhf2, $5 +; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27 +; NOSIMD-NEXT: call $push29=, __truncsfhf2, $13 +; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29 +; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30 +; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21 +; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32 +; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33 +; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34 +; NOSIMD-NEXT: i32.store16 8($0), $pop35 +; NOSIMD-NEXT: call $push36=, __truncsfhf2, $4 +; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36 +; NOSIMD-NEXT: call $push38=, __truncsfhf2, $12 +; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38 +; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39 +; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20 +; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41 +; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42 +; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43 +; NOSIMD-NEXT: i32.store16 6($0), $pop44 +; NOSIMD-NEXT: call $push45=, __truncsfhf2, $3 +; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45 +; NOSIMD-NEXT: call $push47=, __truncsfhf2, $11 +; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47 +; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48 +; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19 +; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50 +; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51 +; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52 +; NOSIMD-NEXT: i32.store16 4($0), $pop53 +; NOSIMD-NEXT: call $push54=, __truncsfhf2, $2 +; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54 +; NOSIMD-NEXT: call $push56=, __truncsfhf2, $10 +; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56 +; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57 +; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18 +; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59 +; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60 +; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61 +; NOSIMD-NEXT: i32.store16 2($0), $pop62 +; NOSIMD-NEXT: call $push63=, __truncsfhf2, $1 +; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63 +; NOSIMD-NEXT: call $push65=, __truncsfhf2, $9 +; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65 +; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66 +; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17 +; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68 +; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69 +; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70 +; NOSIMD-NEXT: i32.store16 0($0), $pop71 +; NOSIMD-NEXT: return %mul = fmul contract <8 x half> %b, %a %add = fadd contract <8 x half> %mul, %c ret <8 x half> %add } - define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; RELAXED-LABEL: fadd_fmul_4xf32: ; RELAXED: .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128) @@ -76,16 +614,412 @@ define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> ; STRICT-NEXT: f32x4.mul $push0=, $1, $0 ; STRICT-NEXT: f32x4.add $push1=, $pop0, $2 ; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fadd_fmul_4xf32: +; NOFP16: .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32x4.mul $push0=, $1, $0 +; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fadd_fmul_4xf32: +; NOSIMD: .functype fadd_fmul_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $8, $4 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $12 +; NOSIMD-NEXT: f32.store 12($0), $pop1 +; NOSIMD-NEXT: f32.mul $push2=, $7, $3 +; NOSIMD-NEXT: f32.add $push3=, $pop2, $11 +; NOSIMD-NEXT: f32.store 8($0), $pop3 +; NOSIMD-NEXT: f32.mul $push4=, $6, $2 +; NOSIMD-NEXT: f32.add $push5=, $pop4, $10 +; NOSIMD-NEXT: f32.store 4($0), $pop5 +; NOSIMD-NEXT: f32.mul $push6=, $5, $1 +; NOSIMD-NEXT: f32.add $push7=, $pop6, $9 +; NOSIMD-NEXT: f32.store 0($0), $pop7 +; NOSIMD-NEXT: return %mul = fmul <4 x float> %b, %a %add = fadd contract <4 x float> %mul, %c ret <4 x float> %add } +define <8 x half> @fmuladd_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; RELAXED-LABEL: fmuladd_contract_8xf16: +; RELAXED: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f16x8.madd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fmuladd_contract_8xf16: +; STRICT: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f16x8.madd $push0=, $0, $1, $2 +; STRICT-NEXT: return $pop0 +; +; NOFP16-LABEL: fmuladd_contract_8xf16: +; NOFP16: .functype fmuladd_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, __truncsfhf2, $16 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOFP16-NEXT: call $push2=, __truncsfhf2, $8 +; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOFP16-NEXT: call $push5=, __truncsfhf2, $24 +; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 +; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7 +; NOFP16-NEXT: i32.store16 14($0), $pop8 +; NOFP16-NEXT: call $push9=, __truncsfhf2, $15 +; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9 +; NOFP16-NEXT: call $push11=, __truncsfhf2, $7 +; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11 +; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12 +; NOFP16-NEXT: call $push14=, __truncsfhf2, $23 +; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14 +; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15 +; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16 +; NOFP16-NEXT: i32.store16 12($0), $pop17 +; NOFP16-NEXT: call $push18=, __truncsfhf2, $14 +; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18 +; NOFP16-NEXT: call $push20=, __truncsfhf2, $6 +; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20 +; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21 +; NOFP16-NEXT: call $push23=, __truncsfhf2, $22 +; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23 +; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24 +; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25 +; NOFP16-NEXT: i32.store16 10($0), $pop26 +; NOFP16-NEXT: call $push27=, __truncsfhf2, $13 +; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27 +; NOFP16-NEXT: call $push29=, __truncsfhf2, $5 +; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29 +; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30 +; NOFP16-NEXT: call $push32=, __truncsfhf2, $21 +; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32 +; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33 +; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34 +; NOFP16-NEXT: i32.store16 8($0), $pop35 +; NOFP16-NEXT: call $push36=, __truncsfhf2, $12 +; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36 +; NOFP16-NEXT: call $push38=, __truncsfhf2, $4 +; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38 +; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39 +; NOFP16-NEXT: call $push41=, __truncsfhf2, $20 +; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41 +; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42 +; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43 +; NOFP16-NEXT: i32.store16 6($0), $pop44 +; NOFP16-NEXT: call $push45=, __truncsfhf2, $11 +; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45 +; NOFP16-NEXT: call $push47=, __truncsfhf2, $3 +; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47 +; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48 +; NOFP16-NEXT: call $push50=, __truncsfhf2, $19 +; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50 +; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51 +; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52 +; NOFP16-NEXT: i32.store16 4($0), $pop53 +; NOFP16-NEXT: call $push54=, __truncsfhf2, $10 +; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54 +; NOFP16-NEXT: call $push56=, __truncsfhf2, $2 +; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56 +; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57 +; NOFP16-NEXT: call $push59=, __truncsfhf2, $18 +; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59 +; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60 +; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61 +; NOFP16-NEXT: i32.store16 2($0), $pop62 +; NOFP16-NEXT: call $push63=, __truncsfhf2, $9 +; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63 +; NOFP16-NEXT: call $push65=, __truncsfhf2, $1 +; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65 +; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66 +; NOFP16-NEXT: call $push68=, __truncsfhf2, $17 +; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68 +; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69 +; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70 +; NOFP16-NEXT: i32.store16 0($0), $pop71 +; NOFP16-NEXT: return +; +; NOSIMD-LABEL: fmuladd_contract_8xf16: +; NOSIMD: .functype fmuladd_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, __truncsfhf2, $16 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOSIMD-NEXT: call $push2=, __truncsfhf2, $8 +; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24 +; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 +; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7 +; NOSIMD-NEXT: i32.store16 14($0), $pop8 +; NOSIMD-NEXT: call $push9=, __truncsfhf2, $15 +; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9 +; NOSIMD-NEXT: call $push11=, __truncsfhf2, $7 +; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11 +; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12 +; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23 +; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14 +; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15 +; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16 +; NOSIMD-NEXT: i32.store16 12($0), $pop17 +; NOSIMD-NEXT: call $push18=, __truncsfhf2, $14 +; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18 +; NOSIMD-NEXT: call $push20=, __truncsfhf2, $6 +; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20 +; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21 +; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22 +; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23 +; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24 +; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25 +; NOSIMD-NEXT: i32.store16 10($0), $pop26 +; NOSIMD-NEXT: call $push27=, __truncsfhf2, $13 +; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27 +; NOSIMD-NEXT: call $push29=, __truncsfhf2, $5 +; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29 +; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30 +; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21 +; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32 +; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33 +; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34 +; NOSIMD-NEXT: i32.store16 8($0), $pop35 +; NOSIMD-NEXT: call $push36=, __truncsfhf2, $12 +; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36 +; NOSIMD-NEXT: call $push38=, __truncsfhf2, $4 +; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38 +; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39 +; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20 +; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41 +; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42 +; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43 +; NOSIMD-NEXT: i32.store16 6($0), $pop44 +; NOSIMD-NEXT: call $push45=, __truncsfhf2, $11 +; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45 +; NOSIMD-NEXT: call $push47=, __truncsfhf2, $3 +; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47 +; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48 +; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19 +; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50 +; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51 +; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52 +; NOSIMD-NEXT: i32.store16 4($0), $pop53 +; NOSIMD-NEXT: call $push54=, __truncsfhf2, $10 +; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54 +; NOSIMD-NEXT: call $push56=, __truncsfhf2, $2 +; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56 +; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57 +; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18 +; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59 +; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60 +; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61 +; NOSIMD-NEXT: i32.store16 2($0), $pop62 +; NOSIMD-NEXT: call $push63=, __truncsfhf2, $9 +; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63 +; NOSIMD-NEXT: call $push65=, __truncsfhf2, $1 +; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65 +; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66 +; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17 +; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68 +; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69 +; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70 +; NOSIMD-NEXT: i32.store16 0($0), $pop71 +; NOSIMD-NEXT: return + %fma = call contract <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c) + ret <8 x half> %fma +} + +define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; RELAXED-LABEL: fmuladd_8xf16: +; RELAXED: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f16x8.madd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fmuladd_8xf16: +; STRICT: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f16x8.madd $push0=, $0, $1, $2 +; STRICT-NEXT: return $pop0 +; +; NOFP16-LABEL: fmuladd_8xf16: +; NOFP16: .functype fmuladd_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, __truncsfhf2, $16 +; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOFP16-NEXT: call $push2=, __truncsfhf2, $8 +; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOFP16-NEXT: call $push5=, __truncsfhf2, $24 +; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6 +; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7 +; NOFP16-NEXT: i32.store16 14($0), $pop8 +; NOFP16-NEXT: call $push9=, __truncsfhf2, $15 +; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9 +; NOFP16-NEXT: call $push11=, __truncsfhf2, $7 +; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11 +; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12 +; NOFP16-NEXT: call $push14=, __truncsfhf2, $23 +; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14 +; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15 +; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16 +; NOFP16-NEXT: i32.store16 12($0), $pop17 +; NOFP16-NEXT: call $push18=, __truncsfhf2, $14 +; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18 +; NOFP16-NEXT: call $push20=, __truncsfhf2, $6 +; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20 +; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21 +; NOFP16-NEXT: call $push23=, __truncsfhf2, $22 +; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23 +; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24 +; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25 +; NOFP16-NEXT: i32.store16 10($0), $pop26 +; NOFP16-NEXT: call $push27=, __truncsfhf2, $13 +; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27 +; NOFP16-NEXT: call $push29=, __truncsfhf2, $5 +; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29 +; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30 +; NOFP16-NEXT: call $push32=, __truncsfhf2, $21 +; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32 +; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33 +; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34 +; NOFP16-NEXT: i32.store16 8($0), $pop35 +; NOFP16-NEXT: call $push36=, __truncsfhf2, $12 +; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36 +; NOFP16-NEXT: call $push38=, __truncsfhf2, $4 +; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38 +; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39 +; NOFP16-NEXT: call $push41=, __truncsfhf2, $20 +; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41 +; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42 +; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43 +; NOFP16-NEXT: i32.store16 6($0), $pop44 +; NOFP16-NEXT: call $push45=, __truncsfhf2, $11 +; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45 +; NOFP16-NEXT: call $push47=, __truncsfhf2, $3 +; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47 +; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48 +; NOFP16-NEXT: call $push50=, __truncsfhf2, $19 +; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50 +; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51 +; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52 +; NOFP16-NEXT: i32.store16 4($0), $pop53 +; NOFP16-NEXT: call $push54=, __truncsfhf2, $10 +; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54 +; NOFP16-NEXT: call $push56=, __truncsfhf2, $2 +; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56 +; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57 +; NOFP16-NEXT: call $push59=, __truncsfhf2, $18 +; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59 +; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60 +; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61 +; NOFP16-NEXT: i32.store16 2($0), $pop62 +; NOFP16-NEXT: call $push63=, __truncsfhf2, $9 +; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63 +; NOFP16-NEXT: call $push65=, __truncsfhf2, $1 +; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65 +; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66 +; NOFP16-NEXT: call $push68=, __truncsfhf2, $17 +; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68 +; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69 +; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70 +; NOFP16-NEXT: i32.store16 0($0), $pop71 +; NOFP16-NEXT: return +; +; NOSIMD-LABEL: fmuladd_8xf16: +; NOSIMD: .functype fmuladd_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, __truncsfhf2, $16 +; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0 +; NOSIMD-NEXT: call $push2=, __truncsfhf2, $8 +; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2 +; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3 +; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24 +; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5 +; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6 +; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7 +; NOSIMD-NEXT: i32.store16 14($0), $pop8 +; NOSIMD-NEXT: call $push9=, __truncsfhf2, $15 +; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9 +; NOSIMD-NEXT: call $push11=, __truncsfhf2, $7 +; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11 +; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12 +; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23 +; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14 +; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15 +; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16 +; NOSIMD-NEXT: i32.store16 12($0), $pop17 +; NOSIMD-NEXT: call $push18=, __truncsfhf2, $14 +; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18 +; NOSIMD-NEXT: call $push20=, __truncsfhf2, $6 +; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20 +; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21 +; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22 +; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23 +; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24 +; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25 +; NOSIMD-NEXT: i32.store16 10($0), $pop26 +; NOSIMD-NEXT: call $push27=, __truncsfhf2, $13 +; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27 +; NOSIMD-NEXT: call $push29=, __truncsfhf2, $5 +; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29 +; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30 +; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21 +; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32 +; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33 +; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34 +; NOSIMD-NEXT: i32.store16 8($0), $pop35 +; NOSIMD-NEXT: call $push36=, __truncsfhf2, $12 +; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36 +; NOSIMD-NEXT: call $push38=, __truncsfhf2, $4 +; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38 +; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39 +; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20 +; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41 +; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42 +; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43 +; NOSIMD-NEXT: i32.store16 6($0), $pop44 +; NOSIMD-NEXT: call $push45=, __truncsfhf2, $11 +; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45 +; NOSIMD-NEXT: call $push47=, __truncsfhf2, $3 +; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47 +; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48 +; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19 +; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50 +; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51 +; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52 +; NOSIMD-NEXT: i32.store16 4($0), $pop53 +; NOSIMD-NEXT: call $push54=, __truncsfhf2, $10 +; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54 +; NOSIMD-NEXT: call $push56=, __truncsfhf2, $2 +; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56 +; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57 +; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18 +; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59 +; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60 +; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61 +; NOSIMD-NEXT: i32.store16 2($0), $pop62 +; NOSIMD-NEXT: call $push63=, __truncsfhf2, $9 +; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63 +; NOSIMD-NEXT: call $push65=, __truncsfhf2, $1 +; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65 +; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66 +; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17 +; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68 +; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69 +; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70 +; NOSIMD-NEXT: i32.store16 0($0), $pop71 +; NOSIMD-NEXT: return + %fma = call <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c) + ret <8 x half> %fma +} + define <4 x float> @fmuladd_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; RELAXED-LABEL: fmuladd_contract_4xf32: ; RELAXED: .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $0, $1 +; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $0, $1, $2 ; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fmuladd_contract_4xf32: @@ -94,18 +1028,40 @@ define <4 x float> @fmuladd_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x ; STRICT-NEXT: f32x4.mul $push0=, $0, $1 ; STRICT-NEXT: f32x4.add $push1=, $pop0, $2 ; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_contract_4xf32: +; NOFP16: .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32x4.mul $push0=, $0, $1 +; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_contract_4xf32: +; NOSIMD: .functype fmuladd_contract_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $4, $8 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $12 +; NOSIMD-NEXT: f32.store 12($0), $pop1 +; NOSIMD-NEXT: f32.mul $push2=, $3, $7 +; NOSIMD-NEXT: f32.add $push3=, $pop2, $11 +; NOSIMD-NEXT: f32.store 8($0), $pop3 +; NOSIMD-NEXT: f32.mul $push4=, $2, $6 +; NOSIMD-NEXT: f32.add $push5=, $pop4, $10 +; NOSIMD-NEXT: f32.store 4($0), $pop5 +; NOSIMD-NEXT: f32.mul $push6=, $1, $5 +; NOSIMD-NEXT: f32.add $push7=, $pop6, $9 +; NOSIMD-NEXT: f32.store 0($0), $pop7 +; NOSIMD-NEXT: return %fma = call contract <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c) ret <4 x float> %fma } -; TODO: This should also have relaxed_madd in RELAXED case define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; RELAXED-LABEL: fmuladd_4xf32: ; RELAXED: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f32x4.mul $push0=, $0, $1 -; RELAXED-NEXT: f32x4.add $push1=, $pop0, $2 -; RELAXED-NEXT: return $pop1 +; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fmuladd_4xf32: ; STRICT: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128) @@ -113,10 +1069,170 @@ define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c ; STRICT-NEXT: f32x4.mul $push0=, $0, $1 ; STRICT-NEXT: f32x4.add $push1=, $pop0, $2 ; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_4xf32: +; NOFP16: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32x4.mul $push0=, $0, $1 +; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_4xf32: +; NOSIMD: .functype fmuladd_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $4, $8 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $12 +; NOSIMD-NEXT: f32.store 12($0), $pop1 +; NOSIMD-NEXT: f32.mul $push2=, $3, $7 +; NOSIMD-NEXT: f32.add $push3=, $pop2, $11 +; NOSIMD-NEXT: f32.store 8($0), $pop3 +; NOSIMD-NEXT: f32.mul $push4=, $2, $6 +; NOSIMD-NEXT: f32.add $push5=, $pop4, $10 +; NOSIMD-NEXT: f32.store 4($0), $pop5 +; NOSIMD-NEXT: f32.mul $push6=, $1, $5 +; NOSIMD-NEXT: f32.add $push7=, $pop6, $9 +; NOSIMD-NEXT: f32.store 0($0), $pop7 +; NOSIMD-NEXT: return %fma = call <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c) ret <4 x float> %fma } +define <8 x float> @fmuladd_8xf32(<8 x float> %a, <8 x float> %b, <8 x float> %c) { +; RELAXED-LABEL: fmuladd_8xf32: +; RELAXED: .functype fmuladd_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32x4.mul $push0=, $2, $4 +; RELAXED-NEXT: f32x4.add $push1=, $pop0, $6 +; RELAXED-NEXT: v128.store 16($0), $pop1 +; RELAXED-NEXT: f32x4.mul $push2=, $1, $3 +; RELAXED-NEXT: f32x4.add $push3=, $pop2, $5 +; RELAXED-NEXT: v128.store 0($0), $pop3 +; RELAXED-NEXT: return +; +; STRICT-LABEL: fmuladd_8xf32: +; STRICT: .functype fmuladd_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32x4.mul $push0=, $2, $4 +; STRICT-NEXT: f32x4.add $push1=, $pop0, $6 +; STRICT-NEXT: v128.store 16($0), $pop1 +; STRICT-NEXT: f32x4.mul $push2=, $1, $3 +; STRICT-NEXT: f32x4.add $push3=, $pop2, $5 +; STRICT-NEXT: v128.store 0($0), $pop3 +; STRICT-NEXT: return +; +; NOFP16-LABEL: fmuladd_8xf32: +; NOFP16: .functype fmuladd_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32x4.mul $push0=, $2, $4 +; NOFP16-NEXT: f32x4.add $push1=, $pop0, $6 +; NOFP16-NEXT: v128.store 16($0), $pop1 +; NOFP16-NEXT: f32x4.mul $push2=, $1, $3 +; NOFP16-NEXT: f32x4.add $push3=, $pop2, $5 +; NOFP16-NEXT: v128.store 0($0), $pop3 +; NOFP16-NEXT: return +; +; NOSIMD-LABEL: fmuladd_8xf32: +; NOSIMD: .functype fmuladd_8xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $8, $16 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $24 +; NOSIMD-NEXT: f32.store 28($0), $pop1 +; NOSIMD-NEXT: f32.mul $push2=, $7, $15 +; NOSIMD-NEXT: f32.add $push3=, $pop2, $23 +; NOSIMD-NEXT: f32.store 24($0), $pop3 +; NOSIMD-NEXT: f32.mul $push4=, $6, $14 +; NOSIMD-NEXT: f32.add $push5=, $pop4, $22 +; NOSIMD-NEXT: f32.store 20($0), $pop5 +; NOSIMD-NEXT: f32.mul $push6=, $5, $13 +; NOSIMD-NEXT: f32.add $push7=, $pop6, $21 +; NOSIMD-NEXT: f32.store 16($0), $pop7 +; NOSIMD-NEXT: f32.mul $push8=, $4, $12 +; NOSIMD-NEXT: f32.add $push9=, $pop8, $20 +; NOSIMD-NEXT: f32.store 12($0), $pop9 +; NOSIMD-NEXT: f32.mul $push10=, $3, $11 +; NOSIMD-NEXT: f32.add $push11=, $pop10, $19 +; NOSIMD-NEXT: f32.store 8($0), $pop11 +; NOSIMD-NEXT: f32.mul $push12=, $2, $10 +; NOSIMD-NEXT: f32.add $push13=, $pop12, $18 +; NOSIMD-NEXT: f32.store 4($0), $pop13 +; NOSIMD-NEXT: f32.mul $push14=, $1, $9 +; NOSIMD-NEXT: f32.add $push15=, $pop14, $17 +; NOSIMD-NEXT: f32.store 0($0), $pop15 +; NOSIMD-NEXT: return + %fma = call <8 x float> @llvm.fmuladd(<8 x float> %a, <8 x float> %b, <8 x float> %c) + ret <8 x float> %fma +} + +define <2 x double> @fmuladd_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; RELAXED-LABEL: fmuladd_contract_2xf64: +; RELAXED: .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fmuladd_contract_2xf64: +; STRICT: .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64x2.mul $push0=, $0, $1 +; STRICT-NEXT: f64x2.add $push1=, $pop0, $2 +; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_contract_2xf64: +; NOFP16: .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f64x2.mul $push0=, $0, $1 +; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_contract_2xf64: +; NOSIMD: .functype fmuladd_contract_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f64.mul $push0=, $2, $4 +; NOSIMD-NEXT: f64.add $push1=, $pop0, $6 +; NOSIMD-NEXT: f64.store 8($0), $pop1 +; NOSIMD-NEXT: f64.mul $push2=, $1, $3 +; NOSIMD-NEXT: f64.add $push3=, $pop2, $5 +; NOSIMD-NEXT: f64.store 0($0), $pop3 +; NOSIMD-NEXT: return + %fma = call contract <2 x double> @llvm.fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c) + ret <2 x double> %fma +} + +define <2 x double> @fmuladd_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; RELAXED-LABEL: fmuladd_2xf64: +; RELAXED: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fmuladd_2xf64: +; STRICT: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64x2.mul $push0=, $0, $1 +; STRICT-NEXT: f64x2.add $push1=, $pop0, $2 +; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fmuladd_2xf64: +; NOFP16: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f64x2.mul $push0=, $0, $1 +; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fmuladd_2xf64: +; NOSIMD: .functype fmuladd_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f64.mul $push0=, $2, $4 +; NOSIMD-NEXT: f64.add $push1=, $pop0, $6 +; NOSIMD-NEXT: f64.store 8($0), $pop1 +; NOSIMD-NEXT: f64.mul $push2=, $1, $3 +; NOSIMD-NEXT: f64.add $push3=, $pop2, $5 +; NOSIMD-NEXT: f64.store 0($0), $pop3 +; NOSIMD-NEXT: return + %fma = call <2 x double> @llvm.fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c) + ret <2 x double> %fma +} + define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; RELAXED-LABEL: fma_4xf32: ; RELAXED: .functype fma_4xf32 (v128, v128, v128) -> (v128) @@ -167,6 +1283,44 @@ define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; STRICT-NEXT: call $push18=, fmaf, $pop17, $pop16, $pop15 ; STRICT-NEXT: f32x4.replace_lane $push19=, $pop14, 3, $pop18 ; STRICT-NEXT: return $pop19 +; +; NOFP16-LABEL: fma_4xf32: +; NOFP16: .functype fma_4xf32 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32x4.extract_lane $push2=, $0, 0 +; NOFP16-NEXT: f32x4.extract_lane $push1=, $1, 0 +; NOFP16-NEXT: f32x4.extract_lane $push0=, $2, 0 +; NOFP16-NEXT: call $push3=, fmaf, $pop2, $pop1, $pop0 +; NOFP16-NEXT: f32x4.splat $push4=, $pop3 +; NOFP16-NEXT: f32x4.extract_lane $push7=, $0, 1 +; NOFP16-NEXT: f32x4.extract_lane $push6=, $1, 1 +; NOFP16-NEXT: f32x4.extract_lane $push5=, $2, 1 +; NOFP16-NEXT: call $push8=, fmaf, $pop7, $pop6, $pop5 +; NOFP16-NEXT: f32x4.replace_lane $push9=, $pop4, 1, $pop8 +; NOFP16-NEXT: f32x4.extract_lane $push12=, $0, 2 +; NOFP16-NEXT: f32x4.extract_lane $push11=, $1, 2 +; NOFP16-NEXT: f32x4.extract_lane $push10=, $2, 2 +; NOFP16-NEXT: call $push13=, fmaf, $pop12, $pop11, $pop10 +; NOFP16-NEXT: f32x4.replace_lane $push14=, $pop9, 2, $pop13 +; NOFP16-NEXT: f32x4.extract_lane $push17=, $0, 3 +; NOFP16-NEXT: f32x4.extract_lane $push16=, $1, 3 +; NOFP16-NEXT: f32x4.extract_lane $push15=, $2, 3 +; NOFP16-NEXT: call $push18=, fmaf, $pop17, $pop16, $pop15 +; NOFP16-NEXT: f32x4.replace_lane $push19=, $pop14, 3, $pop18 +; NOFP16-NEXT: return $pop19 +; +; NOSIMD-LABEL: fma_4xf32: +; NOSIMD: .functype fma_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, fmaf, $4, $8, $12 +; NOSIMD-NEXT: f32.store 12($0), $pop0 +; NOSIMD-NEXT: call $push1=, fmaf, $3, $7, $11 +; NOSIMD-NEXT: f32.store 8($0), $pop1 +; NOSIMD-NEXT: call $push2=, fmaf, $2, $6, $10 +; NOSIMD-NEXT: f32.store 4($0), $pop2 +; NOSIMD-NEXT: call $push3=, fmaf, $1, $5, $9 +; NOSIMD-NEXT: f32.store 0($0), $pop3 +; NOSIMD-NEXT: return %fma = call <4 x float> @llvm.fma(<4 x float> %a, <4 x float> %b, <4 x float> %c) ret <4 x float> %fma } @@ -176,9 +1330,9 @@ define <8 x float> @fadd_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8 ; RELAXED-LABEL: fadd_fmul_contract_8xf32: ; RELAXED: .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $6, $4, $2 +; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $4, $2, $6 ; RELAXED-NEXT: v128.store 16($0), $pop0 -; RELAXED-NEXT: f32x4.relaxed_madd $push1=, $5, $3, $1 +; RELAXED-NEXT: f32x4.relaxed_madd $push1=, $3, $1, $5 ; RELAXED-NEXT: v128.store 0($0), $pop1 ; RELAXED-NEXT: return ; @@ -192,17 +1346,56 @@ define <8 x float> @fadd_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8 ; STRICT-NEXT: f32x4.add $push3=, $pop2, $5 ; STRICT-NEXT: v128.store 0($0), $pop3 ; STRICT-NEXT: return +; +; NOFP16-LABEL: fadd_fmul_contract_8xf32: +; NOFP16: .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f32x4.mul $push0=, $4, $2 +; NOFP16-NEXT: f32x4.add $push1=, $pop0, $6 +; NOFP16-NEXT: v128.store 16($0), $pop1 +; NOFP16-NEXT: f32x4.mul $push2=, $3, $1 +; NOFP16-NEXT: f32x4.add $push3=, $pop2, $5 +; NOFP16-NEXT: v128.store 0($0), $pop3 +; NOFP16-NEXT: return +; +; NOSIMD-LABEL: fadd_fmul_contract_8xf32: +; NOSIMD: .functype fadd_fmul_contract_8xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f32.mul $push0=, $16, $8 +; NOSIMD-NEXT: f32.add $push1=, $pop0, $24 +; NOSIMD-NEXT: f32.store 28($0), $pop1 +; NOSIMD-NEXT: f32.mul $push2=, $15, $7 +; NOSIMD-NEXT: f32.add $push3=, $pop2, $23 +; NOSIMD-NEXT: f32.store 24($0), $pop3 +; NOSIMD-NEXT: f32.mul $push4=, $14, $6 +; NOSIMD-NEXT: f32.add $push5=, $pop4, $22 +; NOSIMD-NEXT: f32.store 20($0), $pop5 +; NOSIMD-NEXT: f32.mul $push6=, $13, $5 +; NOSIMD-NEXT: f32.add $push7=, $pop6, $21 +; NOSIMD-NEXT: f32.store 16($0), $pop7 +; NOSIMD-NEXT: f32.mul $push8=, $12, $4 +; NOSIMD-NEXT: f32.add $push9=, $pop8, $20 +; NOSIMD-NEXT: f32.store 12($0), $pop9 +; NOSIMD-NEXT: f32.mul $push10=, $11, $3 +; NOSIMD-NEXT: f32.add $push11=, $pop10, $19 +; NOSIMD-NEXT: f32.store 8($0), $pop11 +; NOSIMD-NEXT: f32.mul $push12=, $10, $2 +; NOSIMD-NEXT: f32.add $push13=, $pop12, $18 +; NOSIMD-NEXT: f32.store 4($0), $pop13 +; NOSIMD-NEXT: f32.mul $push14=, $9, $1 +; NOSIMD-NEXT: f32.add $push15=, $pop14, $17 +; NOSIMD-NEXT: f32.store 0($0), $pop15 +; NOSIMD-NEXT: return %mul = fmul contract <8 x float> %b, %a %add = fadd contract <8 x float> %mul, %c ret <8 x float> %add } - define <2 x double> @fadd_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { ; RELAXED-LABEL: fadd_fmul_contract_2xf64: ; RELAXED: .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $2, $1, $0 +; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $1, $0, $2 ; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fadd_fmul_contract_2xf64: @@ -211,28 +1404,64 @@ define <2 x double> @fadd_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, ; STRICT-NEXT: f64x2.mul $push0=, $1, $0 ; STRICT-NEXT: f64x2.add $push1=, $pop0, $2 ; STRICT-NEXT: return $pop1 +; +; NOFP16-LABEL: fadd_fmul_contract_2xf64: +; NOFP16: .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f64x2.mul $push0=, $1, $0 +; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fadd_fmul_contract_2xf64: +; NOSIMD: .functype fadd_fmul_contract_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f64.mul $push0=, $4, $2 +; NOSIMD-NEXT: f64.add $push1=, $pop0, $6 +; NOSIMD-NEXT: f64.store 8($0), $pop1 +; NOSIMD-NEXT: f64.mul $push2=, $3, $1 +; NOSIMD-NEXT: f64.add $push3=, $pop2, $5 +; NOSIMD-NEXT: f64.store 0($0), $pop3 +; NOSIMD-NEXT: return %mul = fmul contract <2 x double> %b, %a %add = fadd contract <2 x double> %mul, %c ret <2 x double> %add } -define float @fadd_fmul_contract_f32(float %a, float %b, float %c) { -; RELAXED-LABEL: fadd_fmul_contract_f32: -; RELAXED: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32) +define <2 x double> @fadd_fmul_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; RELAXED-LABEL: fadd_fmul_2xf64: +; RELAXED: .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f32.mul $push0=, $1, $0 -; RELAXED-NEXT: f32.add $push1=, $pop0, $2 +; RELAXED-NEXT: f64x2.mul $push0=, $1, $0 +; RELAXED-NEXT: f64x2.add $push1=, $pop0, $2 ; RELAXED-NEXT: return $pop1 ; -; STRICT-LABEL: fadd_fmul_contract_f32: -; STRICT: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32) +; STRICT-LABEL: fadd_fmul_2xf64: +; STRICT: .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128) ; STRICT-NEXT: # %bb.0: -; STRICT-NEXT: f32.mul $push0=, $1, $0 -; STRICT-NEXT: f32.add $push1=, $pop0, $2 +; STRICT-NEXT: f64x2.mul $push0=, $1, $0 +; STRICT-NEXT: f64x2.add $push1=, $pop0, $2 ; STRICT-NEXT: return $pop1 - %mul = fmul contract float %b, %a - %add = fadd contract float %mul, %c - ret float %add +; +; NOFP16-LABEL: fadd_fmul_2xf64: +; NOFP16: .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: f64x2.mul $push0=, $1, $0 +; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2 +; NOFP16-NEXT: return $pop1 +; +; NOSIMD-LABEL: fadd_fmul_2xf64: +; NOSIMD: .functype fadd_fmul_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> () +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: f64.mul $push0=, $4, $2 +; NOSIMD-NEXT: f64.add $push1=, $pop0, $6 +; NOSIMD-NEXT: f64.store 8($0), $pop1 +; NOSIMD-NEXT: f64.mul $push2=, $3, $1 +; NOSIMD-NEXT: f64.add $push3=, $pop2, $5 +; NOSIMD-NEXT: f64.store 0($0), $pop3 +; NOSIMD-NEXT: return + %mul = fmul <2 x double> %b, %a + %add = fadd <2 x double> %mul, %c + ret <2 x double> %add } define float @fma_f32(float %a, float %b, float %c) { @@ -247,6 +1476,18 @@ define float @fma_f32(float %a, float %b, float %c) { ; STRICT-NEXT: # %bb.0: ; STRICT-NEXT: call $push0=, fmaf, $0, $1, $2 ; STRICT-NEXT: return $pop0 +; +; NOFP16-LABEL: fma_f32: +; NOFP16: .functype fma_f32 (f32, f32, f32) -> (f32) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, fmaf, $0, $1, $2 +; NOFP16-NEXT: return $pop0 +; +; NOSIMD-LABEL: fma_f32: +; NOSIMD: .functype fma_f32 (f32, f32, f32) -> (f32) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, fmaf, $0, $1, $2 +; NOSIMD-NEXT: return $pop0 %fma = call float @llvm.fma(float %a, float %b, float %c) ret float %fma } @@ -263,6 +1504,18 @@ define double @fma_f64(double %a, double %b, double %c) { ; STRICT-NEXT: # %bb.0: ; STRICT-NEXT: call $push0=, fma, $0, $1, $2 ; STRICT-NEXT: return $pop0 +; +; NOFP16-LABEL: fma_f64: +; NOFP16: .functype fma_f64 (f64, f64, f64) -> (f64) +; NOFP16-NEXT: # %bb.0: +; NOFP16-NEXT: call $push0=, fma, $0, $1, $2 +; NOFP16-NEXT: return $pop0 +; +; NOSIMD-LABEL: fma_f64: +; NOSIMD: .functype fma_f64 (f64, f64, f64) -> (f64) +; NOSIMD-NEXT: # %bb.0: +; NOSIMD-NEXT: call $push0=, fma, $0, $1, $2 +; NOSIMD-NEXT: return $pop0 %fma = call double @llvm.fma(double %a, double %b, double %c) ret double %fma } diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll index 6e2d860..b90c1da 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll @@ -27,7 +27,7 @@ define <4 x float> @fsub_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 ; RELAXED-LABEL: fsub_fmul_contract_4xf32: ; RELAXED: .functype fsub_fmul_contract_4xf32 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $2, $1, $0 +; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $1, $0, $2 ; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fsub_fmul_contract_4xf32: @@ -46,15 +46,14 @@ define <8 x half> @fsub_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x h ; RELAXED-LABEL: fsub_fmul_contract_8xf16: ; RELAXED: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f16x8.relaxed_nmadd $push0=, $2, $1, $0 +; RELAXED-NEXT: f16x8.nmadd $push0=, $1, $0, $2 ; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fsub_fmul_contract_8xf16: ; STRICT: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128) ; STRICT-NEXT: # %bb.0: -; STRICT-NEXT: f16x8.mul $push0=, $1, $0 -; STRICT-NEXT: f16x8.sub $push1=, $2, $pop0 -; STRICT-NEXT: return $pop1 +; STRICT-NEXT: f16x8.nmadd $push0=, $1, $0, $2 +; STRICT-NEXT: return $pop0 %mul = fmul contract <8 x half> %b, %a %sub = fsub contract <8 x half> %c, %mul ret <8 x half> %sub @@ -84,9 +83,9 @@ define <8 x float> @fsub_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8 ; RELAXED-LABEL: fsub_fmul_contract_8xf32: ; RELAXED: .functype fsub_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $6, $4, $2 +; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $4, $2, $6 ; RELAXED-NEXT: v128.store 16($0), $pop0 -; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $5, $3, $1 +; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $3, $1, $5 ; RELAXED-NEXT: v128.store 0($0), $pop1 ; RELAXED-NEXT: return ; @@ -110,7 +109,7 @@ define <2 x double> @fsub_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, ; RELAXED-LABEL: fsub_fmul_contract_2xf64: ; RELAXED: .functype fsub_fmul_contract_2xf64 (v128, v128, v128) -> (v128) ; RELAXED-NEXT: # %bb.0: -; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $2, $1, $0 +; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $1, $0, $2 ; RELAXED-NEXT: return $pop0 ; ; STRICT-LABEL: fsub_fmul_contract_2xf64: @@ -143,3 +142,55 @@ define float @fsub_fmul_contract_f32(float %a, float %b, float %c) { ret float %sub } +define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; RELAXED-LABEL: fmuladd_8xf16: +; RELAXED: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f16x8.nmadd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fmuladd_8xf16: +; STRICT: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f16x8.nmadd $push0=, $0, $1, $2 +; STRICT-NEXT: return $pop0 + %fneg = fneg <8 x half> %a + %fma = call <8 x half> @llvm.fmuladd(<8 x half> %fneg, <8 x half> %b, <8 x half> %c) + ret <8 x half> %fma +} + +define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; RELAXED-LABEL: fmuladd_4xf32: +; RELAXED: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fmuladd_4xf32: +; STRICT: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32x4.mul $push0=, $0, $1 +; STRICT-NEXT: f32x4.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %fneg = fneg <4 x float> %a + %fma = call <4 x float> @llvm.fmuladd(<4 x float> %fneg, <4 x float> %b, <4 x float> %c) + ret <4 x float> %fma +} + +define <2 x double> @fmuladd_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; RELAXED-LABEL: fmuladd_2xf64: +; RELAXED: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $0, $1, $2 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fmuladd_2xf64: +; STRICT: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64x2.mul $push0=, $0, $1 +; STRICT-NEXT: f64x2.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %fneg = fneg <2 x double> %a + %fma = call <2 x double> @llvm.fmuladd(<2 x double> %fneg, <2 x double> %b, <2 x double> %c) + ret <2 x double> %fma +} diff --git a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll index a0c243b..f3950b7 100644 --- a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll +++ b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll @@ -1,16 +1,15 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -;; A minimal test case. llc will crash if global variables already has a section -;; prefix. Subsequent PRs will expand on this test case to test the hotness -;; reconciliation implementation. - -; RUN: not llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \ +;; A minimal test case. Subsequent PRs will expand on this test case +;; (e.g., with more functions, variables and profiles) and test the hotness +;; reconcillation implementation. +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \ ; RUN: -partition-static-data-sections=true \ ; RUN: -data-sections=true -unique-section-names=false \ -; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=ERR +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=IR -; ERR: Global variable hot_bss already has a section prefix hot +; IR: .section .bss.hot.,"aw" @hot_bss = internal global i32 0, !section_prefix !17 diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll index ce06d17..604b4fd 100644 --- a/llvm/test/CodeGen/X86/global-variable-partition.ll +++ b/llvm/test/CodeGen/X86/global-variable-partition.ll @@ -106,23 +106,31 @@ target triple = "x86_64-unknown-linux-gnu" ; UNIQ-NEXT: .section .data.unlikely.,"aw",@progbits,unique,8 ; AGG-NEXT: .section .data.unlikely.,"aw",@progbits +;; The `.section` directive is omitted for .data with -unique-section-names=false. +; See MCSectionELF::shouldOmitSectionDirective for the implementation details. + ; For @data_with_unknown_hotness ; SYM: .type .Ldata_with_unknown_hotness,@object # @data_with_unknown_hotness ; SYM: .section .data..Ldata_with_unknown_hotness,"aw",@progbits ; UNIQ: .section .data,"aw",@progbits,unique,9 -; The `.section` directive is omitted for .data with -unique-section-names=false. -; See MCSectionELF::shouldOmitSectionDirective for the implementation details. + ; AGG: .data ; COMMON: .Ldata_with_unknown_hotness: -; For @hot_data_custom_bar_section -; It has an explicit section attribute 'var' and shouldn't have hot or unlikely suffix. +; For variables that are not eligible for section prefix annotation ; COMMON: .type hot_data_custom_bar_section,@object ; SYM-NEXT: .section bar,"aw",@progbits ; SYM: hot_data_custom_bar_section ; UNIQ: .section bar,"aw",@progbits ; AGG: .section bar,"aw",@progbits +; SYM: .section .data.llvm.fake_var,"aw" +; UNIQ: .section .data,"aw" +; AGG: .data + +;; No section for linker declaration +; COMMON-NOT: qux + @.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1 @.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1 @hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3] @@ -137,6 +145,8 @@ target triple = "x86_64-unknown-linux-gnu" @data3 = internal global i32 3 @data_with_unknown_hotness = private global i32 5 @hot_data_custom_bar_section = internal global i32 101 #0 +@llvm.fake_var = internal global i32 123 +@qux = external global i64 define void @cold_func(i32 %0) !prof !15 { %2 = load i32, ptr @cold_bss diff --git a/llvm/test/CodeGen/X86/setcc-wide-types.ll b/llvm/test/CodeGen/X86/setcc-wide-types.ll index 5aa266d..69abf6e 100644 --- a/llvm/test/CodeGen/X86/setcc-wide-types.ll +++ b/llvm/test/CodeGen/X86/setcc-wide-types.ll @@ -1447,3 +1447,158 @@ define i1 @eq_i512_load_arg(ptr%p, i512 %b) { %r = icmp eq i512 %a, %b ret i1 %r } + +; Tests for any/allbits from memory. + +define i1 @anybits_i128_load_arg(ptr %w) { +; ANY-LABEL: anybits_i128_load_arg: +; ANY: # %bb.0: +; ANY-NEXT: movq (%rdi), %rax +; ANY-NEXT: orq 8(%rdi), %rax +; ANY-NEXT: setne %al +; ANY-NEXT: retq + %ld = load i128, ptr %w + %cmp = icmp ne i128 %ld, 0 + ret i1 %cmp +} + +define i1 @allbits_i128_load_arg(ptr %w) { +; SSE2-LABEL: allbits_i128_load_arg: +; SSE2: # %bb.0: +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pcmpeqb (%rdi), %xmm0 +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: sete %al +; SSE2-NEXT: retq +; +; SSE41-LABEL: allbits_i128_load_arg: +; SSE41: # %bb.0: +; SSE41-NEXT: movdqa (%rdi), %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: ptest %xmm1, %xmm0 +; SSE41-NEXT: setb %al +; SSE41-NEXT: retq +; +; AVXANY-LABEL: allbits_i128_load_arg: +; AVXANY: # %bb.0: +; AVXANY-NEXT: vmovdqa (%rdi), %xmm0 +; AVXANY-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 +; AVXANY-NEXT: vptest %xmm1, %xmm0 +; AVXANY-NEXT: setb %al +; AVXANY-NEXT: retq + %ld = load i128, ptr %w + %cmp = icmp eq i128 %ld, -1 + ret i1 %cmp +} + +define i1 @anybits_i256_load_arg(ptr %w) { +; ANY-LABEL: anybits_i256_load_arg: +; ANY: # %bb.0: +; ANY-NEXT: movq (%rdi), %rax +; ANY-NEXT: movq 8(%rdi), %rcx +; ANY-NEXT: orq 24(%rdi), %rcx +; ANY-NEXT: orq 16(%rdi), %rax +; ANY-NEXT: orq %rcx, %rax +; ANY-NEXT: setne %al +; ANY-NEXT: retq + %ld = load i256, ptr %w + %cmp = icmp ne i256 %ld, 0 + ret i1 %cmp +} + +define i1 @allbits_i256_load_arg(ptr %w) { +; SSE-LABEL: allbits_i256_load_arg: +; SSE: # %bb.0: +; SSE-NEXT: movq (%rdi), %rax +; SSE-NEXT: movq 8(%rdi), %rcx +; SSE-NEXT: andq 24(%rdi), %rcx +; SSE-NEXT: andq 16(%rdi), %rax +; SSE-NEXT: andq %rcx, %rax +; SSE-NEXT: cmpq $-1, %rax +; SSE-NEXT: sete %al +; SSE-NEXT: retq +; +; AVX1-LABEL: allbits_i256_load_arg: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovdqu (%rdi), %ymm0 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1 +; AVX1-NEXT: vptest %ymm1, %ymm0 +; AVX1-NEXT: setb %al +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: allbits_i256_load_arg: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovdqu (%rdi), %ymm0 +; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX2-NEXT: vptest %ymm1, %ymm0 +; AVX2-NEXT: setb %al +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: allbits_i256_load_arg: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqu (%rdi), %ymm0 +; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vptest %ymm1, %ymm0 +; AVX512-NEXT: setb %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %ld = load i256, ptr %w + %cmp = icmp eq i256 %ld, -1 + ret i1 %cmp +} + +define i1 @anybits_i512_load_arg(ptr %w) { +; ANY-LABEL: anybits_i512_load_arg: +; ANY: # %bb.0: +; ANY-NEXT: movq 16(%rdi), %rax +; ANY-NEXT: movq (%rdi), %rcx +; ANY-NEXT: movq 8(%rdi), %rdx +; ANY-NEXT: movq 24(%rdi), %rsi +; ANY-NEXT: orq 56(%rdi), %rsi +; ANY-NEXT: orq 40(%rdi), %rdx +; ANY-NEXT: orq %rsi, %rdx +; ANY-NEXT: orq 48(%rdi), %rax +; ANY-NEXT: orq 32(%rdi), %rcx +; ANY-NEXT: orq %rax, %rcx +; ANY-NEXT: orq %rdx, %rcx +; ANY-NEXT: setne %al +; ANY-NEXT: retq + %ld = load i512, ptr %w + %cmp = icmp ne i512 %ld, 0 + ret i1 %cmp +} + +define i1 @allbits_i512_load_arg(ptr %w) { +; NO512-LABEL: allbits_i512_load_arg: +; NO512: # %bb.0: +; NO512-NEXT: movq 16(%rdi), %rax +; NO512-NEXT: movq (%rdi), %rcx +; NO512-NEXT: movq 8(%rdi), %rdx +; NO512-NEXT: movq 24(%rdi), %rsi +; NO512-NEXT: andq 56(%rdi), %rsi +; NO512-NEXT: andq 40(%rdi), %rdx +; NO512-NEXT: andq %rsi, %rdx +; NO512-NEXT: andq 48(%rdi), %rax +; NO512-NEXT: andq 32(%rdi), %rcx +; NO512-NEXT: andq %rax, %rcx +; NO512-NEXT: andq %rdx, %rcx +; NO512-NEXT: cmpq $-1, %rcx +; NO512-NEXT: sete %al +; NO512-NEXT: retq +; +; AVX512-LABEL: allbits_i512_load_arg: +; AVX512: # %bb.0: +; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = -1 +; AVX512-NEXT: vpcmpneqd (%rdi), %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: sete %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %ld = load i512, ptr %w + %cmp = icmp eq i512 %ld, -1 + ret i1 %cmp +} diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index 48aec4b..57da338 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -917,11 +917,11 @@ main: # CHECK: f16x8.nearest # encoding: [0xfd,0xb6,0x02] f16x8.nearest - # CHECK: f16x8.relaxed_madd # encoding: [0xfd,0xce,0x02] - f16x8.relaxed_madd + # CHECK: f16x8.madd # encoding: [0xfd,0xce,0x02] + f16x8.madd - # CHECK: f16x8.relaxed_nmadd # encoding: [0xfd,0xcf,0x02] - f16x8.relaxed_nmadd + # CHECK: f16x8.nmadd # encoding: [0xfd,0xcf,0x02] + f16x8.nmadd # CHECK: i16x8.trunc_sat_f16x8_s # encoding: [0xfd,0xc5,0x02] i16x8.trunc_sat_f16x8_s diff --git a/llvm/test/TableGen/listsplat.td b/llvm/test/TableGen/listsplat.td index 5a93a4c..43803d6 100644 --- a/llvm/test/TableGen/listsplat.td +++ b/llvm/test/TableGen/listsplat.td @@ -1,4 +1,5 @@ // RUN: llvm-tblgen %s | FileCheck %s +// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s // CHECK: ------------- Classes ----------------- // CHECK-NEXT: class X<int X:a = ?, int X:b = ?> { @@ -73,3 +74,8 @@ def DYa1 : Y<"a", 1>; def DYa2 : Y<"a", 2>; def DZ : X<42, !size([1, 2, 3])>; + +#ifdef ERROR1 +// ERROR1: !listsplat count -1 is negative +defvar E = !listsplat("", -1); +#endif diff --git a/llvm/test/Transforms/PGOProfile/data-access-profile.ll b/llvm/test/Transforms/PGOProfile/data-access-profile.ll index 29198f34..205184b 100644 --- a/llvm/test/Transforms/PGOProfile/data-access-profile.ll +++ b/llvm/test/Transforms/PGOProfile/data-access-profile.ll @@ -3,55 +3,72 @@ ; RUN: rm -rf %t && split-file %s %t && cd %t -;; Read a text profile and merge it into indexed profile. +;; Read text profiles and merge them into indexed profiles. ; RUN: llvm-profdata merge --memprof-version=4 memprof.yaml -o memprof.profdata +; RUN: llvm-profdata merge --memprof-version=4 memprof-no-dap.yaml -o memprof-no-dap.profdata ;; Run optimizer pass on an IR module without IR functions, and test that global ;; variables in the module could be annotated (i.e., no early return), ; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \ -; RUN: -debug-only=memprof -stats -S funcless-module.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT +; RUN: -debug-only=memprof -stats -S funcless-module.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,IR,STAT ;; Run optimizer pass on the IR, and check the section prefix. ; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \ -; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT +; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,IR,STAT -;; Run optimizer pass without explicitly setting -memprof-annotate-static-data-prefix. -;; The output text IR shouldn't have `section_prefix` +;; Run memprof without providing memprof data. Test that IR has module flag +;; `EnableDataAccessProf` as 0. +; RUN: opt -passes='memprof-use<profile-filename=memprof-no-dap.profdata>' -memprof-annotate-static-data-prefix \ +; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefix=FLAG + +;; Run memprof without explicitly setting -memprof-annotate-static-data-prefix. +;; The output text IR shouldn't have `section_prefix` or EnableDataAccessProf module flag. ; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' \ -; RUN: -debug-only=memprof -stats -S input.ll -o - | FileCheck %s --implicit-check-not="section_prefix" +; RUN: -debug-only=memprof -stats -S input.ll -o - | FileCheck %s --check-prefix=FLAGLESS --implicit-check-not="section_prefix" ; LOG: Skip annotating string literal .str ; LOG: Global variable var1 is annotated as hot ; LOG: Global variable var2.llvm.125 is annotated as hot ; LOG: Global variable bar is not annotated ; LOG: Global variable foo is annotated as unlikely -; LOG: Global variable var3 has explicit section name. Skip annotating. -; LOG: Global variable var4 has explicit section name. Skip annotating. +; LOG: Skip annotation for var3 due to explicit section name. +; LOG: Skip annotation for var4 due to explicit section name. +; LOG: Skip annotation for llvm.fake_var due to name starts with `llvm.`. +; LOG: Skip annotation for qux due to linker declaration. ;; String literals are not annotated. -; PREFIX: @.str = unnamed_addr constant [5 x i8] c"abcde" -; PREFIX-NOT: section_prefix -; PREFIX: @var1 = global i32 123, !section_prefix !0 +; IR: @.str = unnamed_addr constant [5 x i8] c"abcde" +; IR-NOT: section_prefix +; IR: @var1 = global i32 123, !section_prefix !0 ;; @var.llvm.125 will be canonicalized to @var2 for profile look-up. -; PREFIX-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0 +; IR-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0 ;; @bar is not seen in hot symbol or known symbol set, so it won't get a section ;; prefix. Test this by testing that there is no section_prefix between @bar and ;; @foo. -; PREFIX-NEXT: @bar = global i16 3 -; PREFIX-NOT: !section_prefix +; IR-NEXT: @bar = global i16 3 +; IR-NOT: !section_prefix ;; @foo is unlikely. -; PREFIX-NEXT: @foo = global i8 2, !section_prefix !1 +; IR-NEXT: @foo = global i8 2, !section_prefix !1 + +; IR-NEXT: @var3 = constant [2 x i32] [i32 12345, i32 6789], section "sec1" +; IR-NEXT: @var4 = constant [1 x i64] [i64 98765] #0 + +; IR: @llvm.fake_var = global i32 123 +; IR-NOT: !section_prefix +; IR: @qux = external global i64 +; IR-NOT: !section_prefix -; PREFIX-NEXT: @var3 = constant [2 x i32] [i32 12345, i32 6789], section "sec1" -; PREFIX-NEXT: @var4 = constant [1 x i64] [i64 98765] #0 +; IR: attributes #0 = { "rodata-section"="sec2" } -; PREFIX: attributes #0 = { "rodata-section"="sec2" } +; IR: !0 = !{!"section_prefix", !"hot"} +; IR-NEXT: !1 = !{!"section_prefix", !"unlikely"} +; IR-NEXT: !2 = !{i32 2, !"EnableDataAccessProf", i32 1} -; PREFIX: !0 = !{!"section_prefix", !"hot"} -; PREFIX-NEXT: !1 = !{!"section_prefix", !"unlikely"} +; FLAG: !{i32 2, !"EnableDataAccessProf", i32 0} +; FLAGLESS-NOT: EnableDataAccessProf ; STAT: 1 memprof - Number of global vars annotated with 'unlikely' section prefix. ; STAT: 2 memprof - Number of global vars with user-specified section (not annotated). @@ -72,6 +89,24 @@ DataAccessProfiles: - foo KnownColdStrHashes: [ 999, 1001 ] ... +;--- memprof-no-dap.yaml +--- +# A memprof file with without data access profiles. The heap records are simplified +# to pass profile parsing and don't need to match the IR. +HeapProfileRecords: + - GUID: 0xdeadbeef12345678 + AllocSites: + - Callstack: + - { Function: 0x1111111111111111, LineOffset: 11, Column: 10, IsInlineFrame: true } + MemInfoBlock: + AllocCount: 111 + TotalSize: 222 + TotalLifetime: 333 + TotalLifetimeAccessDensity: 444 + CallSites: + - Frames: + - { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true } +... ;--- input.ll target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" @@ -84,11 +119,14 @@ target triple = "x86_64-unknown-linux-gnu" @foo = global i8 2 @var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1" @var4 = constant [1 x i64][i64 98765] #0 +@llvm.fake_var = global i32 123 +@qux = external global i64 define i32 @func() { %a = load i32, ptr @var1 %b = load i32, ptr @var2.llvm.125 - %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b) + %c = load i32, ptr @llvm.fake_var + %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b, i32 %c) ret i32 %ret } @@ -108,5 +146,8 @@ target triple = "x86_64-unknown-linux-gnu" @foo = global i8 2 @var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1" @var4 = constant [1 x i64][i64 98765] #0 +@llvm.fake_var = global i32 123 +@qux = external global i64 + attributes #0 = { "rodata-section"="sec2" } |