Diffstat (limited to 'llvm/test')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/sched.group.classification.mir | 59
-rw-r--r--  llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir | 9
-rw-r--r--  llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir | 2
-rw-r--r--  llvm/test/CodeGen/PowerPC/compare-vector-with-zero.ll (renamed from llvm/test/CodeGen/PowerPC/check-zero-vector.ll) | 77
-rw-r--r--  llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll | 4
-rw-r--r--  llvm/test/CodeGen/RISCV/i64-icmp.ll | 6
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive-O0-ATM-ATK.ll | 18
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive-xsfmm-vset-insert.mir | 523
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e4m3.ll | 20
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e5m2.ll | 20
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e4m3.ll | 20
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e5m2.ll | 20
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_f_f.ll | 52
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_s.ll | 20
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_u.ll | 20
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_s.ll | 20
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_u.ll | 20
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte16.ll | 23
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte32.ll | 23
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte64.ll | 23
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte8.ll | 23
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettk.ll | 23
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettm.ll | 23
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettnt.ll | 72
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste16.ll | 23
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste32.ll | 23
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste64.ll | 23
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste8.ll | 23
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtdiscard.ll | 22
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_t_v.ll | 114
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_v_t.ll | 114
-rw-r--r--  llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtzero_t.ll | 24
-rw-r--r--  llvm/test/CodeGen/RISCV/select-to-and-zext.ll | 6
-rw-r--r--  llvm/test/CodeGen/RISCV/setcc-logic.ll | 5
-rw-r--r--  llvm/test/CodeGen/RISCV/sext-zext-trunc.ll | 6
-rw-r--r--  llvm/test/CodeGen/RISCV/xaluo.ll | 12
-rw-r--r--  llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll | 48
-rw-r--r--  llvm/test/CodeGen/WebAssembly/simd-dot-reductions.ll | 106
-rw-r--r--  llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll | 1309
-rw-r--r--  llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll | 67
-rw-r--r--  llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll | 13
-rw-r--r--  llvm/test/CodeGen/X86/global-variable-partition.ll | 18
-rw-r--r--  llvm/test/CodeGen/X86/setcc-wide-types.ll | 155
-rw-r--r--  llvm/test/MC/WebAssembly/simd-encodings.s | 8
-rw-r--r--  llvm/test/TableGen/listsplat.td | 6
-rw-r--r--  llvm/test/Transforms/PGOProfile/data-access-profile.ll | 83
46 files changed, 3232 insertions, 96 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/sched.group.classification.mir b/llvm/test/CodeGen/AMDGPU/sched.group.classification.mir
new file mode 100644
index 0000000..a4aad57
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sched.group.classification.mir
@@ -0,0 +1,59 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck %s
+
+---
+name: buffer_load_lds_not_valu
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: buffer_load_lds_not_valu
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $exec = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF2]], [[DEF3]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF3]], [[V_ADD_U32_e32_]], implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 0
+ ; CHECK-NEXT: BUFFER_LOAD_DWORDX4_LDS_OFFEN [[DEF]], [[DEF1]], 0, 0, 0, 0, implicit $exec, implicit $m0
+ ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]], implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 1
+ ; CHECK-NEXT: BUFFER_LOAD_DWORDX4_LDS_OFFEN [[DEF]], [[DEF1]], 0, 0, 0, 0, implicit $exec, implicit $m0
+ ; CHECK-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_3]], [[V_ADD_U32_e32_4]], implicit $exec
+ ; CHECK-NEXT: [[V_ADD_U32_e32_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_4]], [[V_ADD_U32_e32_5]], implicit $exec
+ ; CHECK-NEXT: dead [[V_ADD_U32_e32_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[V_ADD_U32_e32_5]], [[V_ADD_U32_e32_6]], implicit $exec
+ ; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 2, 0
+ ; CHECK-NEXT: SCHED_GROUP_BARRIER 4, 1, 0
+ ; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 2, 0
+ ; CHECK-NEXT: SCHED_GROUP_BARRIER 4, 1, 0
+ ; CHECK-NEXT: SCHED_GROUP_BARRIER 2, 4, 0
+ ; CHECK-NEXT: S_ENDPGM 0
+ $exec = IMPLICIT_DEF
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:sgpr_128 = IMPLICIT_DEF
+ %2:vgpr_32 = IMPLICIT_DEF
+ %3:vgpr_32 = IMPLICIT_DEF
+ %4:vgpr_32 = V_ADD_U32_e32 %2, %3, implicit $exec
+ %5:vgpr_32 = V_ADD_U32_e32 %3, %4, implicit $exec
+ $m0 = S_MOV_B32 0
+ BUFFER_LOAD_DWORDX4_LDS_OFFEN %0, %1, 0, 0, 0, 0, implicit $exec, implicit $m0
+ $m0 = S_MOV_B32 1
+ BUFFER_LOAD_DWORDX4_LDS_OFFEN %0, %1, 0, 0, 0, 0, implicit $exec, implicit $m0
+ %6:vgpr_32 = V_ADD_U32_e32 %4, %5, implicit $exec
+ %7:vgpr_32 = V_ADD_U32_e32 %5, %6, implicit $exec
+ %8:vgpr_32 = V_ADD_U32_e32 %6, %7, implicit $exec
+ %9:vgpr_32 = V_ADD_U32_e32 %7, %8, implicit $exec
+ %10:vgpr_32 = V_ADD_U32_e32 %8, %9, implicit $exec
+ %11:vgpr_32 = V_ADD_U32_e32 %9, %10, implicit $exec
+ SCHED_GROUP_BARRIER 2, 2, 0
+ SCHED_GROUP_BARRIER 4, 1, 0
+ SCHED_GROUP_BARRIER 2, 2, 0
+ SCHED_GROUP_BARRIER 4, 1, 0
+ SCHED_GROUP_BARRIER 2, 4, 0
+ S_ENDPGM 0
+...
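Note: SCHED_GROUP_BARRIER takes the same three operands as the llvm.amdgcn.sched.group.barrier intrinsic: a classification mask, the number of instructions in the group, and a sync ID. Masks 2 and 4 select VALU and SALU instructions respectively, so the barriers above request the pattern "2 VALU, 1 SALU, 2 VALU, 1 SALU, 4 VALU". A minimal IR sketch of the same request, assuming the documented intrinsic signature:

  ; a group of 2 VALU instructions, then a group of 1 SALU instruction, sync ID 0
  call void @llvm.amdgcn.sched.group.barrier(i32 2, i32 2, i32 0)
  call void @llvm.amdgcn.sched.group.barrier(i32 4, i32 1, i32 0)

The test pins down classification: BUFFER_LOAD_DWORDX4_LDS_OFFEN must not be treated as VALU, which leaves each load free to be scheduled directly after the $m0 write it consumes instead of being absorbed into a VALU group.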
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir b/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir
index 9553fcc..f11fe4a 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-phys-copy.mir
@@ -59,6 +59,15 @@ body: |
...
---
+name: src_shared_base_to_vcc
+body: |
+ bb.0:
+ ; GFX9-LABEL: name: src_shared_base_to_vcc
+ ; GFX9: $vcc = S_MOV_B64 $src_shared_base
+ $vcc = COPY $src_shared_base
+...
+
+---
name: sgpr96_aligned_src_dst
body: |
bb.0:
diff --git a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir
index 1030917..302f70f 100644
--- a/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir
+++ b/llvm/test/CodeGen/MIR/AArch64/return-address-signing.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=aarch64 -run-pass=prologepilog -run-pass=aarch64-ptrauth -o - %s 2>&1 | FileCheck %s
+# RUN: llc -mtriple=aarch64 -run-pass=prologepilog -run-pass=aarch64-ptrauth -o - %s 2>&1 | FileCheck --strict-whitespace %s
--- |
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"
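Note: --strict-whitespace makes FileCheck match the whitespace inside CHECK patterns literally instead of canonicalizing runs of horizontal whitespace, so this test now also verifies the exact indentation of the printed MIR, not just its tokens.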
diff --git a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll b/llvm/test/CodeGen/PowerPC/compare-vector-with-zero.ll
index 0f7e0c7..1325abf 100644
--- a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/compare-vector-with-zero.ll
@@ -95,3 +95,80 @@ declare i4 @llvm.ctpop.i4(i4) #1
!6 = !{!"short", !7, i64 0}
!7 = !{!"omnipotent char", !8, i64 0}
!8 = !{!"Simple C/C++ TBAA"}
+
+; Function to lock down codegen for floating-point vector comparisons
+define range(i32 0, 5) i32 @cols_needed(ptr %colauths) {
+; POWERPC_64LE-LABEL: cols_needed:
+; POWERPC_64LE: # %bb.0: # %entry
+; POWERPC_64LE-NEXT: lxv vs0, 0(r3)
+; POWERPC_64LE-NEXT: xxlxor vs1, vs1, vs1
+; POWERPC_64LE-NEXT: li r4, 4
+; POWERPC_64LE-NEXT: li r3, 0
+; POWERPC_64LE-NEXT: xvcmpeqsp vs0, vs0, vs1
+; POWERPC_64LE-NEXT: xxlnor v2, vs0, vs0
+; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2
+; POWERPC_64LE-NEXT: vextuwrx r3, r3, v2
+; POWERPC_64LE-NEXT: rlwinm r4, r4, 1, 30, 30
+; POWERPC_64LE-NEXT: sub r3, r4, r3
+; POWERPC_64LE-NEXT: mfvsrwz r4, v2
+; POWERPC_64LE-NEXT: rlwinm r4, r4, 2, 29, 29
+; POWERPC_64LE-NEXT: or r3, r3, r4
+; POWERPC_64LE-NEXT: li r4, 12
+; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2
+; POWERPC_64LE-NEXT: slwi r4, r4, 3
+; POWERPC_64LE-NEXT: or r3, r3, r4
+; POWERPC_64LE-NEXT: clrlwi r3, r3, 28
+; POWERPC_64LE-NEXT: stb r3, -1(r1)
+; POWERPC_64LE-NEXT: lbz r3, -1(r1)
+; POWERPC_64LE-NEXT: popcntd r3, r3
+; POWERPC_64LE-NEXT: blr
+;
+; POWERPC_64-LABEL: cols_needed:
+; POWERPC_64: # %bb.0: # %entry
+; POWERPC_64-NEXT: lxv vs0, 0(r3)
+; POWERPC_64-NEXT: xxlxor vs1, vs1, vs1
+; POWERPC_64-NEXT: li r4, 8
+; POWERPC_64-NEXT: xvcmpeqsp vs0, vs0, vs1
+; POWERPC_64-NEXT: xxlnor v2, vs0, vs0
+; POWERPC_64-NEXT: vextuwlx r4, r4, v2
+; POWERPC_64-NEXT: mfvsrwz r3, v2
+; POWERPC_64-NEXT: rlwinm r4, r4, 1, 30, 30
+; POWERPC_64-NEXT: rlwimi r4, r3, 2, 29, 29
+; POWERPC_64-NEXT: li r3, 0
+; POWERPC_64-NEXT: vextuwlx r3, r3, v2
+; POWERPC_64-NEXT: rlwimi r4, r3, 3, 0, 28
+; POWERPC_64-NEXT: li r3, 12
+; POWERPC_64-NEXT: vextuwlx r3, r3, v2
+; POWERPC_64-NEXT: sub r3, r4, r3
+; POWERPC_64-NEXT: clrlwi r3, r3, 28
+; POWERPC_64-NEXT: stb r3, -1(r1)
+; POWERPC_64-NEXT: lbz r3, -1(r1)
+; POWERPC_64-NEXT: popcntd r3, r3
+; POWERPC_64-NEXT: blr
+;
+; POWERPC_32-LABEL: cols_needed:
+; POWERPC_32: # %bb.0: # %entry
+; POWERPC_32-NEXT: lxv vs0, 0(r3)
+; POWERPC_32-NEXT: xxlxor vs1, vs1, vs1
+; POWERPC_32-NEXT: xvcmpeqsp vs0, vs0, vs1
+; POWERPC_32-NEXT: xxlnor vs0, vs0, vs0
+; POWERPC_32-NEXT: stxv vs0, -32(r1)
+; POWERPC_32-NEXT: lwz r3, -24(r1)
+; POWERPC_32-NEXT: lwz r4, -28(r1)
+; POWERPC_32-NEXT: rlwinm r3, r3, 1, 30, 30
+; POWERPC_32-NEXT: rlwimi r3, r4, 2, 29, 29
+; POWERPC_32-NEXT: lwz r4, -32(r1)
+; POWERPC_32-NEXT: rlwimi r3, r4, 3, 0, 28
+; POWERPC_32-NEXT: lwz r4, -20(r1)
+; POWERPC_32-NEXT: sub r3, r3, r4
+; POWERPC_32-NEXT: clrlwi r3, r3, 28
+; POWERPC_32-NEXT: popcntw r3, r3
+; POWERPC_32-NEXT: blr
+entry:
+ %0 = load <4 x float>, ptr %colauths, align 4, !tbaa !5
+ %1 = fcmp une <4 x float> %0, zeroinitializer
+ %2 = bitcast <4 x i1> %1 to i4
+ %3 = tail call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 %2)
+ %4 = zext nneg i4 %3 to i32
+ ret i32 %4
+}
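Note: the new cols_needed function locks down the lane-counting idiom for floating-point vectors: fcmp une yields a <4 x i1> mask, the bitcast packs that mask into an i4, and ctpop counts the set bits, i.e. how many lanes are nonzero. The three prefixes then pin down how each configuration materializes the i4 before the popcount: per-lane vextuwrx/vextuwlx extracts on 64-bit little- and big-endian, and a store/reload of the whole mask vector through the stack on 32-bit.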
diff --git a/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll
index 2a46a59..4f036d3 100644
--- a/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll
+++ b/llvm/test/CodeGen/RISCV/and-negpow2-cmp.ll
@@ -221,8 +221,8 @@ define i64 @test12(i64 %0) #0 {
;
; RV64-LABEL: test12:
; RV64: # %bb.0: # %entry
-; RV64-NEXT: addiw a0, a0, -16
-; RV64-NEXT: addi a0, a0, 13
+; RV64-NEXT: addi a0, a0, -16
+; RV64-NEXT: addiw a0, a0, 13
; RV64-NEXT: seqz a0, a0
; RV64-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/RISCV/i64-icmp.ll b/llvm/test/CodeGen/RISCV/i64-icmp.ll
index 88d989d..2742b9a 100644
--- a/llvm/test/CodeGen/RISCV/i64-icmp.ll
+++ b/llvm/test/CodeGen/RISCV/i64-icmp.ll
@@ -708,8 +708,7 @@ define i64 @icmp_sle_constant_neg_2050(i64 %a) nounwind {
define i64 @icmp_eq_zext_inreg_small_constant(i64 %a) nounwind {
; RV64I-LABEL: icmp_eq_zext_inreg_small_constant:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: addi a0, a0, -123
+; RV64I-NEXT: addiw a0, a0, -123
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: ret
%1 = and i64 %a, 4294967295
@@ -748,8 +747,7 @@ define i64 @icmp_ne_zext_inreg_small_constant(i64 %a) nounwind {
define i64 @icmp_ne_zext_inreg_large_constant(i64 %a) nounwind {
; RV64I-LABEL: icmp_ne_zext_inreg_large_constant:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: addi a0, a0, 2
+; RV64I-NEXT: addiw a0, a0, 2
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: ret
%1 = and i64 %a, 4294967295
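Note: both i64-icmp.ll updates (and the and-negpow2-cmp.ll change above) fold an explicit sign-extension into a W-form add. On RV64, addiw adds and then sign-extends the low 32 bits of the result, so when only a seqz/snez against zero of a 32-bit quantity follows, "sext.w a0, a0; addi a0, a0, C" and the single "addiw a0, a0, C" produce the same zero/nonzero answer; likewise, making the last add in a chain the W-form removes the need for a separate extension.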
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive-O0-ATM-ATK.ll b/llvm/test/CodeGen/RISCV/rvv/sifive-O0-ATM-ATK.ll
new file mode 100644
index 0000000..d9a49a1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive-O0-ATM-ATK.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+v -O0 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-RV64
+
+define void @matmul() {
+; CHECK-RV64-LABEL: matmul:
+; CHECK-RV64: # %bb.0: # %entry
+; CHECK-RV64-NEXT: li a0, 0
+; CHECK-RV64-NEXT: vsetvli zero, a0, 512
+; CHECK-RV64-NEXT: sf.vsettm zero, a0
+; CHECK-RV64-NEXT: sf.vtzero.t mt0
+; CHECK-RV64-NEXT: ret
+entry:
+ call void @llvm.riscv.sf.vtzero.t.i64(i64 0, i64 0, i64 0, i64 3, i64 1)
+ ret void
+}
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn
+declare void @llvm.riscv.sf.vtzero.t.i64(i64 immarg, i64, i64, i64 immarg, i64 immarg) #0
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive-xsfmm-vset-insert.mir b/llvm/test/CodeGen/RISCV/rvv/sifive-xsfmm-vset-insert.mir
new file mode 100644
index 0000000..389283a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive-xsfmm-vset-insert.mir
@@ -0,0 +1,523 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v \
+# RUN: -run-pass=phi-node-elimination,register-coalescer,riscv-insert-vsetvli | FileCheck %s
+
+--- |
+ define void @xsfmm_same_state(<vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 noundef %tm, i64 noundef %tn, i64 noundef %tk) {
+ entry:
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2)
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2)
+ ret void
+ }
+
+ define void @xsfmm_different_state(<vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk) {
+ entry:
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2)
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 4)
+ ret void
+ }
+
+ define void @xsfmm_different_state_bf(<vscale x 32 x half> %tile1, <vscale x 32 x bfloat> %tile2, i64 %tm, i64 %tn, i64 %tk) {
+ entry:
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile1, i64 %tm, i64 %tn, i64 %tk, i64 2)
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32bf16(i64 2, <vscale x 32 x bfloat> %tile2, <vscale x 32 x bfloat> %tile2, i64 %tm, i64 %tn, i64 %tk, i64 2)
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 2, <vscale x 32 x half> %tile1, <vscale x 32 x half> %tile1, i64 %tm, i64 %tn, i64 %tk, i64 2)
+ ret void
+ }
+
+ define <vscale x 64 x i8> @interleave_rvv_and_xsfmm(<vscale x 64 x i8> %tile, i64 %vl, ptr %base) {
+ entry:
+ %0 = call <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.i64(i64 1, i64 %vl)
+ %1 = call <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> %tile, <vscale x 64 x i8> %0, i64 %vl)
+ call void @llvm.riscv.sf.vste16.i64(i64 1, ptr %base, i64 %vl)
+ ret <vscale x 64 x i8> %1
+ }
+
+ define <vscale x 64 x i8> @interleave_rvv_and_xsfmm2(<vscale x 64 x i8> %tile, i64 %vl, ptr %base) {
+ entry:
+ %0 = call <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> %tile, <vscale x 64 x i8> %tile, i64 %vl)
+ %1 = call <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.i64(i64 1, i64 %vl)
+ %2 = call <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> poison, <vscale x 64 x i8> %tile, <vscale x 64 x i8> %0, i64 %vl)
+ call void @llvm.riscv.sf.vste16.i64(i64 1, ptr %base, i64 %vl)
+ ret <vscale x 64 x i8> %2
+ }
+
+ define void @consecutive_xsfmm(<vscale x 32 x half> %tile, i64 %tm, i64 %tn, i64 %tk, ptr %base) {
+ entry:
+ tail call void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64 0, <vscale x 32 x half> %tile, <vscale x 32 x half> %tile, i64 %tm, i64 %tn, i64 %tk, i64 2)
+ call void @llvm.riscv.sf.vste16.i64(i64 0, ptr %base, i64 %tn)
+ ret void
+ }
+
+ define i64 @vsettnt_max(i64 %vl) {
+ entry:
+ %0 = call i64 @llvm.riscv.sf.vsettm.i64(i64 %vl, i64 1, i64 2)
+ %1 = call i64 @llvm.riscv.sf.vsettnt_max.i64(i64 1, i64 2)
+ ret i64 %0
+ }
+
+ define i64 @single_vsettm(i64 %vl) {
+ entry:
+ %0 = call i64 @llvm.riscv.sf.vsettm.i64(i64 %vl, i64 1, i64 2)
+ ret i64 %0
+ }
+
+ define i64 @single_vsettn(i64 %vl) {
+ entry:
+ %0 = call i64 @llvm.riscv.sf.vsettn.i64(i64 %vl, i64 1, i64 2)
+ ret i64 %0
+ }
+
+ define i64 @single_vsettk(i64 %vl) {
+ entry:
+ %0 = call i64 @llvm.riscv.sf.vsettk.i64(i64 %vl, i64 1, i64 2)
+ ret i64 %0
+ }
+
+ define void @sf_vtzero(i64 %tm, i64 %tn) {
+ entry:
+ call void @llvm.riscv.sf.vtzero.i64(i64 1, i64 %tm, i64 %tn, i64 3, i64 4)
+ ret void
+ }
+
+ declare void @llvm.riscv.sf.mm.f.f.i64.nxv32f16(i64, <vscale x 32 x half>, <vscale x 32 x half>, i64, i64, i64, i64)
+ declare void @llvm.riscv.sf.mm.f.f.i64.nxv32bf16(i64, <vscale x 32 x bfloat>, <vscale x 32 x bfloat>, i64, i64, i64, i64)
+ declare <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.i64(i64, i64)
+ declare <vscale x 64 x i8> @llvm.riscv.vadd.nxv64i8.nxv64i8.i64(<vscale x 64 x i8>, <vscale x 64 x i8>, <vscale x 64 x i8>, i64)
+ declare void @llvm.riscv.sf.vste16.i64(i64, ptr, i64)
+ declare i64 @llvm.riscv.sf.vsettnt_max.i64(i64, i64)
+ declare i64 @llvm.riscv.sf.vsettm.i64(i64, i64, i64)
+ declare i64 @llvm.riscv.sf.vsettn.i64(i64, i64, i64)
+ declare i64 @llvm.riscv.sf.vsettk.i64(i64, i64, i64)
+ declare void @llvm.riscv.sf.vtzero.i64(i64, i64, i64, i64, i64)
+...
+---
+name: xsfmm_same_state
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vrm8 }
+ - { id: 1, class: vrm8 }
+ - { id: 2, class: gprnox0 }
+ - { id: 3, class: gprnox0 }
+ - { id: 4, class: gprnox0 }
+liveins:
+ - { reg: '$v8m8', virtual-reg: '%0' }
+ - { reg: '$v8m8', virtual-reg: '%1' }
+ - { reg: '$x10', virtual-reg: '%2' }
+ - { reg: '$x11', virtual-reg: '%3' }
+ - { reg: '$x12', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $v8m8, $v16m8, $x10, $x11, $x12
+ ; CHECK-LABEL: name: xsfmm_same_state
+ ; CHECK: liveins: $v8m8, $v16m8, $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x12
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm8 = COPY $v16m8
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vrm8 = COPY $v8m8
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032 /* e16, w2 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoRET
+ %4:gprnox0 = COPY $x12
+ %3:gprnox0 = COPY $x11
+ %2:gprnox0 = COPY $x10
+ %1:vrm8 = COPY $v16m8
+ %0:vrm8 = COPY $v8m8
+ PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm
+ PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm
+ PseudoRET
+...
+---
+name: xsfmm_different_state
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vrm8 }
+ - { id: 1, class: vrm8 }
+ - { id: 2, class: gprnox0 }
+ - { id: 3, class: gprnox0 }
+ - { id: 4, class: gprnox0 }
+liveins:
+ - { reg: '$v8m8', virtual-reg: '%0' }
+ - { reg: '$v8m8', virtual-reg: '%1' }
+ - { reg: '$x10', virtual-reg: '%2' }
+ - { reg: '$x11', virtual-reg: '%3' }
+ - { reg: '$x12', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $v8m8, $v16m8, $x10, $x11, $x12
+ ; CHECK-LABEL: name: xsfmm_different_state
+ ; CHECK: liveins: $v8m8, $v16m8, $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x12
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm8 = COPY $v16m8
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vrm8 = COPY $v8m8
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032 /* e16, w2 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1544 /* e16, w4 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 3, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 3, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 4, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoRET
+ %4:gprnox0 = COPY $x12
+ %3:gprnox0 = COPY $x11
+ %2:gprnox0 = COPY $x10
+ %1:vrm8 = COPY $v16m8
+ %0:vrm8 = COPY $v8m8
+ PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm
+ PseudoSF_MM_F_F $t2, %0:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 4, implicit $frm
+ PseudoRET
+...
+---
+name: xsfmm_different_state_bf
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vrm8 }
+ - { id: 1, class: vrm8 }
+ - { id: 2, class: gprnox0 }
+ - { id: 3, class: gprnox0 }
+ - { id: 4, class: gprnox0 }
+liveins:
+ - { reg: '$v8m8', virtual-reg: '%0' }
+ - { reg: '$v8m8', virtual-reg: '%1' }
+ - { reg: '$x10', virtual-reg: '%2' }
+ - { reg: '$x11', virtual-reg: '%3' }
+ - { reg: '$x12', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $v8m8, $v16m8, $x10, $x11, $x12
+ ; CHECK-LABEL: name: xsfmm_different_state_bf
+ ; CHECK: liveins: $v8m8, $v16m8, $x10, $x11, $x12
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x12
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrm8 = COPY $v16m8
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vrm8 = COPY $v8m8
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032 /* e16, w2 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY4]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1288 /* e16, w2 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F_ALT $t2, [[COPY3]], [[COPY3]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1032 /* e16, w2 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY2]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY4]], [[COPY4]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoRET
+ %4:gprnox0 = COPY $x12
+ %3:gprnox0 = COPY $x11
+ %2:gprnox0 = COPY $x10
+ %1:vrm8 = COPY $v16m8
+ %0:vrm8 = COPY $v8m8
+ PseudoSF_MM_F_F $t2, %0:vrm8, %0:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm
+ PseudoSF_MM_F_F_ALT $t2, %1:vrm8, %1:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm
+ PseudoSF_MM_F_F $t2, %0:vrm8, %0:vrm8, 7, %2:gprnox0, %3:gprnox0, %4:gprnox0, 4, 2, implicit $frm
+ PseudoRET
+...
+---
+name: interleave_rvv_and_xsfmm
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vrm8 }
+ - { id: 1, class: gprnox0 }
+ - { id: 2, class: gpr }
+ - { id: 3, class: gpr }
+ - { id: 4, class: vrm8 }
+ - { id: 5, class: vrm8 }
+liveins:
+ - { reg: '$v8m8', virtual-reg: '%0' }
+ - { reg: '$x10', virtual-reg: '%1' }
+ - { reg: '$x11', virtual-reg: '%2' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $v8m8, $x10, $x11
+ ; CHECK-LABEL: name: interleave_rvv_and_xsfmm
+ ; CHECK: liveins: $v8m8, $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vrm8 = COPY $v8m8
+ ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 512 /* e8, w1 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: [[PseudoSF_VTMV_V_T:%[0-9]+]]:vrm8 = PseudoSF_VTMV_V_T [[ADDI]], $noreg, 3, 1, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY1]], 195 /* e8, m8, ta, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VV_M8_:%[0-9]+]]:vrm8 = PseudoVADD_VV_M8 $noreg, [[COPY2]], [[PseudoSF_VTMV_V_T]], $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: PseudoSF_VSTE16 [[ADDI]], [[COPY]], $noreg, 4, 1, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v8m8 = COPY [[PseudoVADD_VV_M8_]], implicit $vtype
+ ; CHECK-NEXT: PseudoRET implicit $v8m8
+ %2:gpr = COPY $x11
+ %1:gprnox0 = COPY $x10
+ %0:vrm8 = COPY $v8m8
+ %3:gpr = ADDI $x0, 1
+ %4:vrm8 = PseudoSF_VTMV_V_T %3:gpr, %1:gprnox0, 3, 1
+ %5:vrm8 = PseudoVADD_VV_M8 $noreg, %0:vrm8, killed %4:vrm8, %1:gprnox0, 3, 0
+ PseudoSF_VSTE16 %3:gpr, %2:gpr, %1:gprnox0, 4, 1
+ $v8m8 = COPY %5:vrm8
+ PseudoRET implicit $v8m8
+...
+---
+name: interleave_rvv_and_xsfmm2
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vrm8 }
+ - { id: 1, class: gprnox0 }
+ - { id: 2, class: gpr }
+ - { id: 3, class: gpr }
+ - { id: 4, class: vrm8 }
+ - { id: 5, class: vrm8 }
+liveins:
+ - { reg: '$v8m8', virtual-reg: '%0' }
+ - { reg: '$x10', virtual-reg: '%1' }
+ - { reg: '$x11', virtual-reg: '%2' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $v8m8, $x10, $x11
+ ; CHECK-LABEL: name: interleave_rvv_and_xsfmm2
+ ; CHECK: liveins: $v8m8, $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x11
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vrm8 = COPY $v8m8
+ ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
+ ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY1]], 195 /* e8, m8, ta, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VV_M8_:%[0-9]+]]:vrm8 = PseudoVADD_VV_M8 $noreg, [[COPY2]], [[COPY2]], $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 512 /* e8, w1 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead [[PseudoSF_VTMV_V_T:%[0-9]+]]:vrm8 = PseudoSF_VTMV_V_T [[ADDI]], $noreg, 3, 1, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY1]], 195 /* e8, m8, ta, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: [[PseudoVADD_VV_M8_1:%[0-9]+]]:vrm8 = PseudoVADD_VV_M8 $noreg, [[PseudoVADD_VV_M8_]], [[PseudoVADD_VV_M8_]], $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: PseudoSF_VSTE16 [[ADDI]], [[COPY]], $noreg, 4, 1, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v8m8 = COPY [[PseudoVADD_VV_M8_1]], implicit $vtype
+ ; CHECK-NEXT: PseudoRET implicit $v8m8
+ %2:gpr = COPY $x11
+ %1:gprnox0 = COPY $x10
+ %0:vrm8 = COPY $v8m8
+ %3:gpr = ADDI $x0, 1
+ %4:vrm8 = PseudoVADD_VV_M8 $noreg, %0:vrm8, killed %0:vrm8, %1:gprnox0, 3, 0
+ %5:vrm8 = PseudoSF_VTMV_V_T %3:gpr, %1:gprnox0, 3, 1
+ %6:vrm8 = PseudoVADD_VV_M8 $noreg, %4:vrm8, killed %4:vrm8, %1:gprnox0, 3, 0
+ PseudoSF_VSTE16 %3:gpr, %2:gpr, %1:gprnox0, 4, 1
+ $v8m8 = COPY %6:vrm8
+ PseudoRET implicit $v8m8
+...
+---
+name: consecutive_xsfmm
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vrm8 }
+ - { id: 1, class: gprnox0 }
+ - { id: 2, class: gprnox0 }
+ - { id: 3, class: gprnox0 }
+ - { id: 4, class: gprnox0 }
+liveins:
+ - { reg: '$v8m8', virtual-reg: '%0' }
+ - { reg: '$x10', virtual-reg: '%1' }
+ - { reg: '$x11', virtual-reg: '%2' }
+ - { reg: '$x12', virtual-reg: '%3' }
+ - { reg: '$x13', virtual-reg: '%4' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $v8m8, $x10, $x11, $x12, $x13
+ ; CHECK-LABEL: name: consecutive_xsfmm
+ ; CHECK: liveins: $v8m8, $x10, $x11, $x12, $x13
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vrm8 = COPY $v8m8
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnox0 = COPY $x11
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprnox0 = COPY $x12
+ ; CHECK-NEXT: dead [[COPY4:%[0-9]+]]:gprnox0 = COPY $x13
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY2]], 1032 /* e16, w2 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY1]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTK [[COPY3]], 4, 2, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_MM_F_F $t2, [[COPY]], [[COPY]], 7, $noreg, $noreg, $noreg, 4, 2, implicit $frm, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY3]], 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: PseudoSF_VSTE16 [[COPY1]], [[COPY2]], $noreg, 4, 1, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoRET
+ %0:vrm8 = COPY $v8m8
+ %1:gprnox0 = COPY $x10
+ %2:gprnox0 = COPY $x11
+ %3:gprnox0 = COPY $x12
+ %4:gprnox0 = COPY $x13
+ PseudoSF_MM_F_F $t2, %0:vrm8, %0:vrm8, 7, %1:gprnox0, %2:gprnox0, %3:gprnox0, 4, 2, implicit $frm
+ PseudoSF_VSTE16 %1:gprnox0, %2:gprnox0, %3:gprnox0, 4, 1
+ PseudoRET
+...
+---
+name: vsettnt_max
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gprnox0 }
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $x10
+ ; CHECK-LABEL: name: vsettnt_max
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNTX0 killed $x0, 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead [[PseudoSF_VSETTK:%[0-9]+]]:gprnox0 = PseudoSF_VSETTK [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype
+ ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_1:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNTX0 $x0, 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: [[PseudoSF_VSETTM:%[0-9]+]]:gprnox0 = PseudoSF_VSETTM [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype
+ ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTM]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprnox0 = COPY $x10
+ %1:gprnox0 = PseudoSF_VSETTK %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype
+ %2:gprnox0 = PseudoSF_VSETTNTX0 $x0, 520, implicit-def $vl, implicit-def $vtype, implicit $vtype
+ %3:gprnox0 = PseudoSF_VSETTM %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype
+ $x10 = COPY %3:gprnox0
+ PseudoRET implicit $x10
+...
+---
+name: single_vsettm
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gprnox0 }
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $x10
+ ; CHECK-LABEL: name: single_vsettm
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNTX0 killed $x0, 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: [[PseudoSF_VSETTM:%[0-9]+]]:gprnox0 = PseudoSF_VSETTM [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype
+ ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTM]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprnox0 = COPY $x10
+ %1:gprnox0 = PseudoSF_VSETTM %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype
+ $x10 = COPY %1:gprnox0
+ PseudoRET implicit $x10
+...
+---
+name: single_vsettn
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gprnox0 }
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $x10
+ ; CHECK-LABEL: name: single_vsettn
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[PseudoSF_VSETTNT:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNT [[COPY]], 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTNT]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprnox0 = COPY $x10
+ %1:gprnox0 = PseudoSF_VSETTNT %0:gprnox0, 520, implicit-def $vl, implicit-def $vtype, implicit $vtype
+ $x10 = COPY %1:gprnox0
+ PseudoRET implicit $x10
+...
+---
+name: single_vsettk
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gprnox0 }
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $x10
+ ; CHECK-LABEL: name: single_vsettk
+ ; CHECK: liveins: $x10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: dead [[PseudoSF_VSETTNTX0_:%[0-9]+]]:gprnox0 = PseudoSF_VSETTNTX0 killed $x0, 520 /* e16, w1 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: [[PseudoSF_VSETTK:%[0-9]+]]:gprnox0 = PseudoSF_VSETTK [[COPY]], 4, 1, implicit-def $vtype, implicit $vtype, implicit $vtype
+ ; CHECK-NEXT: $x10 = COPY [[PseudoSF_VSETTK]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:gprnox0 = COPY $x10
+ %1:gprnox0 = PseudoSF_VSETTK %0:gprnox0, 4, 1, implicit-def $vtype, implicit $vtype
+ $x10 = COPY %1:gprnox0
+ PseudoRET implicit $x10
+...
+---
+name: sf_vtzero
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: gprnox0 }
+ - { id: 1, class: gprnox0 }
+liveins:
+ - { reg: '$x10', virtual-reg: '%0' }
+ - { reg: '$x11', virtual-reg: '%1' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0.entry:
+ liveins: $x10, $x11
+ ; CHECK-LABEL: name: sf_vtzero
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnox0 = COPY $x11
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTNT [[COPY1]], 1536 /* e8, w4 */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoSF_VSETTM [[COPY]], 3, 3, implicit-def $vtype, implicit $vtype
+ ; CHECK-NEXT: PseudoSF_VTZERO_T $t1, $noreg, $noreg, 3, 4, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: PseudoRET
+ %0:gprnox0 = COPY $x10
+ %1:gprnox0 = COPY $x11
+ PseudoSF_VTZERO_T $t1, %0:gprnox0, %1:gprnox0, 3, 4
+ PseudoRET
+...
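Note: this exercises the XSfmm handling in riscv-insert-vsetvli: the pass materializes the tile configuration state by emitting PseudoSF_VSETTNT/PseudoSF_VSETTM/PseudoSF_VSETTK from each pseudo's tn/tm/tk operands (which are then rewritten to $noreg), tracking that state the way it tracks vl/vtype. The CHECK lines show the elision this enables: in xsfmm_same_state the second sf.mm reuses the earlier VSETTNT, in xsfmm_different_state the w2 -> w4 widen change forces a fresh VSETTNT, and the interleave tests show the state being re-established whenever plain RVV code clobbers vtype in between.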
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e4m3.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e4m3.ll
new file mode 100644
index 0000000..9b9a849
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e4m3.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.mm.e4m3.e4m3.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen)
+
+define void @test_sf_mm_e4m3_e4m3_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) {
+; CHECK-LABEL: test_sf_mm_e4m3_e4m3_w4_u8m8_u8m8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4
+; CHECK-NEXT: sf.vsettm zero, a1
+; CHECK-NEXT: sf.vsettk zero, a3
+; CHECK-NEXT: sf.mm.e4m3.e4m3 mt0, v8, v16
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.mm.e4m3.e4m3.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4)
+ ret void
+}
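Note: the sed-over-iXLen RUN lines (used by all the sifive_sf_mm_* and sifive_sf_v* tests below) are the standard way to share one test body between RV32 and RV64: the file is written against a placeholder iXLen type, and each RUN line instantiates it as i32 or i64 before piping the result to llc. After the i64 substitution, for example, the declaration above reads:

  declare void @llvm.riscv.sf.mm.e4m3.e4m3.i64.nxv64i8(i64, <vscale x 64 x i8>, <vscale x 64 x i8>, i64, i64, i64, i64)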
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e5m2.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e5m2.ll
new file mode 100644
index 0000000..b63974f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e5m2.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.mm.e4m3.e5m2.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen)
+
+define void @test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) {
+; CHECK-LABEL: test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4
+; CHECK-NEXT: sf.vsettm zero, a1
+; CHECK-NEXT: sf.vsettk zero, a3
+; CHECK-NEXT: sf.mm.e4m3.e5m2 mt0, v8, v16
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.mm.e4m3.e5m2.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e4m3.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e4m3.ll
new file mode 100644
index 0000000..62d629b1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e4m3.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.mm.e5m2.e4m3.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen)
+
+define void @test_sf_mm_e5m2_e4m3_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) {
+; CHECK-LABEL: test_sf_mm_e5m2_e4m3_w4_u8m8_u8m8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4
+; CHECK-NEXT: sf.vsettm zero, a1
+; CHECK-NEXT: sf.vsettk zero, a3
+; CHECK-NEXT: sf.mm.e5m2.e4m3 mt0, v8, v16
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.mm.e5m2.e4m3.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e5m2.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e5m2.ll
new file mode 100644
index 0000000..7a90c97
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e5m2.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.mm.e5m2.e5m2.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen)
+
+define void @test_sf_mm_e5m2_e5m2_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) {
+; CHECK-LABEL: test_sf_mm_e5m2_e5m2_w4_u8m8_u8m8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4
+; CHECK-NEXT: sf.vsettm zero, a1
+; CHECK-NEXT: sf.vsettk zero, a3
+; CHECK-NEXT: sf.mm.e5m2.e5m2 mt0, v8, v16
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.mm.e5m2.e5m2.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_f_f.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_f_f.ll
new file mode 100644
index 0000000..29451c6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_f_f.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+xsfmm32a32f -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+xsfmm32a32f -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.mm.f.f.iXLen.nxv32f16(iXLen, <vscale x 32 x half>, <vscale x 32 x half>, iXLen, iXLen, iXLen, iXLen)
+
+define void @test_sf_mm_f_f_w2_f16m8(iXLen %mtd, <vscale x 32 x half> %v1, <vscale x 32 x half> %v2, iXLen %tm, iXLen %tn, iXLen %tk) {
+; CHECK-LABEL: test_sf_mm_f_f_w2_f16m8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e16, w2
+; CHECK-NEXT: sf.vsettm zero, a1
+; CHECK-NEXT: sf.vsettk zero, a3
+; CHECK-NEXT: sf.mm.f.f mt0, v8, v16
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.mm.f.f.iXLen.nxv32f16(iXLen 0, <vscale x 32 x half> %v1, <vscale x 32 x half> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 2)
+ ret void
+}
+
+declare void @llvm.riscv.sf.mm.f.f.iXLen.nxv16f32(iXLen, <vscale x 16 x float>, <vscale x 16 x float>, iXLen, iXLen, iXLen, iXLen)
+
+define void @test_sf_mm_f_f_w1_f32m8(iXLen %mtd, <vscale x 16 x float> %v1, <vscale x 16 x float> %v2, iXLen %tm, iXLen %tn, iXLen %tk) {
+; CHECK-LABEL: test_sf_mm_f_f_w1_f32m8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e32, w1
+; CHECK-NEXT: sf.vsettm zero, a1
+; CHECK-NEXT: sf.vsettk zero, a3
+; CHECK-NEXT: sf.mm.f.f mt0, v8, v16
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.mm.f.f.iXLen.nxv16f32(iXLen 0, <vscale x 16 x float> %v1, <vscale x 16 x float> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 1)
+ ret void
+}
+
+declare void @llvm.riscv.sf.mm.f.f.iXLen.nxv8f64(iXLen, <vscale x 8 x double>, <vscale x 8 x double>, iXLen, iXLen, iXLen, iXLen)
+
+define void @test_sf_mm_f_f_w1_f64m8(iXLen %mtd, <vscale x 8 x double> %v1, <vscale x 8 x double> %v2, iXLen %tm, iXLen %tn, iXLen %tk) {
+; CHECK-LABEL: test_sf_mm_f_f_w1_f64m8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e64, w1
+; CHECK-NEXT: sf.vsettm zero, a1
+; CHECK-NEXT: sf.vsettk zero, a3
+; CHECK-NEXT: sf.mm.f.f mt0, v8, v16
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.mm.f.f.iXLen.nxv8f64(iXLen 0, <vscale x 8 x double> %v1, <vscale x 8 x double> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 1)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_s.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_s.ll
new file mode 100644
index 0000000..6a4b29f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_s.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8i \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.mm.s.s.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen)
+
+define void @test_sf_mm_s_s_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) {
+; CHECK-LABEL: test_sf_mm_s_s_w4_i8m8_i8m8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4
+; CHECK-NEXT: sf.vsettm zero, a1
+; CHECK-NEXT: sf.vsettk zero, a3
+; CHECK-NEXT: sf.mm.s.s mt0, v8, v16
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.mm.s.s.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_u.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_u.ll
new file mode 100644
index 0000000..79239b0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_u.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8i \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.mm.s.u.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen)
+
+define void @test_sf_mm_s_u_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) {
+; CHECK-LABEL: test_sf_mm_s_u_w4_i8m8_i8m8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4
+; CHECK-NEXT: sf.vsettm zero, a1
+; CHECK-NEXT: sf.vsettk zero, a3
+; CHECK-NEXT: sf.mm.s.u mt0, v8, v16
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.mm.s.u.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_s.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_s.ll
new file mode 100644
index 0000000..b0d039b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_s.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8i \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.mm.u.s.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen)
+
+define void @test_sf_mm_u_s_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) {
+; CHECK-LABEL: test_sf_mm_u_s_w4_i8m8_i8m8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4
+; CHECK-NEXT: sf.vsettm zero, a1
+; CHECK-NEXT: sf.vsettk zero, a3
+; CHECK-NEXT: sf.mm.u.s mt0, v8, v16
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.mm.u.s.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_u.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_u.ll
new file mode 100644
index 0000000..913c277
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_u.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8i \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.mm.u.u.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen)
+
+define void @test_sf_mm_u_u_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) {
+; CHECK-LABEL: test_sf_mm_u_u_w4_i8m8_i8m8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4
+; CHECK-NEXT: sf.vsettm zero, a1
+; CHECK-NEXT: sf.vsettk zero, a3
+; CHECK-NEXT: sf.mm.u.u mt0, v8, v16
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.mm.u.u.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte16.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte16.ll
new file mode 100644
index 0000000..8048dec
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte16.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.vlte16.iXLen(iXLen, ptr, iXLen)
+
+define dso_local void @test_sf_vlte16(iXLen %tss, ptr %base, iXLen %vl) {
+; CHECK-LABEL: test_sf_vlte16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e16, w1
+; CHECK-NEXT: sf.vlte16 a0, (a1)
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vlte16.iXLen(iXLen %tss, ptr %base, iXLen %vl)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte32.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte32.ll
new file mode 100644
index 0000000..a526dc8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte32.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.vlte32.iXLen(iXLen, ptr, iXLen)
+
+define dso_local void @test_sf_vlte32(iXLen %tss, ptr %base, iXLen %vl) {
+; CHECK-LABEL: test_sf_vlte32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e32, w1
+; CHECK-NEXT: sf.vlte32 a0, (a1)
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vlte32.iXLen(iXLen %tss, ptr %base, iXLen %vl)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte64.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte64.ll
new file mode 100644
index 0000000..ed0c48a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte64.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.vlte64.iXLen(iXLen, ptr, iXLen)
+
+define dso_local void @test_sf_vlte64(iXLen %tss, ptr %base, iXLen %vl) {
+; CHECK-LABEL: test_sf_vlte64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e64, w1
+; CHECK-NEXT: sf.vlte64 a0, (a1)
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vlte64.iXLen(iXLen %tss, ptr %base, iXLen %vl)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte8.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte8.ll
new file mode 100644
index 0000000..67b3ed2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte8.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.vlte8.iXLen(iXLen, ptr, iXLen)
+
+define dso_local void @test_sf_vlte8(iXLen %tss, ptr %base, iXLen %vl) {
+; CHECK-LABEL: test_sf_vlte8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e8, w1
+; CHECK-NEXT: sf.vlte8 a0, (a1)
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vlte8.iXLen(iXLen %tss, ptr %base, iXLen %vl)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettk.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettk.ll
new file mode 100644
index 0000000..4da37fa
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettk.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare iXLen @llvm.riscv.sf.vsettk.iXLen(iXLen, iXLen, iXLen)
+
+define iXLen @test_sf_vsettk(iXLen %tk) {
+; CHECK-LABEL: test_sf_vsettk:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt a1, zero, e16, w2
+; CHECK-NEXT: sf.vsettk a0, a0
+; CHECK-NEXT: ret
+ entry:
+ %0 = call iXLen @llvm.riscv.sf.vsettk.iXLen(iXLen %tk, iXLen 1, iXLen 2)
+ ret iXLen %0
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettm.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettm.ll
new file mode 100644
index 0000000..143c26c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettm.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare iXLen @llvm.riscv.sf.vsettm.iXLen(iXLen, iXLen, iXLen)
+
+define iXLen @test_sf_vsettm(iXLen %tm) {
+; CHECK-LABEL: test_sf_vsettm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt a1, zero, e8, w4
+; CHECK-NEXT: sf.vsettm a0, a0
+; CHECK-NEXT: ret
+ entry:
+ %0 = call iXLen @llvm.riscv.sf.vsettm.iXLen(iXLen %tm, iXLen 0, iXLen 3)
+ ret iXLen %0
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettnt.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettnt.ll
new file mode 100644
index 0000000..48fa1bc8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettnt.ll
@@ -0,0 +1,72 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen, iXLen, iXLen)
+
+define iXLen @test_sf_vsettnt_e8w1(iXLen %tn) {
+; CHECK-LABEL: test_sf_vsettnt_e8w1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt a0, a0, e8, w1
+; CHECK-NEXT: ret
+ entry:
+ %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 0, iXLen 1)
+ ret iXLen %0
+}
+
+define iXLen @test_sf_vsettnt_e8w2(iXLen %tn) {
+; CHECK-LABEL: test_sf_vsettnt_e8w2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt a0, a0, e8, w2
+; CHECK-NEXT: ret
+ entry:
+ %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 0, iXLen 2)
+ ret iXLen %0
+}
+
+define iXLen @test_sf_vsettnt_e8w4(iXLen %tn) {
+; CHECK-LABEL: test_sf_vsettnt_e8w4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt a0, a0, e8, w4
+; CHECK-NEXT: ret
+ entry:
+ %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 0, iXLen 3)
+ ret iXLen %0
+}
+
+define iXLen @test_sf_vsettnt_e16w1(iXLen %tn) {
+; CHECK-LABEL: test_sf_vsettnt_e16w1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt a0, a0, e16, w1
+; CHECK-NEXT: ret
+ entry:
+ %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 1, iXLen 1)
+ ret iXLen %0
+}
+
+define iXLen @test_sf_vsettnt_e16w2(iXLen %tn) {
+; CHECK-LABEL: test_sf_vsettnt_e16w2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt a0, a0, e16, w2
+; CHECK-NEXT: ret
+ entry:
+ %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 1, iXLen 2)
+ ret iXLen %0
+}
+
+define iXLen @test_sf_vsettnt_e16w4(iXLen %tn) {
+; CHECK-LABEL: test_sf_vsettnt_e16w4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt a0, a0, e16, w4
+; CHECK-NEXT: ret
+ entry:
+ %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 1, iXLen 3)
+ ret iXLen %0
+}
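
The six cases above cover sew selectors 0 (e8) and 1 (e16), each with twiden
selectors 1, 2 and 3 (w1, w2, w4). Note also that in the vsettk/vsettm tests
above, the compiler apparently first emits an sf.vsettnt with a zero tn to
establish the sew/twiden state that the sf.vsettk/sf.vsettm instructions do
not encode themselves. Assuming the sew encoding continues linearly, selector
2 would request e32; the following is an untested extrapolation in the same
style, not a case from the patch:

define iXLen @test_sf_vsettnt_e32w1(iXLen %tn) {
entry:
  %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 2, iXLen 1)
  ret iXLen %0
}

By analogy with the checks above, the expected lowering would be a single
sf.vsettnt a0, a0, e32, w1.
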
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste16.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste16.ll
new file mode 100644
index 0000000..7a76151
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste16.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.vste16.iXLen(iXLen, ptr, iXLen)
+
+define dso_local void @test_sf_vste16(iXLen %tss, ptr %base, iXLen %vl) {
+; CHECK-LABEL: test_sf_vste16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e16, w1
+; CHECK-NEXT: sf.vste16 a0, (a1)
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vste16.iXLen(iXLen %tss, ptr %base, iXLen %vl)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste32.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste32.ll
new file mode 100644
index 0000000..8ff6e6a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste32.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.vste32.iXLen(iXLen, ptr, iXLen)
+
+define dso_local void @test_sf_vste32(iXLen %tss, ptr %base, iXLen %vl) {
+; CHECK-LABEL: test_sf_vste32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e32, w1
+; CHECK-NEXT: sf.vste32 a0, (a1)
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vste32.iXLen(iXLen %tss, ptr %base, iXLen %vl)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste64.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste64.ll
new file mode 100644
index 0000000..53990e4
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste64.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.vste64.iXLen(iXLen, ptr, iXLen)
+
+define dso_local void @test_sf_vste64(iXLen %tss, ptr %base, iXLen %vl) {
+; CHECK-LABEL: test_sf_vste64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e64, w1
+; CHECK-NEXT: sf.vste64 a0, (a1)
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vste64.iXLen(iXLen %tss, ptr %base, iXLen %vl)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste8.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste8.ll
new file mode 100644
index 0000000..09b7259
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste8.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.vste8.iXLen(iXLen, ptr, iXLen)
+
+define dso_local void @test_sf_vste8(iXLen %tss, ptr %base, iXLen %vl) {
+; CHECK-LABEL: test_sf_vste8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a2, e8, w1
+; CHECK-NEXT: sf.vste8 a0, (a1)
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vste8.iXLen(iXLen %tss, ptr %base, iXLen %vl)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtdiscard.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtdiscard.ll
new file mode 100644
index 0000000..394eb60
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtdiscard.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.vtdiscard()
+
+define dso_local void @test_sf_vtdiscard() {
+; CHECK-LABEL: test_sf_vtdiscard:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vtdiscard
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vtdiscard()
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_t_v.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_t_v.ll
new file mode 100644
index 0000000..66c9d26
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_t_v.ll
@@ -0,0 +1,114 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.vtmv.t.v.nxv32bf16.iXLen(iXLen, <vscale x 32 x bfloat>, iXLen)
+
+define void @test_sf_vtmv_t_v_bf16m8(iXLen %tss, <vscale x 32 x bfloat> %src, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_t_v_bf16m8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1
+; CHECK-NEXT: sf.vtmv.t.v a0, v8
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vtmv.t.v.nxv32bf16.iXLen(iXLen %tss, <vscale x 32 x bfloat> %src, iXLen %vl)
+ ret void
+}
+
+declare void @llvm.riscv.sf.vtmv.t.v.nxv32f16.iXLen(iXLen, <vscale x 32 x half>, iXLen)
+
+define void @test_sf_vtmv_t_v_f16(iXLen %tss, <vscale x 32 x half> %src, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_t_v_f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1
+; CHECK-NEXT: sf.vtmv.t.v a0, v8
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vtmv.t.v.nxv32f16.iXLen(iXLen %tss, <vscale x 32 x half> %src, iXLen %vl)
+ ret void
+}
+
+declare void @llvm.riscv.sf.vtmv.t.v.nxv16f32.iXLen(iXLen, <vscale x 16 x float>, iXLen)
+
+define void @test_sf_vtmv_t_v_f32(iXLen %tss, <vscale x 16 x float> %src, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_t_v_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1
+; CHECK-NEXT: sf.vtmv.t.v a0, v8
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vtmv.t.v.nxv16f32.iXLen(iXLen %tss, <vscale x 16 x float> %src, iXLen %vl)
+ ret void
+}
+
+declare void @llvm.riscv.sf.vtmv.t.v.nxv8f64.iXLen(iXLen, <vscale x 8 x double>, iXLen)
+
+define void @test_sf_vtmv_t_v_f64(iXLen %tss, <vscale x 8 x double> %src, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_t_v_f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1
+; CHECK-NEXT: sf.vtmv.t.v a0, v8
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vtmv.t.v.nxv8f64.iXLen(iXLen %tss, <vscale x 8 x double> %src, iXLen %vl)
+ ret void
+}
+
+declare void @llvm.riscv.sf.vtmv.t.v.nxv64i8.iXLen(iXLen, <vscale x 64 x i8>, iXLen)
+
+define void @test_sf_vtmv_t_v_i8(iXLen %tss, <vscale x 64 x i8> %src, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_t_v_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e8, w1
+; CHECK-NEXT: sf.vtmv.t.v a0, v8
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vtmv.t.v.nxv64i8.iXLen(iXLen %tss, <vscale x 64 x i8> %src, iXLen %vl)
+ ret void
+}
+
+declare void @llvm.riscv.sf.vtmv.t.v.nxv32i16.iXLen(iXLen, <vscale x 32 x i16>, iXLen)
+
+define void @test_sf_vtmv_t_v_i16(iXLen %tss, <vscale x 32 x i16> %src, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_t_v_i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1
+; CHECK-NEXT: sf.vtmv.t.v a0, v8
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vtmv.t.v.nxv32i16.iXLen(iXLen %tss, <vscale x 32 x i16> %src, iXLen %vl)
+ ret void
+}
+
+declare void @llvm.riscv.sf.vtmv.t.v.nxv16i32.iXLen(iXLen, <vscale x 16 x i32>, iXLen)
+
+define void @test_sf_vtmv_t_v_i32(iXLen %tss, <vscale x 16 x i32> %src, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_t_v_i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1
+; CHECK-NEXT: sf.vtmv.t.v a0, v8
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vtmv.t.v.nxv16i32.iXLen(iXLen %tss, <vscale x 16 x i32> %src, iXLen %vl)
+ ret void
+}
+
+declare void @llvm.riscv.sf.vtmv.t.v.nxv8i64.iXLen(iXLen, <vscale x 8 x i64>, iXLen)
+
+define void @test_sf_vtmv_t_v_i64(iXLen %tss, <vscale x 8 x i64> %src, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_t_v_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1
+; CHECK-NEXT: sf.vtmv.t.v a0, v8
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vtmv.t.v.nxv8i64.iXLen(iXLen %tss, <vscale x 8 x i64> %src, iXLen %vl)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_v_t.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_v_t.ll
new file mode 100644
index 0000000..0dcc2ab
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_v_t.ll
@@ -0,0 +1,114 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare <vscale x 32 x bfloat> @llvm.riscv.sf.vtmv.v.t.nxv32bf16.iXLen(iXLen, iXLen)
+
+define <vscale x 32 x bfloat> @test_sf_vtmv_v_t_bf16m8(iXLen %tss, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_v_t_bf16m8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1
+; CHECK-NEXT: sf.vtmv.v.t v8, a0
+; CHECK-NEXT: ret
+ entry:
+ %0 = call <vscale x 32 x bfloat> @llvm.riscv.sf.vtmv.v.t.nxv32bf16.iXLen(iXLen %tss, iXLen %vl)
+ ret <vscale x 32 x bfloat> %0
+}
+
+declare <vscale x 32 x half> @llvm.riscv.sf.vtmv.v.t.nxv32f16.iXLen(iXLen, iXLen)
+
+define <vscale x 32 x half> @test_sf_vtmv_v_t_f16(iXLen %tss, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_v_t_f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1
+; CHECK-NEXT: sf.vtmv.v.t v8, a0
+; CHECK-NEXT: ret
+ entry:
+ %0 = call <vscale x 32 x half> @llvm.riscv.sf.vtmv.v.t.nxv32f16.iXLen(iXLen %tss, iXLen %vl)
+ ret <vscale x 32 x half> %0
+}
+
+declare <vscale x 16 x float> @llvm.riscv.sf.vtmv.v.t.nxv16f32.iXLen(iXLen, iXLen)
+
+define <vscale x 16 x float> @test_sf_vtmv_v_t_f32(iXLen %tss, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_v_t_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1
+; CHECK-NEXT: sf.vtmv.v.t v8, a0
+; CHECK-NEXT: ret
+ entry:
+ %0 = call <vscale x 16 x float> @llvm.riscv.sf.vtmv.v.t.nxv16f32.iXLen(iXLen %tss, iXLen %vl)
+ ret <vscale x 16 x float> %0
+}
+
+declare <vscale x 8 x double> @llvm.riscv.sf.vtmv.v.t.nxv8f64.iXLen(iXLen, iXLen)
+
+define <vscale x 8 x double> @test_sf_vtmv_v_t_f64(iXLen %tss, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_v_t_f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1
+; CHECK-NEXT: sf.vtmv.v.t v8, a0
+; CHECK-NEXT: ret
+ entry:
+ %0 = call <vscale x 8 x double> @llvm.riscv.sf.vtmv.v.t.nxv8f64.iXLen(iXLen %tss, iXLen %vl)
+ ret <vscale x 8 x double> %0
+}
+
+declare <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.iXLen(iXLen, iXLen)
+
+define <vscale x 64 x i8> @test_sf_vtmv_v_t_i8(iXLen %tss, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_v_t_i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e8, w1
+; CHECK-NEXT: sf.vtmv.v.t v8, a0
+; CHECK-NEXT: ret
+ entry:
+ %0 = call <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.iXLen(iXLen %tss, iXLen %vl)
+ ret <vscale x 64 x i8> %0
+}
+
+declare <vscale x 32 x i16> @llvm.riscv.sf.vtmv.v.t.nxv32i16.iXLen(iXLen, iXLen)
+
+define <vscale x 32 x i16> @test_sf_vtmv_v_t_i16(iXLen %tss, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_v_t_i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1
+; CHECK-NEXT: sf.vtmv.v.t v8, a0
+; CHECK-NEXT: ret
+ entry:
+ %0 = call <vscale x 32 x i16> @llvm.riscv.sf.vtmv.v.t.nxv32i16.iXLen(iXLen %tss, iXLen %vl)
+ ret <vscale x 32 x i16> %0
+}
+
+declare <vscale x 16 x i32> @llvm.riscv.sf.vtmv.v.t.nxv16i32.iXLen(iXLen, iXLen)
+
+define <vscale x 16 x i32> @test_sf_vtmv_v_t_i32(iXLen %tss, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_v_t_i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1
+; CHECK-NEXT: sf.vtmv.v.t v8, a0
+; CHECK-NEXT: ret
+ entry:
+ %0 = call <vscale x 16 x i32> @llvm.riscv.sf.vtmv.v.t.nxv16i32.iXLen(iXLen %tss, iXLen %vl)
+ ret <vscale x 16 x i32> %0
+}
+
+declare <vscale x 8 x i64> @llvm.riscv.sf.vtmv.v.t.nxv8i64.iXLen(iXLen, iXLen)
+
+define <vscale x 8 x i64> @test_sf_vtmv_v_t_i64(iXLen %tss, iXLen %vl) {
+; CHECK-LABEL: test_sf_vtmv_v_t_i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1
+; CHECK-NEXT: sf.vtmv.v.t v8, a0
+; CHECK-NEXT: ret
+ entry:
+ %0 = call <vscale x 8 x i64> @llvm.riscv.sf.vtmv.v.t.nxv8i64.iXLen(iXLen %tss, iXLen %vl)
+ ret <vscale x 8 x i64> %0
+}
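
All eight element types above follow one pattern: sf.vsettnt with a zero
destination configures tn from %vl at the element width of the move, always
with w1, and the move itself then needs only the tile-slice index. A sketch of
a tile-to-vector-to-tile round trip built from two declarations in these files
(an illustration, not a test from the patch):

declare <vscale x 16 x i32> @llvm.riscv.sf.vtmv.v.t.nxv16i32.iXLen(iXLen, iXLen)
declare void @llvm.riscv.sf.vtmv.t.v.nxv16i32.iXLen(iXLen, <vscale x 16 x i32>, iXLen)

; Read tile %src into a vector, then write that vector into tile %dst.
define void @roundtrip_i32(iXLen %src, iXLen %dst, iXLen %vl) {
entry:
  %v = call <vscale x 16 x i32> @llvm.riscv.sf.vtmv.v.t.nxv16i32.iXLen(iXLen %src, iXLen %vl)
  call void @llvm.riscv.sf.vtmv.t.v.nxv16i32.iXLen(iXLen %dst, <vscale x 16 x i32> %v, iXLen %vl)
  ret void
}
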
diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtzero_t.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtzero_t.ll
new file mode 100644
index 0000000..bbccb02
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtzero_t.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \
+; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare void @llvm.riscv.sf.vtzero.t.iXLen(iXLen, iXLen, iXLen, iXLen, iXLen)
+define void @test_sf_vtzero_t(iXLen %tm, iXLen %tn) {
+; CHECK-LABEL: test_sf_vtzero_t:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sf.vsettnt zero, a1, e8, w4
+; CHECK-NEXT: sf.vsettm zero, a0
+; CHECK-NEXT: sf.vtzero.t mt0
+; CHECK-NEXT: ret
+ entry:
+ call void @llvm.riscv.sf.vtzero.t.iXLen(iXLen 0, iXLen %tm, iXLen %tn, iXLen 3, iXLen 4)
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/RISCV/select-to-and-zext.ll b/llvm/test/CodeGen/RISCV/select-to-and-zext.ll
index 2f03ff9..318268a 100644
--- a/llvm/test/CodeGen/RISCV/select-to-and-zext.ll
+++ b/llvm/test/CodeGen/RISCV/select-to-and-zext.ll
@@ -15,8 +15,7 @@ define i32 @from_cmpeq(i32 %xx, i32 %y) {
;
; RV64I-LABEL: from_cmpeq:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: addi a0, a0, -9
+; RV64I-NEXT: addiw a0, a0, -9
; RV64I-NEXT: seqz a0, a0
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
@@ -39,8 +38,7 @@ define i32 @from_cmpeq_fail_bad_andmask(i32 %xx, i32 %y) {
;
; RV64I-LABEL: from_cmpeq_fail_bad_andmask:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: addi a0, a0, -9
+; RV64I-NEXT: addiw a0, a0, -9
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a1, a0
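
The RV64 check updates in this file (and in setcc-logic.ll, sext-zext-trunc.ll
and xaluo.ll below) all come from the same fold: addiw computes the 32-bit sum
and sign-extends it, so its result is zero exactly when the low 32 bits of
a0 + imm wrap to zero, which is also exactly when sext.w a0 followed by a
64-bit addi produces zero. When the only consumer is seqz/snez, the separate
sext.w is therefore redundant. A minimal reproducer of the tested shape (a
sketch, not part of the patch):

; Lowers on RV64 to: addiw a0, a0, -9 ; seqz a0, a0
define i1 @eq_const(i32 %x) {
entry:
  %a = add i32 %x, -9
  %c = icmp eq i32 %a, 0
  ret i1 %c
}
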
diff --git a/llvm/test/CodeGen/RISCV/setcc-logic.ll b/llvm/test/CodeGen/RISCV/setcc-logic.ll
index fabb573..4e14893 100644
--- a/llvm/test/CodeGen/RISCV/setcc-logic.ll
+++ b/llvm/test/CodeGen/RISCV/setcc-logic.ll
@@ -104,9 +104,8 @@ define i1 @and_icmps_const_not1bit_diff(i32 %x) nounwind {
;
; RV64I-LABEL: and_icmps_const_not1bit_diff:
; RV64I: # %bb.0:
-; RV64I-NEXT: sext.w a0, a0
-; RV64I-NEXT: addi a1, a0, -44
-; RV64I-NEXT: addi a0, a0, -92
+; RV64I-NEXT: addiw a1, a0, -44
+; RV64I-NEXT: addiw a0, a0, -92
; RV64I-NEXT: snez a1, a1
; RV64I-NEXT: snez a0, a0
; RV64I-NEXT: and a0, a1, a0
diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
index bdbe4ed..07bfbe6 100644
--- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
+++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll
@@ -674,8 +674,7 @@ define i32 @sext_of_not_cmp_i32(i32 %x) {
;
; RV64-LABEL: sext_of_not_cmp_i32:
; RV64: # %bb.0:
-; RV64-NEXT: sext.w a0, a0
-; RV64-NEXT: addi a0, a0, -7
+; RV64-NEXT: addiw a0, a0, -7
; RV64-NEXT: seqz a0, a0
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: ret
@@ -718,8 +717,7 @@ define i32 @dec_of_zexted_cmp_i32(i32 %x) {
;
; RV64-LABEL: dec_of_zexted_cmp_i32:
; RV64: # %bb.0:
-; RV64-NEXT: sext.w a0, a0
-; RV64-NEXT: addi a0, a0, -7
+; RV64-NEXT: addiw a0, a0, -7
; RV64-NEXT: seqz a0, a0
; RV64-NEXT: addi a0, a0, -1
; RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index 2751332c..bf6802d 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -1047,8 +1047,8 @@ define zeroext i1 @usubo.i32.constant.lhs(i32 signext %v1, ptr %res) {
; RV64-LABEL: usubo.i32.constant.lhs:
; RV64: # %bb.0: # %entry
; RV64-NEXT: li a2, -2
-; RV64-NEXT: subw a2, a2, a0
-; RV64-NEXT: addi a0, a2, 1
+; RV64-NEXT: sub a2, a2, a0
+; RV64-NEXT: addiw a0, a2, 1
; RV64-NEXT: seqz a0, a0
; RV64-NEXT: sw a2, 0(a1)
; RV64-NEXT: ret
@@ -1065,8 +1065,8 @@ define zeroext i1 @usubo.i32.constant.lhs(i32 signext %v1, ptr %res) {
; RV64ZBA-LABEL: usubo.i32.constant.lhs:
; RV64ZBA: # %bb.0: # %entry
; RV64ZBA-NEXT: li a2, -2
-; RV64ZBA-NEXT: subw a2, a2, a0
-; RV64ZBA-NEXT: addi a0, a2, 1
+; RV64ZBA-NEXT: sub a2, a2, a0
+; RV64ZBA-NEXT: addiw a0, a2, 1
; RV64ZBA-NEXT: seqz a0, a0
; RV64ZBA-NEXT: sw a2, 0(a1)
; RV64ZBA-NEXT: ret
@@ -1083,8 +1083,8 @@ define zeroext i1 @usubo.i32.constant.lhs(i32 signext %v1, ptr %res) {
; RV64ZICOND-LABEL: usubo.i32.constant.lhs:
; RV64ZICOND: # %bb.0: # %entry
; RV64ZICOND-NEXT: li a2, -2
-; RV64ZICOND-NEXT: subw a2, a2, a0
-; RV64ZICOND-NEXT: addi a0, a2, 1
+; RV64ZICOND-NEXT: sub a2, a2, a0
+; RV64ZICOND-NEXT: addiw a0, a2, 1
; RV64ZICOND-NEXT: seqz a0, a0
; RV64ZICOND-NEXT: sw a2, 0(a1)
; RV64ZICOND-NEXT: ret
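
In the usubo case the same idea moves the sign extension rather than removing
it: the 32-bit difference is stored with sw, which reads only the low 32 bits,
and the overflow bit is an equality test against zero, so the single w-form
migrates from the sub to the final add (subw + addi becomes sub + addiw). A
sketch of that shape (not part of the patch):

define i1 @store_and_test(i32 %a, i32 %b, ptr %p) {
entry:
  %d = sub i32 %a, %b
  store i32 %d, ptr %p          ; sw needs only the low 32 bits of %d
  %inc = add i32 %d, 1          ; only a zero test of this sum is consumed,
  %z = icmp eq i32 %inc, 0      ; so the final addiw is the one sign extension needed
  ret i1 %z
}
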
diff --git a/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll
new file mode 100644
index 0000000..abbd953
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/mem-intrinsics-offsets.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mcpu=mvp -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s
+
+; This test ensures that the loads and stores generated for small memcpy,
+; memmove, and memset calls use constant offset folding.
+
+
+target triple = "wasm32-unknown-unknown"
+
+define void @call_memset(ptr) #0 {
+; CHECK-LABEL: call_memset:
+; CHECK: .functype call_memset (i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i64.const $push0=, 0
+; CHECK-NEXT: i64.store 8($0):p2align=0, $pop0
+; CHECK-NEXT: i64.const $push1=, 0
+; CHECK-NEXT: i64.store 0($0):p2align=0, $pop1
+; CHECK-NEXT: # fallthrough-return
+ call void @llvm.memset.p0.i32(ptr align 1 %0, i8 0, i32 16, i1 false)
+ ret void
+}
+
+define void @call_memcpy(ptr %dst, ptr %src) #0 {
+; CHECK-LABEL: call_memcpy:
+; CHECK: .functype call_memcpy (i32, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i64.load $push0=, 8($1):p2align=0
+; CHECK-NEXT: i64.store 8($0):p2align=0, $pop0
+; CHECK-NEXT: i64.load $push1=, 0($1):p2align=0
+; CHECK-NEXT: i64.store 0($0):p2align=0, $pop1
+; CHECK-NEXT: # fallthrough-return
+ call void @llvm.memcpy.p0.p0.i32(ptr align 1 %dst, ptr align 1 %src, i32 16, i1 false)
+ ret void
+}
+
+
+define void @call_memmove(ptr %dst, ptr %src) #0 {
+; CHECK-LABEL: call_memmove:
+; CHECK: .functype call_memmove (i32, i32) -> ()
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: i64.load $2=, 0($1):p2align=0
+; CHECK-NEXT: i64.load $push0=, 8($1):p2align=0
+; CHECK-NEXT: i64.store 8($0):p2align=0, $pop0
+; CHECK-NEXT: i64.store 0($0):p2align=0, $2
+; CHECK-NEXT: # fallthrough-return
+ call void @llvm.memmove.p0.p0.i32(ptr align 1 %dst, ptr align 1 %src, i32 16, i1 false)
+ ret void
+}
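
The 8($0) and 0($0) operands above show the +8 for the high half of each
16-byte operation folded into the load/store offset immediate rather than
computed with an explicit i32.add. A related case one might expect to fold the
same way (an assumption, not covered by this test) is a destination at a
constant offset from the incoming pointer:

; Presumably stores at 32($0) and 40($0), if the GEP constant folds as well.
define void @memset_at_offset(ptr %p) {
  %q = getelementptr inbounds i8, ptr %p, i32 32
  call void @llvm.memset.p0.i32(ptr align 1 %q, i8 0, i32 16, i1 false)
  ret void
}
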
diff --git a/llvm/test/CodeGen/WebAssembly/simd-dot-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-dot-reductions.ll
new file mode 100644
index 0000000..3654aae
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-dot-reductions.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mattr=+simd128 | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+define <4 x i32> @dot_sext_1(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: dot_sext_1:
+; CHECK: .functype dot_sext_1 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32x4.dot_i16x8_s
+; CHECK-NEXT: # fallthrough-return
+ %sext1 = sext <8 x i16> %a to <8 x i32>
+ %sext2 = sext <8 x i16> %b to <8 x i32>
+ %mul = mul <8 x i32> %sext1, %sext2
+ %shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %res = add <4 x i32> %shuffle1, %shuffle2
+ ret <4 x i32> %res
+}
+
+
+define <4 x i32> @dot_sext_2(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: dot_sext_2:
+; CHECK: .functype dot_sext_2 (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32x4.dot_i16x8_s
+; CHECK-NEXT: # fallthrough-return
+ %sext1 = sext <8 x i16> %a to <8 x i32>
+ %sext2 = sext <8 x i16> %b to <8 x i32>
+ %mul = mul <8 x i32> %sext1, %sext2
+ %shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %res = add <4 x i32> %shuffle2, %shuffle1
+ ret <4 x i32> %res
+}
+
+define <4 x i32> @dot_sext_self(<8 x i16> %v) {
+; CHECK-LABEL: dot_sext_self:
+; CHECK: .functype dot_sext_self (v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32x4.dot_i16x8_s
+; CHECK-NEXT: # fallthrough-return
+ %sext = sext <8 x i16> %v to <8 x i32>
+ %mul = mul <8 x i32> %sext, %sext
+ %shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %res = add <4 x i32> %shuffle1, %shuffle2
+ ret <4 x i32> %res
+}
+
+; INFO: Negative test
+define <4 x i32> @dot_zext(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: dot_zext:
+; CHECK: .functype dot_zext (v128, v128) -> (v128)
+; CHECK-NEXT: .local v128
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32x4.extmul_low_i16x8_u
+; CHECK-NEXT: local.tee 2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32x4.extmul_high_i16x8_u
+; CHECK-NEXT: local.tee 1
+; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT: local.get 2
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i8x16.shuffle 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31
+; CHECK-NEXT: i32x4.add
+; CHECK-NEXT: # fallthrough-return
+ %zext1 = zext <8 x i16> %a to <8 x i32>
+ %zext2 = zext <8 x i16> %b to <8 x i32>
+ %mul = mul <8 x i32> %zext1, %zext2
+ %shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+ %res = add <4 x i32> %shuffle1, %shuffle2
+ ret <4 x i32> %res
+}
+
+; INFO: Negative test
+define <4 x i32> @dot_wrong_shuffle(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: dot_wrong_shuffle:
+; CHECK: .functype dot_wrong_shuffle (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32x4.extmul_low_i16x8_s
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i32x4.extmul_high_i16x8_s
+; CHECK-NEXT: i32x4.add
+; CHECK-NEXT: # fallthrough-return
+ %sext1 = sext <8 x i16> %a to <8 x i32>
+ %sext2 = sext <8 x i16> %b to <8 x i32>
+ %mul = mul <8 x i32> %sext1, %sext2
+ %shuffle1 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %shuffle2 = shufflevector <8 x i32> %mul, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %res = add <4 x i32> %shuffle1, %shuffle2
+ ret <4 x i32> %res
+}
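
For reference, i32x4.dot_i16x8_s computes, per output lane i,
  res[i] = sext(a[2i])*sext(b[2i]) + sext(a[2i+1])*sext(b[2i+1]),
a pairwise sum of signed 16x16->32 products. The even/odd shuffle masks
<0,2,4,6> and <1,3,5,7> followed by the add reassemble exactly that pairing,
which is why the positive tests match. The two negative tests each break one
requirement: dot_zext uses unsigned extends (there is no unsigned dot
instruction), and dot_wrong_shuffle splits the products into low/high halves
<0,1,2,3>/<4,5,6,7>, which adds lane i to lane i+4 instead of adjacent pairs.
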
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
index e065de3..600241a 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
@@ -2,9 +2,278 @@
; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s --check-prefix=RELAXED
; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128, | FileCheck %s --check-prefix=STRICT
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefix=NOFP16
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s --check-prefix=NOSIMD
target triple = "wasm32"
+define half @fadd_fmul_contract_f16(half %a, half %b, half %c) {
+; RELAXED-LABEL: fadd_fmul_contract_f16:
+; RELAXED: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: call $push0=, __truncsfhf2, $0
+; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0
+; RELAXED-NEXT: call $push2=, __truncsfhf2, $1
+; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2
+; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3
+; RELAXED-NEXT: call $push5=, __truncsfhf2, $2
+; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5
+; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6
+; RELAXED-NEXT: return $pop7
+;
+; STRICT-LABEL: fadd_fmul_contract_f16:
+; STRICT: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: call $push0=, __truncsfhf2, $0
+; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0
+; STRICT-NEXT: call $push2=, __truncsfhf2, $1
+; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2
+; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3
+; STRICT-NEXT: call $push5=, __truncsfhf2, $2
+; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5
+; STRICT-NEXT: f32.add $push7=, $pop4, $pop6
+; STRICT-NEXT: return $pop7
+;
+; NOFP16-LABEL: fadd_fmul_contract_f16:
+; NOFP16: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, __truncsfhf2, $0
+; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOFP16-NEXT: call $push2=, __truncsfhf2, $1
+; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOFP16-NEXT: call $push5=, __truncsfhf2, $2
+; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
+; NOFP16-NEXT: return $pop7
+;
+; NOSIMD-LABEL: fadd_fmul_contract_f16:
+; NOSIMD: .functype fadd_fmul_contract_f16 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, __truncsfhf2, $0
+; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOSIMD-NEXT: call $push2=, __truncsfhf2, $1
+; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2
+; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
+; NOSIMD-NEXT: return $pop7
+ %mul = fmul contract half %b, %a
+ %add = fadd contract half %mul, %c
+ ret half %add
+}
+
+define half @fmuladd_contract_f16(half %a, half %b, half %c) {
+; RELAXED-LABEL: fmuladd_contract_f16:
+; RELAXED: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: call $push0=, __truncsfhf2, $1
+; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0
+; RELAXED-NEXT: call $push2=, __truncsfhf2, $0
+; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2
+; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3
+; RELAXED-NEXT: call $push5=, __truncsfhf2, $2
+; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5
+; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6
+; RELAXED-NEXT: return $pop7
+;
+; STRICT-LABEL: fmuladd_contract_f16:
+; STRICT: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: call $push0=, __truncsfhf2, $1
+; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0
+; STRICT-NEXT: call $push2=, __truncsfhf2, $0
+; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2
+; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3
+; STRICT-NEXT: call $push5=, __truncsfhf2, $2
+; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5
+; STRICT-NEXT: f32.add $push7=, $pop4, $pop6
+; STRICT-NEXT: return $pop7
+;
+; NOFP16-LABEL: fmuladd_contract_f16:
+; NOFP16: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, __truncsfhf2, $1
+; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOFP16-NEXT: call $push2=, __truncsfhf2, $0
+; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOFP16-NEXT: call $push5=, __truncsfhf2, $2
+; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
+; NOFP16-NEXT: return $pop7
+;
+; NOSIMD-LABEL: fmuladd_contract_f16:
+; NOSIMD: .functype fmuladd_contract_f16 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, __truncsfhf2, $1
+; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOSIMD-NEXT: call $push2=, __truncsfhf2, $0
+; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2
+; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
+; NOSIMD-NEXT: return $pop7
+ %fma = call contract half @llvm.fmuladd(half %a, half %b, half %c)
+ ret half %fma
+}
+
+define half @fmuladd_f16(half %a, half %b, half %c) {
+; RELAXED-LABEL: fmuladd_f16:
+; RELAXED: .functype fmuladd_f16 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: call $push0=, __truncsfhf2, $1
+; RELAXED-NEXT: call $push1=, __extendhfsf2, $pop0
+; RELAXED-NEXT: call $push2=, __truncsfhf2, $0
+; RELAXED-NEXT: call $push3=, __extendhfsf2, $pop2
+; RELAXED-NEXT: f32.mul $push4=, $pop1, $pop3
+; RELAXED-NEXT: call $push5=, __truncsfhf2, $2
+; RELAXED-NEXT: call $push6=, __extendhfsf2, $pop5
+; RELAXED-NEXT: f32.add $push7=, $pop4, $pop6
+; RELAXED-NEXT: return $pop7
+;
+; STRICT-LABEL: fmuladd_f16:
+; STRICT: .functype fmuladd_f16 (f32, f32, f32) -> (f32)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: call $push0=, __truncsfhf2, $1
+; STRICT-NEXT: call $push1=, __extendhfsf2, $pop0
+; STRICT-NEXT: call $push2=, __truncsfhf2, $0
+; STRICT-NEXT: call $push3=, __extendhfsf2, $pop2
+; STRICT-NEXT: f32.mul $push4=, $pop1, $pop3
+; STRICT-NEXT: call $push5=, __truncsfhf2, $2
+; STRICT-NEXT: call $push6=, __extendhfsf2, $pop5
+; STRICT-NEXT: f32.add $push7=, $pop4, $pop6
+; STRICT-NEXT: return $pop7
+;
+; NOFP16-LABEL: fmuladd_f16:
+; NOFP16: .functype fmuladd_f16 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, __truncsfhf2, $1
+; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOFP16-NEXT: call $push2=, __truncsfhf2, $0
+; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOFP16-NEXT: call $push5=, __truncsfhf2, $2
+; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
+; NOFP16-NEXT: return $pop7
+;
+; NOSIMD-LABEL: fmuladd_f16:
+; NOSIMD: .functype fmuladd_f16 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, __truncsfhf2, $1
+; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOSIMD-NEXT: call $push2=, __truncsfhf2, $0
+; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOSIMD-NEXT: call $push5=, __truncsfhf2, $2
+; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
+; NOSIMD-NEXT: return $pop7
+ %fma = call half @llvm.fmuladd(half %a, half %b, half %c)
+ ret half %fma
+}
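
A note on the three scalar half tests above: wasm has no native scalar f16
type, so each half value travels as f32 and every operation is bracketed by
__truncsfhf2/__extendhfsf2 libcalls. With no scalar fused instruction
available, llvm.fmuladd(a, b, c) = a*b + c necessarily lowers to f32.mul plus
f32.add, with or without contract, which is why the RELAXED, STRICT, NOFP16
and NOSIMD bodies are identical in all three functions.
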
+
+
+define float @fadd_fmul_contract_f32(float %a, float %b, float %c) {
+; RELAXED-LABEL: fadd_fmul_contract_f32:
+; RELAXED: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f32.mul $push0=, $1, $0
+; RELAXED-NEXT: f32.add $push1=, $pop0, $2
+; RELAXED-NEXT: return $pop1
+;
+; STRICT-LABEL: fadd_fmul_contract_f32:
+; STRICT: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f32.mul $push0=, $1, $0
+; STRICT-NEXT: f32.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fadd_fmul_contract_f32:
+; NOFP16: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32.mul $push0=, $1, $0
+; NOFP16-NEXT: f32.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fadd_fmul_contract_f32:
+; NOSIMD: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $1, $0
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $2
+; NOSIMD-NEXT: return $pop1
+ %mul = fmul contract float %b, %a
+ %add = fadd contract float %mul, %c
+ ret float %add
+}
+
+define float @fmuladd_contract_f32(float %a, float %b, float %c) {
+; RELAXED-LABEL: fmuladd_contract_f32:
+; RELAXED: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f32.mul $push0=, $0, $1
+; RELAXED-NEXT: f32.add $push1=, $pop0, $2
+; RELAXED-NEXT: return $pop1
+;
+; STRICT-LABEL: fmuladd_contract_f32:
+; STRICT: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f32.mul $push0=, $0, $1
+; STRICT-NEXT: f32.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_contract_f32:
+; NOFP16: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32.mul $push0=, $0, $1
+; NOFP16-NEXT: f32.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_contract_f32:
+; NOSIMD: .functype fmuladd_contract_f32 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $0, $1
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $2
+; NOSIMD-NEXT: return $pop1
+ %fma = call contract float @llvm.fmuladd(float %a, float %b, float %c)
+ ret float %fma
+}
+
+define float @fmuladd_f32(float %a, float %b, float %c) {
+; RELAXED-LABEL: fmuladd_f32:
+; RELAXED: .functype fmuladd_f32 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f32.mul $push0=, $0, $1
+; RELAXED-NEXT: f32.add $push1=, $pop0, $2
+; RELAXED-NEXT: return $pop1
+;
+; STRICT-LABEL: fmuladd_f32:
+; STRICT: .functype fmuladd_f32 (f32, f32, f32) -> (f32)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f32.mul $push0=, $0, $1
+; STRICT-NEXT: f32.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_f32:
+; NOFP16: .functype fmuladd_f32 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32.mul $push0=, $0, $1
+; NOFP16-NEXT: f32.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_f32:
+; NOSIMD: .functype fmuladd_f32 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $0, $1
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $2
+; NOSIMD-NEXT: return $pop1
+ %fma = call float @llvm.fmuladd(float %a, float %b, float %c)
+ ret float %fma
+}
+
define double @fadd_fmul_contract_f64(double %a, double %b, double %c) {
; RELAXED-LABEL: fadd_fmul_contract_f64:
; RELAXED: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
@@ -19,16 +288,94 @@ define double @fadd_fmul_contract_f64(double %a, double %b, double %c) {
; STRICT-NEXT: f64.mul $push0=, $1, $0
; STRICT-NEXT: f64.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fadd_fmul_contract_f64:
+; NOFP16: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f64.mul $push0=, $1, $0
+; NOFP16-NEXT: f64.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fadd_fmul_contract_f64:
+; NOSIMD: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f64.mul $push0=, $1, $0
+; NOSIMD-NEXT: f64.add $push1=, $pop0, $2
+; NOSIMD-NEXT: return $pop1
%mul = fmul contract double %b, %a
%add = fadd contract double %mul, %c
ret double %add
}
+define double @fmuladd_f64(double %a, double %b, double %c) {
+; RELAXED-LABEL: fmuladd_f64:
+; RELAXED: .functype fmuladd_f64 (f64, f64, f64) -> (f64)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f64.mul $push0=, $0, $1
+; RELAXED-NEXT: f64.add $push1=, $pop0, $2
+; RELAXED-NEXT: return $pop1
+;
+; STRICT-LABEL: fmuladd_f64:
+; STRICT: .functype fmuladd_f64 (f64, f64, f64) -> (f64)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f64.mul $push0=, $0, $1
+; STRICT-NEXT: f64.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_f64:
+; NOFP16: .functype fmuladd_f64 (f64, f64, f64) -> (f64)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f64.mul $push0=, $0, $1
+; NOFP16-NEXT: f64.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_f64:
+; NOSIMD: .functype fmuladd_f64 (f64, f64, f64) -> (f64)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f64.mul $push0=, $0, $1
+; NOSIMD-NEXT: f64.add $push1=, $pop0, $2
+; NOSIMD-NEXT: return $pop1
+ %fma = call double @llvm.fmuladd(double %a, double %b, double %c)
+ ret double %fma
+}
+
+define double @fmuladd_contract_f64(double %a, double %b, double %c) {
+; RELAXED-LABEL: fmuladd_contract_f64:
+; RELAXED: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f64.mul $push0=, $0, $1
+; RELAXED-NEXT: f64.add $push1=, $pop0, $2
+; RELAXED-NEXT: return $pop1
+;
+; STRICT-LABEL: fmuladd_contract_f64:
+; STRICT: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f64.mul $push0=, $0, $1
+; STRICT-NEXT: f64.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_contract_f64:
+; NOFP16: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f64.mul $push0=, $0, $1
+; NOFP16-NEXT: f64.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_contract_f64:
+; NOSIMD: .functype fmuladd_contract_f64 (f64, f64, f64) -> (f64)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f64.mul $push0=, $0, $1
+; NOSIMD-NEXT: f64.add $push1=, $pop0, $2
+; NOSIMD-NEXT: return $pop1
+ %fma = call contract double @llvm.fmuladd(double %a, double %b, double %c)
+ ret double %fma
+}
+
define <4 x float> @fadd_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fadd_fmul_contract_4xf32:
; RELAXED: .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $1, $0
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fadd_fmul_contract_4xf32:
@@ -37,31 +384,222 @@ define <4 x float> @fadd_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4
; STRICT-NEXT: f32x4.mul $push0=, $1, $0
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fadd_fmul_contract_4xf32:
+; NOFP16: .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32x4.mul $push0=, $1, $0
+; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fadd_fmul_contract_4xf32:
+; NOSIMD: .functype fadd_fmul_contract_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $8, $4
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $12
+; NOSIMD-NEXT: f32.store 12($0), $pop1
+; NOSIMD-NEXT: f32.mul $push2=, $7, $3
+; NOSIMD-NEXT: f32.add $push3=, $pop2, $11
+; NOSIMD-NEXT: f32.store 8($0), $pop3
+; NOSIMD-NEXT: f32.mul $push4=, $6, $2
+; NOSIMD-NEXT: f32.add $push5=, $pop4, $10
+; NOSIMD-NEXT: f32.store 4($0), $pop5
+; NOSIMD-NEXT: f32.mul $push6=, $5, $1
+; NOSIMD-NEXT: f32.add $push7=, $pop6, $9
+; NOSIMD-NEXT: f32.store 0($0), $pop7
+; NOSIMD-NEXT: return
%mul = fmul contract <4 x float> %b, %a
%add = fadd contract <4 x float> %mul, %c
ret <4 x float> %add
}
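
Two check updates in this file are worth spelling out. First, the operand
order fixed just above: f32x4.relaxed_madd(x, y, z) computes x*y + z, so for
%mul = fmul contract %b, %a and %add = fadd contract %mul, %c the operands are
($1, $0, $2); the old checks ($2, $1, $0) placed the addend first. Second, in
the f16x8 function below, both RELAXED and STRICT now select f16x8.madd, which
suggests the half-precision madd from the fp16 proposal is treated as an
always-available fused operation under +fp16 rather than a relaxed-simd one;
that reading is an inference from the checks, not something this patch states.
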
-
define <8 x half> @fadd_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; RELAXED-LABEL: fadd_fmul_contract_8xf16:
; RELAXED: .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $2, $1, $0
+; RELAXED-NEXT: f16x8.madd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fadd_fmul_contract_8xf16:
; STRICT: .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f16x8.mul $push0=, $1, $0
-; STRICT-NEXT: f16x8.add $push1=, $pop0, $2
-; STRICT-NEXT: return $pop1
+; STRICT-NEXT: f16x8.madd $push0=, $1, $0, $2
+; STRICT-NEXT: return $pop0
+;
+; NOFP16-LABEL: fadd_fmul_contract_8xf16:
+; NOFP16: .functype fadd_fmul_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, __truncsfhf2, $8
+; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOFP16-NEXT: call $push2=, __truncsfhf2, $16
+; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOFP16-NEXT: call $push5=, __truncsfhf2, $24
+; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
+; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7
+; NOFP16-NEXT: i32.store16 14($0), $pop8
+; NOFP16-NEXT: call $push9=, __truncsfhf2, $7
+; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9
+; NOFP16-NEXT: call $push11=, __truncsfhf2, $15
+; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11
+; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12
+; NOFP16-NEXT: call $push14=, __truncsfhf2, $23
+; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14
+; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15
+; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16
+; NOFP16-NEXT: i32.store16 12($0), $pop17
+; NOFP16-NEXT: call $push18=, __truncsfhf2, $6
+; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18
+; NOFP16-NEXT: call $push20=, __truncsfhf2, $14
+; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20
+; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21
+; NOFP16-NEXT: call $push23=, __truncsfhf2, $22
+; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23
+; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24
+; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25
+; NOFP16-NEXT: i32.store16 10($0), $pop26
+; NOFP16-NEXT: call $push27=, __truncsfhf2, $5
+; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27
+; NOFP16-NEXT: call $push29=, __truncsfhf2, $13
+; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29
+; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30
+; NOFP16-NEXT: call $push32=, __truncsfhf2, $21
+; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32
+; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33
+; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34
+; NOFP16-NEXT: i32.store16 8($0), $pop35
+; NOFP16-NEXT: call $push36=, __truncsfhf2, $4
+; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36
+; NOFP16-NEXT: call $push38=, __truncsfhf2, $12
+; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38
+; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39
+; NOFP16-NEXT: call $push41=, __truncsfhf2, $20
+; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41
+; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42
+; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43
+; NOFP16-NEXT: i32.store16 6($0), $pop44
+; NOFP16-NEXT: call $push45=, __truncsfhf2, $3
+; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45
+; NOFP16-NEXT: call $push47=, __truncsfhf2, $11
+; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47
+; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48
+; NOFP16-NEXT: call $push50=, __truncsfhf2, $19
+; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50
+; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51
+; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52
+; NOFP16-NEXT: i32.store16 4($0), $pop53
+; NOFP16-NEXT: call $push54=, __truncsfhf2, $2
+; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54
+; NOFP16-NEXT: call $push56=, __truncsfhf2, $10
+; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56
+; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57
+; NOFP16-NEXT: call $push59=, __truncsfhf2, $18
+; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59
+; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60
+; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61
+; NOFP16-NEXT: i32.store16 2($0), $pop62
+; NOFP16-NEXT: call $push63=, __truncsfhf2, $1
+; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63
+; NOFP16-NEXT: call $push65=, __truncsfhf2, $9
+; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65
+; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66
+; NOFP16-NEXT: call $push68=, __truncsfhf2, $17
+; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68
+; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69
+; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70
+; NOFP16-NEXT: i32.store16 0($0), $pop71
+; NOFP16-NEXT: return
+;
+; NOSIMD-LABEL: fadd_fmul_contract_8xf16:
+; NOSIMD: .functype fadd_fmul_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, __truncsfhf2, $8
+; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOSIMD-NEXT: call $push2=, __truncsfhf2, $16
+; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24
+; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
+; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7
+; NOSIMD-NEXT: i32.store16 14($0), $pop8
+; NOSIMD-NEXT: call $push9=, __truncsfhf2, $7
+; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9
+; NOSIMD-NEXT: call $push11=, __truncsfhf2, $15
+; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11
+; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12
+; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23
+; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14
+; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15
+; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16
+; NOSIMD-NEXT: i32.store16 12($0), $pop17
+; NOSIMD-NEXT: call $push18=, __truncsfhf2, $6
+; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18
+; NOSIMD-NEXT: call $push20=, __truncsfhf2, $14
+; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20
+; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21
+; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22
+; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23
+; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24
+; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25
+; NOSIMD-NEXT: i32.store16 10($0), $pop26
+; NOSIMD-NEXT: call $push27=, __truncsfhf2, $5
+; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27
+; NOSIMD-NEXT: call $push29=, __truncsfhf2, $13
+; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29
+; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30
+; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21
+; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32
+; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33
+; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34
+; NOSIMD-NEXT: i32.store16 8($0), $pop35
+; NOSIMD-NEXT: call $push36=, __truncsfhf2, $4
+; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36
+; NOSIMD-NEXT: call $push38=, __truncsfhf2, $12
+; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38
+; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39
+; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20
+; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41
+; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42
+; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43
+; NOSIMD-NEXT: i32.store16 6($0), $pop44
+; NOSIMD-NEXT: call $push45=, __truncsfhf2, $3
+; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45
+; NOSIMD-NEXT: call $push47=, __truncsfhf2, $11
+; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47
+; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48
+; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19
+; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50
+; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51
+; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52
+; NOSIMD-NEXT: i32.store16 4($0), $pop53
+; NOSIMD-NEXT: call $push54=, __truncsfhf2, $2
+; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54
+; NOSIMD-NEXT: call $push56=, __truncsfhf2, $10
+; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56
+; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57
+; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18
+; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59
+; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60
+; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61
+; NOSIMD-NEXT: i32.store16 2($0), $pop62
+; NOSIMD-NEXT: call $push63=, __truncsfhf2, $1
+; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63
+; NOSIMD-NEXT: call $push65=, __truncsfhf2, $9
+; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65
+; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66
+; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17
+; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68
+; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69
+; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70
+; NOSIMD-NEXT: i32.store16 0($0), $pop71
+; NOSIMD-NEXT: return
%mul = fmul contract <8 x half> %b, %a
%add = fadd contract <8 x half> %mul, %c
ret <8 x half> %add
}
-
define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fadd_fmul_4xf32:
; RELAXED: .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128)
@@ -76,16 +614,412 @@ define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float>
; STRICT-NEXT: f32x4.mul $push0=, $1, $0
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fadd_fmul_4xf32:
+; NOFP16: .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32x4.mul $push0=, $1, $0
+; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fadd_fmul_4xf32:
+; NOSIMD: .functype fadd_fmul_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $8, $4
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $12
+; NOSIMD-NEXT: f32.store 12($0), $pop1
+; NOSIMD-NEXT: f32.mul $push2=, $7, $3
+; NOSIMD-NEXT: f32.add $push3=, $pop2, $11
+; NOSIMD-NEXT: f32.store 8($0), $pop3
+; NOSIMD-NEXT: f32.mul $push4=, $6, $2
+; NOSIMD-NEXT: f32.add $push5=, $pop4, $10
+; NOSIMD-NEXT: f32.store 4($0), $pop5
+; NOSIMD-NEXT: f32.mul $push6=, $5, $1
+; NOSIMD-NEXT: f32.add $push7=, $pop6, $9
+; NOSIMD-NEXT: f32.store 0($0), $pop7
+; NOSIMD-NEXT: return
%mul = fmul <4 x float> %b, %a
%add = fadd contract <4 x float> %mul, %c
ret <4 x float> %add
}
+define <8 x half> @fmuladd_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; RELAXED-LABEL: fmuladd_contract_8xf16:
+; RELAXED: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f16x8.madd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_contract_8xf16:
+; STRICT: .functype fmuladd_contract_8xf16 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f16x8.madd $push0=, $0, $1, $2
+; STRICT-NEXT: return $pop0
+;
+; NOFP16-LABEL: fmuladd_contract_8xf16:
+; NOFP16: .functype fmuladd_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, __truncsfhf2, $16
+; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOFP16-NEXT: call $push2=, __truncsfhf2, $8
+; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOFP16-NEXT: call $push5=, __truncsfhf2, $24
+; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
+; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7
+; NOFP16-NEXT: i32.store16 14($0), $pop8
+; NOFP16-NEXT: call $push9=, __truncsfhf2, $15
+; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9
+; NOFP16-NEXT: call $push11=, __truncsfhf2, $7
+; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11
+; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12
+; NOFP16-NEXT: call $push14=, __truncsfhf2, $23
+; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14
+; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15
+; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16
+; NOFP16-NEXT: i32.store16 12($0), $pop17
+; NOFP16-NEXT: call $push18=, __truncsfhf2, $14
+; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18
+; NOFP16-NEXT: call $push20=, __truncsfhf2, $6
+; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20
+; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21
+; NOFP16-NEXT: call $push23=, __truncsfhf2, $22
+; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23
+; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24
+; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25
+; NOFP16-NEXT: i32.store16 10($0), $pop26
+; NOFP16-NEXT: call $push27=, __truncsfhf2, $13
+; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27
+; NOFP16-NEXT: call $push29=, __truncsfhf2, $5
+; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29
+; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30
+; NOFP16-NEXT: call $push32=, __truncsfhf2, $21
+; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32
+; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33
+; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34
+; NOFP16-NEXT: i32.store16 8($0), $pop35
+; NOFP16-NEXT: call $push36=, __truncsfhf2, $12
+; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36
+; NOFP16-NEXT: call $push38=, __truncsfhf2, $4
+; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38
+; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39
+; NOFP16-NEXT: call $push41=, __truncsfhf2, $20
+; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41
+; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42
+; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43
+; NOFP16-NEXT: i32.store16 6($0), $pop44
+; NOFP16-NEXT: call $push45=, __truncsfhf2, $11
+; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45
+; NOFP16-NEXT: call $push47=, __truncsfhf2, $3
+; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47
+; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48
+; NOFP16-NEXT: call $push50=, __truncsfhf2, $19
+; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50
+; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51
+; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52
+; NOFP16-NEXT: i32.store16 4($0), $pop53
+; NOFP16-NEXT: call $push54=, __truncsfhf2, $10
+; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54
+; NOFP16-NEXT: call $push56=, __truncsfhf2, $2
+; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56
+; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57
+; NOFP16-NEXT: call $push59=, __truncsfhf2, $18
+; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59
+; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60
+; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61
+; NOFP16-NEXT: i32.store16 2($0), $pop62
+; NOFP16-NEXT: call $push63=, __truncsfhf2, $9
+; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63
+; NOFP16-NEXT: call $push65=, __truncsfhf2, $1
+; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65
+; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66
+; NOFP16-NEXT: call $push68=, __truncsfhf2, $17
+; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68
+; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69
+; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70
+; NOFP16-NEXT: i32.store16 0($0), $pop71
+; NOFP16-NEXT: return
+;
+; NOSIMD-LABEL: fmuladd_contract_8xf16:
+; NOSIMD: .functype fmuladd_contract_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, __truncsfhf2, $16
+; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOSIMD-NEXT: call $push2=, __truncsfhf2, $8
+; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24
+; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
+; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7
+; NOSIMD-NEXT: i32.store16 14($0), $pop8
+; NOSIMD-NEXT: call $push9=, __truncsfhf2, $15
+; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9
+; NOSIMD-NEXT: call $push11=, __truncsfhf2, $7
+; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11
+; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12
+; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23
+; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14
+; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15
+; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16
+; NOSIMD-NEXT: i32.store16 12($0), $pop17
+; NOSIMD-NEXT: call $push18=, __truncsfhf2, $14
+; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18
+; NOSIMD-NEXT: call $push20=, __truncsfhf2, $6
+; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20
+; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21
+; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22
+; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23
+; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24
+; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25
+; NOSIMD-NEXT: i32.store16 10($0), $pop26
+; NOSIMD-NEXT: call $push27=, __truncsfhf2, $13
+; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27
+; NOSIMD-NEXT: call $push29=, __truncsfhf2, $5
+; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29
+; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30
+; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21
+; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32
+; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33
+; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34
+; NOSIMD-NEXT: i32.store16 8($0), $pop35
+; NOSIMD-NEXT: call $push36=, __truncsfhf2, $12
+; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36
+; NOSIMD-NEXT: call $push38=, __truncsfhf2, $4
+; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38
+; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39
+; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20
+; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41
+; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42
+; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43
+; NOSIMD-NEXT: i32.store16 6($0), $pop44
+; NOSIMD-NEXT: call $push45=, __truncsfhf2, $11
+; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45
+; NOSIMD-NEXT: call $push47=, __truncsfhf2, $3
+; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47
+; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48
+; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19
+; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50
+; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51
+; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52
+; NOSIMD-NEXT: i32.store16 4($0), $pop53
+; NOSIMD-NEXT: call $push54=, __truncsfhf2, $10
+; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54
+; NOSIMD-NEXT: call $push56=, __truncsfhf2, $2
+; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56
+; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57
+; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18
+; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59
+; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60
+; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61
+; NOSIMD-NEXT: i32.store16 2($0), $pop62
+; NOSIMD-NEXT: call $push63=, __truncsfhf2, $9
+; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63
+; NOSIMD-NEXT: call $push65=, __truncsfhf2, $1
+; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65
+; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66
+; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17
+; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68
+; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69
+; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70
+; NOSIMD-NEXT: i32.store16 0($0), $pop71
+; NOSIMD-NEXT: return
+ %fma = call contract <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c)
+ ret <8 x half> %fma
+}
+
+define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; RELAXED-LABEL: fmuladd_8xf16:
+; RELAXED: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f16x8.madd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_8xf16:
+; STRICT: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f16x8.madd $push0=, $0, $1, $2
+; STRICT-NEXT: return $pop0
+;
+; NOFP16-LABEL: fmuladd_8xf16:
+; NOFP16: .functype fmuladd_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, __truncsfhf2, $16
+; NOFP16-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOFP16-NEXT: call $push2=, __truncsfhf2, $8
+; NOFP16-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOFP16-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOFP16-NEXT: call $push5=, __truncsfhf2, $24
+; NOFP16-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOFP16-NEXT: f32.add $push7=, $pop4, $pop6
+; NOFP16-NEXT: call $push8=, __truncsfhf2, $pop7
+; NOFP16-NEXT: i32.store16 14($0), $pop8
+; NOFP16-NEXT: call $push9=, __truncsfhf2, $15
+; NOFP16-NEXT: call $push10=, __extendhfsf2, $pop9
+; NOFP16-NEXT: call $push11=, __truncsfhf2, $7
+; NOFP16-NEXT: call $push12=, __extendhfsf2, $pop11
+; NOFP16-NEXT: f32.mul $push13=, $pop10, $pop12
+; NOFP16-NEXT: call $push14=, __truncsfhf2, $23
+; NOFP16-NEXT: call $push15=, __extendhfsf2, $pop14
+; NOFP16-NEXT: f32.add $push16=, $pop13, $pop15
+; NOFP16-NEXT: call $push17=, __truncsfhf2, $pop16
+; NOFP16-NEXT: i32.store16 12($0), $pop17
+; NOFP16-NEXT: call $push18=, __truncsfhf2, $14
+; NOFP16-NEXT: call $push19=, __extendhfsf2, $pop18
+; NOFP16-NEXT: call $push20=, __truncsfhf2, $6
+; NOFP16-NEXT: call $push21=, __extendhfsf2, $pop20
+; NOFP16-NEXT: f32.mul $push22=, $pop19, $pop21
+; NOFP16-NEXT: call $push23=, __truncsfhf2, $22
+; NOFP16-NEXT: call $push24=, __extendhfsf2, $pop23
+; NOFP16-NEXT: f32.add $push25=, $pop22, $pop24
+; NOFP16-NEXT: call $push26=, __truncsfhf2, $pop25
+; NOFP16-NEXT: i32.store16 10($0), $pop26
+; NOFP16-NEXT: call $push27=, __truncsfhf2, $13
+; NOFP16-NEXT: call $push28=, __extendhfsf2, $pop27
+; NOFP16-NEXT: call $push29=, __truncsfhf2, $5
+; NOFP16-NEXT: call $push30=, __extendhfsf2, $pop29
+; NOFP16-NEXT: f32.mul $push31=, $pop28, $pop30
+; NOFP16-NEXT: call $push32=, __truncsfhf2, $21
+; NOFP16-NEXT: call $push33=, __extendhfsf2, $pop32
+; NOFP16-NEXT: f32.add $push34=, $pop31, $pop33
+; NOFP16-NEXT: call $push35=, __truncsfhf2, $pop34
+; NOFP16-NEXT: i32.store16 8($0), $pop35
+; NOFP16-NEXT: call $push36=, __truncsfhf2, $12
+; NOFP16-NEXT: call $push37=, __extendhfsf2, $pop36
+; NOFP16-NEXT: call $push38=, __truncsfhf2, $4
+; NOFP16-NEXT: call $push39=, __extendhfsf2, $pop38
+; NOFP16-NEXT: f32.mul $push40=, $pop37, $pop39
+; NOFP16-NEXT: call $push41=, __truncsfhf2, $20
+; NOFP16-NEXT: call $push42=, __extendhfsf2, $pop41
+; NOFP16-NEXT: f32.add $push43=, $pop40, $pop42
+; NOFP16-NEXT: call $push44=, __truncsfhf2, $pop43
+; NOFP16-NEXT: i32.store16 6($0), $pop44
+; NOFP16-NEXT: call $push45=, __truncsfhf2, $11
+; NOFP16-NEXT: call $push46=, __extendhfsf2, $pop45
+; NOFP16-NEXT: call $push47=, __truncsfhf2, $3
+; NOFP16-NEXT: call $push48=, __extendhfsf2, $pop47
+; NOFP16-NEXT: f32.mul $push49=, $pop46, $pop48
+; NOFP16-NEXT: call $push50=, __truncsfhf2, $19
+; NOFP16-NEXT: call $push51=, __extendhfsf2, $pop50
+; NOFP16-NEXT: f32.add $push52=, $pop49, $pop51
+; NOFP16-NEXT: call $push53=, __truncsfhf2, $pop52
+; NOFP16-NEXT: i32.store16 4($0), $pop53
+; NOFP16-NEXT: call $push54=, __truncsfhf2, $10
+; NOFP16-NEXT: call $push55=, __extendhfsf2, $pop54
+; NOFP16-NEXT: call $push56=, __truncsfhf2, $2
+; NOFP16-NEXT: call $push57=, __extendhfsf2, $pop56
+; NOFP16-NEXT: f32.mul $push58=, $pop55, $pop57
+; NOFP16-NEXT: call $push59=, __truncsfhf2, $18
+; NOFP16-NEXT: call $push60=, __extendhfsf2, $pop59
+; NOFP16-NEXT: f32.add $push61=, $pop58, $pop60
+; NOFP16-NEXT: call $push62=, __truncsfhf2, $pop61
+; NOFP16-NEXT: i32.store16 2($0), $pop62
+; NOFP16-NEXT: call $push63=, __truncsfhf2, $9
+; NOFP16-NEXT: call $push64=, __extendhfsf2, $pop63
+; NOFP16-NEXT: call $push65=, __truncsfhf2, $1
+; NOFP16-NEXT: call $push66=, __extendhfsf2, $pop65
+; NOFP16-NEXT: f32.mul $push67=, $pop64, $pop66
+; NOFP16-NEXT: call $push68=, __truncsfhf2, $17
+; NOFP16-NEXT: call $push69=, __extendhfsf2, $pop68
+; NOFP16-NEXT: f32.add $push70=, $pop67, $pop69
+; NOFP16-NEXT: call $push71=, __truncsfhf2, $pop70
+; NOFP16-NEXT: i32.store16 0($0), $pop71
+; NOFP16-NEXT: return
+;
+; NOSIMD-LABEL: fmuladd_8xf16:
+; NOSIMD: .functype fmuladd_8xf16 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, __truncsfhf2, $16
+; NOSIMD-NEXT: call $push1=, __extendhfsf2, $pop0
+; NOSIMD-NEXT: call $push2=, __truncsfhf2, $8
+; NOSIMD-NEXT: call $push3=, __extendhfsf2, $pop2
+; NOSIMD-NEXT: f32.mul $push4=, $pop1, $pop3
+; NOSIMD-NEXT: call $push5=, __truncsfhf2, $24
+; NOSIMD-NEXT: call $push6=, __extendhfsf2, $pop5
+; NOSIMD-NEXT: f32.add $push7=, $pop4, $pop6
+; NOSIMD-NEXT: call $push8=, __truncsfhf2, $pop7
+; NOSIMD-NEXT: i32.store16 14($0), $pop8
+; NOSIMD-NEXT: call $push9=, __truncsfhf2, $15
+; NOSIMD-NEXT: call $push10=, __extendhfsf2, $pop9
+; NOSIMD-NEXT: call $push11=, __truncsfhf2, $7
+; NOSIMD-NEXT: call $push12=, __extendhfsf2, $pop11
+; NOSIMD-NEXT: f32.mul $push13=, $pop10, $pop12
+; NOSIMD-NEXT: call $push14=, __truncsfhf2, $23
+; NOSIMD-NEXT: call $push15=, __extendhfsf2, $pop14
+; NOSIMD-NEXT: f32.add $push16=, $pop13, $pop15
+; NOSIMD-NEXT: call $push17=, __truncsfhf2, $pop16
+; NOSIMD-NEXT: i32.store16 12($0), $pop17
+; NOSIMD-NEXT: call $push18=, __truncsfhf2, $14
+; NOSIMD-NEXT: call $push19=, __extendhfsf2, $pop18
+; NOSIMD-NEXT: call $push20=, __truncsfhf2, $6
+; NOSIMD-NEXT: call $push21=, __extendhfsf2, $pop20
+; NOSIMD-NEXT: f32.mul $push22=, $pop19, $pop21
+; NOSIMD-NEXT: call $push23=, __truncsfhf2, $22
+; NOSIMD-NEXT: call $push24=, __extendhfsf2, $pop23
+; NOSIMD-NEXT: f32.add $push25=, $pop22, $pop24
+; NOSIMD-NEXT: call $push26=, __truncsfhf2, $pop25
+; NOSIMD-NEXT: i32.store16 10($0), $pop26
+; NOSIMD-NEXT: call $push27=, __truncsfhf2, $13
+; NOSIMD-NEXT: call $push28=, __extendhfsf2, $pop27
+; NOSIMD-NEXT: call $push29=, __truncsfhf2, $5
+; NOSIMD-NEXT: call $push30=, __extendhfsf2, $pop29
+; NOSIMD-NEXT: f32.mul $push31=, $pop28, $pop30
+; NOSIMD-NEXT: call $push32=, __truncsfhf2, $21
+; NOSIMD-NEXT: call $push33=, __extendhfsf2, $pop32
+; NOSIMD-NEXT: f32.add $push34=, $pop31, $pop33
+; NOSIMD-NEXT: call $push35=, __truncsfhf2, $pop34
+; NOSIMD-NEXT: i32.store16 8($0), $pop35
+; NOSIMD-NEXT: call $push36=, __truncsfhf2, $12
+; NOSIMD-NEXT: call $push37=, __extendhfsf2, $pop36
+; NOSIMD-NEXT: call $push38=, __truncsfhf2, $4
+; NOSIMD-NEXT: call $push39=, __extendhfsf2, $pop38
+; NOSIMD-NEXT: f32.mul $push40=, $pop37, $pop39
+; NOSIMD-NEXT: call $push41=, __truncsfhf2, $20
+; NOSIMD-NEXT: call $push42=, __extendhfsf2, $pop41
+; NOSIMD-NEXT: f32.add $push43=, $pop40, $pop42
+; NOSIMD-NEXT: call $push44=, __truncsfhf2, $pop43
+; NOSIMD-NEXT: i32.store16 6($0), $pop44
+; NOSIMD-NEXT: call $push45=, __truncsfhf2, $11
+; NOSIMD-NEXT: call $push46=, __extendhfsf2, $pop45
+; NOSIMD-NEXT: call $push47=, __truncsfhf2, $3
+; NOSIMD-NEXT: call $push48=, __extendhfsf2, $pop47
+; NOSIMD-NEXT: f32.mul $push49=, $pop46, $pop48
+; NOSIMD-NEXT: call $push50=, __truncsfhf2, $19
+; NOSIMD-NEXT: call $push51=, __extendhfsf2, $pop50
+; NOSIMD-NEXT: f32.add $push52=, $pop49, $pop51
+; NOSIMD-NEXT: call $push53=, __truncsfhf2, $pop52
+; NOSIMD-NEXT: i32.store16 4($0), $pop53
+; NOSIMD-NEXT: call $push54=, __truncsfhf2, $10
+; NOSIMD-NEXT: call $push55=, __extendhfsf2, $pop54
+; NOSIMD-NEXT: call $push56=, __truncsfhf2, $2
+; NOSIMD-NEXT: call $push57=, __extendhfsf2, $pop56
+; NOSIMD-NEXT: f32.mul $push58=, $pop55, $pop57
+; NOSIMD-NEXT: call $push59=, __truncsfhf2, $18
+; NOSIMD-NEXT: call $push60=, __extendhfsf2, $pop59
+; NOSIMD-NEXT: f32.add $push61=, $pop58, $pop60
+; NOSIMD-NEXT: call $push62=, __truncsfhf2, $pop61
+; NOSIMD-NEXT: i32.store16 2($0), $pop62
+; NOSIMD-NEXT: call $push63=, __truncsfhf2, $9
+; NOSIMD-NEXT: call $push64=, __extendhfsf2, $pop63
+; NOSIMD-NEXT: call $push65=, __truncsfhf2, $1
+; NOSIMD-NEXT: call $push66=, __extendhfsf2, $pop65
+; NOSIMD-NEXT: f32.mul $push67=, $pop64, $pop66
+; NOSIMD-NEXT: call $push68=, __truncsfhf2, $17
+; NOSIMD-NEXT: call $push69=, __extendhfsf2, $pop68
+; NOSIMD-NEXT: f32.add $push70=, $pop67, $pop69
+; NOSIMD-NEXT: call $push71=, __truncsfhf2, $pop70
+; NOSIMD-NEXT: i32.store16 0($0), $pop71
+; NOSIMD-NEXT: return
+ %fma = call <8 x half> @llvm.fmuladd(<8 x half> %a, <8 x half> %b, <8 x half> %c)
+ ret <8 x half> %fma
+}
+
define <4 x float> @fmuladd_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fmuladd_contract_4xf32:
; RELAXED: .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $0, $1
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $0, $1, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fmuladd_contract_4xf32:
@@ -94,18 +1028,40 @@ define <4 x float> @fmuladd_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x
; STRICT-NEXT: f32x4.mul $push0=, $0, $1
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_contract_4xf32:
+; NOFP16: .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32x4.mul $push0=, $0, $1
+; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_contract_4xf32:
+; NOSIMD: .functype fmuladd_contract_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $4, $8
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $12
+; NOSIMD-NEXT: f32.store 12($0), $pop1
+; NOSIMD-NEXT: f32.mul $push2=, $3, $7
+; NOSIMD-NEXT: f32.add $push3=, $pop2, $11
+; NOSIMD-NEXT: f32.store 8($0), $pop3
+; NOSIMD-NEXT: f32.mul $push4=, $2, $6
+; NOSIMD-NEXT: f32.add $push5=, $pop4, $10
+; NOSIMD-NEXT: f32.store 4($0), $pop5
+; NOSIMD-NEXT: f32.mul $push6=, $1, $5
+; NOSIMD-NEXT: f32.add $push7=, $pop6, $9
+; NOSIMD-NEXT: f32.store 0($0), $pop7
+; NOSIMD-NEXT: return
%fma = call contract <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c)
ret <4 x float> %fma
}
-; TODO: This should also have relaxed_madd in RELAXED case
define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fmuladd_4xf32:
; RELAXED: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.mul $push0=, $0, $1
-; RELAXED-NEXT: f32x4.add $push1=, $pop0, $2
-; RELAXED-NEXT: return $pop1
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fmuladd_4xf32:
; STRICT: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
@@ -113,10 +1069,170 @@ define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c
; STRICT-NEXT: f32x4.mul $push0=, $0, $1
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_4xf32:
+; NOFP16: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32x4.mul $push0=, $0, $1
+; NOFP16-NEXT: f32x4.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_4xf32:
+; NOSIMD: .functype fmuladd_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $4, $8
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $12
+; NOSIMD-NEXT: f32.store 12($0), $pop1
+; NOSIMD-NEXT: f32.mul $push2=, $3, $7
+; NOSIMD-NEXT: f32.add $push3=, $pop2, $11
+; NOSIMD-NEXT: f32.store 8($0), $pop3
+; NOSIMD-NEXT: f32.mul $push4=, $2, $6
+; NOSIMD-NEXT: f32.add $push5=, $pop4, $10
+; NOSIMD-NEXT: f32.store 4($0), $pop5
+; NOSIMD-NEXT: f32.mul $push6=, $1, $5
+; NOSIMD-NEXT: f32.add $push7=, $pop6, $9
+; NOSIMD-NEXT: f32.store 0($0), $pop7
+; NOSIMD-NEXT: return
%fma = call <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c)
ret <4 x float> %fma
}
+define <8 x float> @fmuladd_8xf32(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
+; RELAXED-LABEL: fmuladd_8xf32:
+; RELAXED: .functype fmuladd_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f32x4.mul $push0=, $2, $4
+; RELAXED-NEXT: f32x4.add $push1=, $pop0, $6
+; RELAXED-NEXT: v128.store 16($0), $pop1
+; RELAXED-NEXT: f32x4.mul $push2=, $1, $3
+; RELAXED-NEXT: f32x4.add $push3=, $pop2, $5
+; RELAXED-NEXT: v128.store 0($0), $pop3
+; RELAXED-NEXT: return
+;
+; STRICT-LABEL: fmuladd_8xf32:
+; STRICT: .functype fmuladd_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f32x4.mul $push0=, $2, $4
+; STRICT-NEXT: f32x4.add $push1=, $pop0, $6
+; STRICT-NEXT: v128.store 16($0), $pop1
+; STRICT-NEXT: f32x4.mul $push2=, $1, $3
+; STRICT-NEXT: f32x4.add $push3=, $pop2, $5
+; STRICT-NEXT: v128.store 0($0), $pop3
+; STRICT-NEXT: return
+;
+; NOFP16-LABEL: fmuladd_8xf32:
+; NOFP16: .functype fmuladd_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32x4.mul $push0=, $2, $4
+; NOFP16-NEXT: f32x4.add $push1=, $pop0, $6
+; NOFP16-NEXT: v128.store 16($0), $pop1
+; NOFP16-NEXT: f32x4.mul $push2=, $1, $3
+; NOFP16-NEXT: f32x4.add $push3=, $pop2, $5
+; NOFP16-NEXT: v128.store 0($0), $pop3
+; NOFP16-NEXT: return
+;
+; NOSIMD-LABEL: fmuladd_8xf32:
+; NOSIMD: .functype fmuladd_8xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $8, $16
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $24
+; NOSIMD-NEXT: f32.store 28($0), $pop1
+; NOSIMD-NEXT: f32.mul $push2=, $7, $15
+; NOSIMD-NEXT: f32.add $push3=, $pop2, $23
+; NOSIMD-NEXT: f32.store 24($0), $pop3
+; NOSIMD-NEXT: f32.mul $push4=, $6, $14
+; NOSIMD-NEXT: f32.add $push5=, $pop4, $22
+; NOSIMD-NEXT: f32.store 20($0), $pop5
+; NOSIMD-NEXT: f32.mul $push6=, $5, $13
+; NOSIMD-NEXT: f32.add $push7=, $pop6, $21
+; NOSIMD-NEXT: f32.store 16($0), $pop7
+; NOSIMD-NEXT: f32.mul $push8=, $4, $12
+; NOSIMD-NEXT: f32.add $push9=, $pop8, $20
+; NOSIMD-NEXT: f32.store 12($0), $pop9
+; NOSIMD-NEXT: f32.mul $push10=, $3, $11
+; NOSIMD-NEXT: f32.add $push11=, $pop10, $19
+; NOSIMD-NEXT: f32.store 8($0), $pop11
+; NOSIMD-NEXT: f32.mul $push12=, $2, $10
+; NOSIMD-NEXT: f32.add $push13=, $pop12, $18
+; NOSIMD-NEXT: f32.store 4($0), $pop13
+; NOSIMD-NEXT: f32.mul $push14=, $1, $9
+; NOSIMD-NEXT: f32.add $push15=, $pop14, $17
+; NOSIMD-NEXT: f32.store 0($0), $pop15
+; NOSIMD-NEXT: return
+ %fma = call <8 x float> @llvm.fmuladd(<8 x float> %a, <8 x float> %b, <8 x float> %c)
+ ret <8 x float> %fma
+}
+
+define <2 x double> @fmuladd_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; RELAXED-LABEL: fmuladd_contract_2xf64:
+; RELAXED: .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_contract_2xf64:
+; STRICT: .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f64x2.mul $push0=, $0, $1
+; STRICT-NEXT: f64x2.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_contract_2xf64:
+; NOFP16: .functype fmuladd_contract_2xf64 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f64x2.mul $push0=, $0, $1
+; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_contract_2xf64:
+; NOSIMD: .functype fmuladd_contract_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f64.mul $push0=, $2, $4
+; NOSIMD-NEXT: f64.add $push1=, $pop0, $6
+; NOSIMD-NEXT: f64.store 8($0), $pop1
+; NOSIMD-NEXT: f64.mul $push2=, $1, $3
+; NOSIMD-NEXT: f64.add $push3=, $pop2, $5
+; NOSIMD-NEXT: f64.store 0($0), $pop3
+; NOSIMD-NEXT: return
+ %fma = call contract <2 x double> @llvm.fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
+ ret <2 x double> %fma
+}
+
+define <2 x double> @fmuladd_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; RELAXED-LABEL: fmuladd_2xf64:
+; RELAXED: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_2xf64:
+; STRICT: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f64x2.mul $push0=, $0, $1
+; STRICT-NEXT: f64x2.add $push1=, $pop0, $2
+; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fmuladd_2xf64:
+; NOFP16: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f64x2.mul $push0=, $0, $1
+; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fmuladd_2xf64:
+; NOSIMD: .functype fmuladd_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f64.mul $push0=, $2, $4
+; NOSIMD-NEXT: f64.add $push1=, $pop0, $6
+; NOSIMD-NEXT: f64.store 8($0), $pop1
+; NOSIMD-NEXT: f64.mul $push2=, $1, $3
+; NOSIMD-NEXT: f64.add $push3=, $pop2, $5
+; NOSIMD-NEXT: f64.store 0($0), $pop3
+; NOSIMD-NEXT: return
+ %fma = call <2 x double> @llvm.fmuladd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
+ ret <2 x double> %fma
+}
+
define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; RELAXED-LABEL: fma_4xf32:
; RELAXED: .functype fma_4xf32 (v128, v128, v128) -> (v128)
@@ -167,6 +1283,44 @@ define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; STRICT-NEXT: call $push18=, fmaf, $pop17, $pop16, $pop15
; STRICT-NEXT: f32x4.replace_lane $push19=, $pop14, 3, $pop18
; STRICT-NEXT: return $pop19
+;
+; NOFP16-LABEL: fma_4xf32:
+; NOFP16: .functype fma_4xf32 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32x4.extract_lane $push2=, $0, 0
+; NOFP16-NEXT: f32x4.extract_lane $push1=, $1, 0
+; NOFP16-NEXT: f32x4.extract_lane $push0=, $2, 0
+; NOFP16-NEXT: call $push3=, fmaf, $pop2, $pop1, $pop0
+; NOFP16-NEXT: f32x4.splat $push4=, $pop3
+; NOFP16-NEXT: f32x4.extract_lane $push7=, $0, 1
+; NOFP16-NEXT: f32x4.extract_lane $push6=, $1, 1
+; NOFP16-NEXT: f32x4.extract_lane $push5=, $2, 1
+; NOFP16-NEXT: call $push8=, fmaf, $pop7, $pop6, $pop5
+; NOFP16-NEXT: f32x4.replace_lane $push9=, $pop4, 1, $pop8
+; NOFP16-NEXT: f32x4.extract_lane $push12=, $0, 2
+; NOFP16-NEXT: f32x4.extract_lane $push11=, $1, 2
+; NOFP16-NEXT: f32x4.extract_lane $push10=, $2, 2
+; NOFP16-NEXT: call $push13=, fmaf, $pop12, $pop11, $pop10
+; NOFP16-NEXT: f32x4.replace_lane $push14=, $pop9, 2, $pop13
+; NOFP16-NEXT: f32x4.extract_lane $push17=, $0, 3
+; NOFP16-NEXT: f32x4.extract_lane $push16=, $1, 3
+; NOFP16-NEXT: f32x4.extract_lane $push15=, $2, 3
+; NOFP16-NEXT: call $push18=, fmaf, $pop17, $pop16, $pop15
+; NOFP16-NEXT: f32x4.replace_lane $push19=, $pop14, 3, $pop18
+; NOFP16-NEXT: return $pop19
+;
+; NOSIMD-LABEL: fma_4xf32:
+; NOSIMD: .functype fma_4xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, fmaf, $4, $8, $12
+; NOSIMD-NEXT: f32.store 12($0), $pop0
+; NOSIMD-NEXT: call $push1=, fmaf, $3, $7, $11
+; NOSIMD-NEXT: f32.store 8($0), $pop1
+; NOSIMD-NEXT: call $push2=, fmaf, $2, $6, $10
+; NOSIMD-NEXT: f32.store 4($0), $pop2
+; NOSIMD-NEXT: call $push3=, fmaf, $1, $5, $9
+; NOSIMD-NEXT: f32.store 0($0), $pop3
+; NOSIMD-NEXT: return
%fma = call <4 x float> @llvm.fma(<4 x float> %a, <4 x float> %b, <4 x float> %c)
ret <4 x float> %fma
}
@@ -176,9 +1330,9 @@ define <8 x float> @fadd_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8
; RELAXED-LABEL: fadd_fmul_contract_8xf32:
; RELAXED: .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $6, $4, $2
+; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $4, $2, $6
; RELAXED-NEXT: v128.store 16($0), $pop0
-; RELAXED-NEXT: f32x4.relaxed_madd $push1=, $5, $3, $1
+; RELAXED-NEXT: f32x4.relaxed_madd $push1=, $3, $1, $5
; RELAXED-NEXT: v128.store 0($0), $pop1
; RELAXED-NEXT: return
;
@@ -192,17 +1346,56 @@ define <8 x float> @fadd_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8
; STRICT-NEXT: f32x4.add $push3=, $pop2, $5
; STRICT-NEXT: v128.store 0($0), $pop3
; STRICT-NEXT: return
+;
+; NOFP16-LABEL: fadd_fmul_contract_8xf32:
+; NOFP16: .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f32x4.mul $push0=, $4, $2
+; NOFP16-NEXT: f32x4.add $push1=, $pop0, $6
+; NOFP16-NEXT: v128.store 16($0), $pop1
+; NOFP16-NEXT: f32x4.mul $push2=, $3, $1
+; NOFP16-NEXT: f32x4.add $push3=, $pop2, $5
+; NOFP16-NEXT: v128.store 0($0), $pop3
+; NOFP16-NEXT: return
+;
+; NOSIMD-LABEL: fadd_fmul_contract_8xf32:
+; NOSIMD: .functype fadd_fmul_contract_8xf32 (i32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f32.mul $push0=, $16, $8
+; NOSIMD-NEXT: f32.add $push1=, $pop0, $24
+; NOSIMD-NEXT: f32.store 28($0), $pop1
+; NOSIMD-NEXT: f32.mul $push2=, $15, $7
+; NOSIMD-NEXT: f32.add $push3=, $pop2, $23
+; NOSIMD-NEXT: f32.store 24($0), $pop3
+; NOSIMD-NEXT: f32.mul $push4=, $14, $6
+; NOSIMD-NEXT: f32.add $push5=, $pop4, $22
+; NOSIMD-NEXT: f32.store 20($0), $pop5
+; NOSIMD-NEXT: f32.mul $push6=, $13, $5
+; NOSIMD-NEXT: f32.add $push7=, $pop6, $21
+; NOSIMD-NEXT: f32.store 16($0), $pop7
+; NOSIMD-NEXT: f32.mul $push8=, $12, $4
+; NOSIMD-NEXT: f32.add $push9=, $pop8, $20
+; NOSIMD-NEXT: f32.store 12($0), $pop9
+; NOSIMD-NEXT: f32.mul $push10=, $11, $3
+; NOSIMD-NEXT: f32.add $push11=, $pop10, $19
+; NOSIMD-NEXT: f32.store 8($0), $pop11
+; NOSIMD-NEXT: f32.mul $push12=, $10, $2
+; NOSIMD-NEXT: f32.add $push13=, $pop12, $18
+; NOSIMD-NEXT: f32.store 4($0), $pop13
+; NOSIMD-NEXT: f32.mul $push14=, $9, $1
+; NOSIMD-NEXT: f32.add $push15=, $pop14, $17
+; NOSIMD-NEXT: f32.store 0($0), $pop15
+; NOSIMD-NEXT: return
%mul = fmul contract <8 x float> %b, %a
%add = fadd contract <8 x float> %mul, %c
ret <8 x float> %add
}
-
define <2 x double> @fadd_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; RELAXED-LABEL: fadd_fmul_contract_2xf64:
; RELAXED: .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $2, $1, $0
+; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fadd_fmul_contract_2xf64:
@@ -211,28 +1404,64 @@ define <2 x double> @fadd_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b,
; STRICT-NEXT: f64x2.mul $push0=, $1, $0
; STRICT-NEXT: f64x2.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
+;
+; NOFP16-LABEL: fadd_fmul_contract_2xf64:
+; NOFP16: .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f64x2.mul $push0=, $1, $0
+; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fadd_fmul_contract_2xf64:
+; NOSIMD: .functype fadd_fmul_contract_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f64.mul $push0=, $4, $2
+; NOSIMD-NEXT: f64.add $push1=, $pop0, $6
+; NOSIMD-NEXT: f64.store 8($0), $pop1
+; NOSIMD-NEXT: f64.mul $push2=, $3, $1
+; NOSIMD-NEXT: f64.add $push3=, $pop2, $5
+; NOSIMD-NEXT: f64.store 0($0), $pop3
+; NOSIMD-NEXT: return
%mul = fmul contract <2 x double> %b, %a
%add = fadd contract <2 x double> %mul, %c
ret <2 x double> %add
}
-define float @fadd_fmul_contract_f32(float %a, float %b, float %c) {
-; RELAXED-LABEL: fadd_fmul_contract_f32:
-; RELAXED: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
+define <2 x double> @fadd_fmul_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; RELAXED-LABEL: fadd_fmul_2xf64:
+; RELAXED: .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32.mul $push0=, $1, $0
-; RELAXED-NEXT: f32.add $push1=, $pop0, $2
+; RELAXED-NEXT: f64x2.mul $push0=, $1, $0
+; RELAXED-NEXT: f64x2.add $push1=, $pop0, $2
; RELAXED-NEXT: return $pop1
;
-; STRICT-LABEL: fadd_fmul_contract_f32:
-; STRICT: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
+; STRICT-LABEL: fadd_fmul_2xf64:
+; STRICT: .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128)
; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f32.mul $push0=, $1, $0
-; STRICT-NEXT: f32.add $push1=, $pop0, $2
+; STRICT-NEXT: f64x2.mul $push0=, $1, $0
+; STRICT-NEXT: f64x2.add $push1=, $pop0, $2
; STRICT-NEXT: return $pop1
- %mul = fmul contract float %b, %a
- %add = fadd contract float %mul, %c
- ret float %add
+;
+; NOFP16-LABEL: fadd_fmul_2xf64:
+; NOFP16: .functype fadd_fmul_2xf64 (v128, v128, v128) -> (v128)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: f64x2.mul $push0=, $1, $0
+; NOFP16-NEXT: f64x2.add $push1=, $pop0, $2
+; NOFP16-NEXT: return $pop1
+;
+; NOSIMD-LABEL: fadd_fmul_2xf64:
+; NOSIMD: .functype fadd_fmul_2xf64 (i32, f64, f64, f64, f64, f64, f64) -> ()
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: f64.mul $push0=, $4, $2
+; NOSIMD-NEXT: f64.add $push1=, $pop0, $6
+; NOSIMD-NEXT: f64.store 8($0), $pop1
+; NOSIMD-NEXT: f64.mul $push2=, $3, $1
+; NOSIMD-NEXT: f64.add $push3=, $pop2, $5
+; NOSIMD-NEXT: f64.store 0($0), $pop3
+; NOSIMD-NEXT: return
+ %mul = fmul <2 x double> %b, %a
+ %add = fadd <2 x double> %mul, %c
+ ret <2 x double> %add
}
define float @fma_f32(float %a, float %b, float %c) {
@@ -247,6 +1476,18 @@ define float @fma_f32(float %a, float %b, float %c) {
; STRICT-NEXT: # %bb.0:
; STRICT-NEXT: call $push0=, fmaf, $0, $1, $2
; STRICT-NEXT: return $pop0
+;
+; NOFP16-LABEL: fma_f32:
+; NOFP16: .functype fma_f32 (f32, f32, f32) -> (f32)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, fmaf, $0, $1, $2
+; NOFP16-NEXT: return $pop0
+;
+; NOSIMD-LABEL: fma_f32:
+; NOSIMD: .functype fma_f32 (f32, f32, f32) -> (f32)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, fmaf, $0, $1, $2
+; NOSIMD-NEXT: return $pop0
%fma = call float @llvm.fma(float %a, float %b, float %c)
ret float %fma
}
@@ -263,6 +1504,18 @@ define double @fma_f64(double %a, double %b, double %c) {
; STRICT-NEXT: # %bb.0:
; STRICT-NEXT: call $push0=, fma, $0, $1, $2
; STRICT-NEXT: return $pop0
+;
+; NOFP16-LABEL: fma_f64:
+; NOFP16: .functype fma_f64 (f64, f64, f64) -> (f64)
+; NOFP16-NEXT: # %bb.0:
+; NOFP16-NEXT: call $push0=, fma, $0, $1, $2
+; NOFP16-NEXT: return $pop0
+;
+; NOSIMD-LABEL: fma_f64:
+; NOSIMD: .functype fma_f64 (f64, f64, f64) -> (f64)
+; NOSIMD-NEXT: # %bb.0:
+; NOSIMD-NEXT: call $push0=, fma, $0, $1, $2
+; NOSIMD-NEXT: return $pop0
%fma = call double @llvm.fma(double %a, double %b, double %c)
ret double %fma
}
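
Two behaviors in the RELAXED checks above are worth calling out: the relaxed madd operand order is now (a, b, c) for a*b + c, and plain llvm.fmuladd (no fast-math flags) now selects relaxed_madd as well, which relaxed SIMD permits since the instruction may evaluate fused or unfused. A minimal LLVM IR sketch of the contracted pattern these checks exercise (function and value names are illustrative):

define <4 x float> @madd_sketch(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
  ; a*b + c with contract; selects f32x4.relaxed_madd in the RELAXED runs
  %mul = fmul contract <4 x float> %a, %b
  %add = fadd contract <4 x float> %mul, %c
  ret <4 x float> %add
}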
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
index 6e2d860..b90c1da 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
@@ -27,7 +27,7 @@ define <4 x float> @fsub_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4
; RELAXED-LABEL: fsub_fmul_contract_4xf32:
; RELAXED: .functype fsub_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $2, $1, $0
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fsub_fmul_contract_4xf32:
@@ -46,15 +46,14 @@ define <8 x half> @fsub_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x h
; RELAXED-LABEL: fsub_fmul_contract_8xf16:
; RELAXED: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f16x8.relaxed_nmadd $push0=, $2, $1, $0
+; RELAXED-NEXT: f16x8.nmadd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fsub_fmul_contract_8xf16:
; STRICT: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
; STRICT-NEXT: # %bb.0:
-; STRICT-NEXT: f16x8.mul $push0=, $1, $0
-; STRICT-NEXT: f16x8.sub $push1=, $2, $pop0
-; STRICT-NEXT: return $pop1
+; STRICT-NEXT: f16x8.nmadd $push0=, $1, $0, $2
+; STRICT-NEXT: return $pop0
%mul = fmul contract <8 x half> %b, %a
%sub = fsub contract <8 x half> %c, %mul
ret <8 x half> %sub
@@ -84,9 +83,9 @@ define <8 x float> @fsub_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8
; RELAXED-LABEL: fsub_fmul_contract_8xf32:
; RELAXED: .functype fsub_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $6, $4, $2
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $4, $2, $6
; RELAXED-NEXT: v128.store 16($0), $pop0
-; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $5, $3, $1
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $3, $1, $5
; RELAXED-NEXT: v128.store 0($0), $pop1
; RELAXED-NEXT: return
;
@@ -110,7 +109,7 @@ define <2 x double> @fsub_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b,
; RELAXED-LABEL: fsub_fmul_contract_2xf64:
; RELAXED: .functype fsub_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
; RELAXED-NEXT: # %bb.0:
-; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $2, $1, $0
+; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $1, $0, $2
; RELAXED-NEXT: return $pop0
;
; STRICT-LABEL: fsub_fmul_contract_2xf64:
@@ -143,3 +142,55 @@ define float @fsub_fmul_contract_f32(float %a, float %b, float %c) {
ret float %sub
}
+define <8 x half> @fmuladd_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; RELAXED-LABEL: fmuladd_8xf16:
+; RELAXED: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f16x8.nmadd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_8xf16:
+; STRICT: .functype fmuladd_8xf16 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f16x8.nmadd $push0=, $0, $1, $2
+; STRICT-NEXT: return $pop0
+ %fneg = fneg <8 x half> %a
+ %fma = call <8 x half> @llvm.fmuladd(<8 x half> %fneg, <8 x half> %b, <8 x half> %c)
+ ret <8 x half> %fma
+}
+
+define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; RELAXED-LABEL: fmuladd_4xf32:
+; RELAXED: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_4xf32:
+; STRICT: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f32x4.mul $push0=, $0, $1
+; STRICT-NEXT: f32x4.sub $push1=, $2, $pop0
+; STRICT-NEXT: return $pop1
+ %fneg = fneg <4 x float> %a
+ %fma = call <4 x float> @llvm.fmuladd(<4 x float> %fneg, <4 x float> %b, <4 x float> %c)
+ ret <4 x float> %fma
+}
+
+define <2 x double> @fmuladd_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; RELAXED-LABEL: fmuladd_2xf64:
+; RELAXED: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $0, $1, $2
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fmuladd_2xf64:
+; STRICT: .functype fmuladd_2xf64 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f64x2.mul $push0=, $0, $1
+; STRICT-NEXT: f64x2.sub $push1=, $2, $pop0
+; STRICT-NEXT: return $pop1
+ %fneg = fneg <2 x double> %a
+ %fma = call <2 x double> @llvm.fmuladd(<2 x double> %fneg, <2 x double> %b, <2 x double> %c)
+ ret <2 x double> %fma
+}
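
The nmadd cases mirror the madd ones: negating the first multiplicand of an fmuladd (or contracting c - a*b) selects f16x8.nmadd, f32x4.relaxed_nmadd, or f64x2.relaxed_nmadd with the same (a, b, c) operand order. A minimal sketch of the fneg form used by the tests above (names are illustrative):

define <4 x float> @nmadd_sketch(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
  ; (-a)*b + c, i.e. c - a*b; selects f32x4.relaxed_nmadd in the RELAXED runs
  %neg = fneg <4 x float> %a
  %fma = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %neg, <4 x float> %b, <4 x float> %c)
  ret <4 x float> %fma
}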
diff --git a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll
index a0c243b..f3950b7 100644
--- a/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll
+++ b/llvm/test/CodeGen/X86/global-variable-partition-with-dap.ll
@@ -1,16 +1,15 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-;; A minimal test case. llc will crash if global variables already has a section
-;; prefix. Subsequent PRs will expand on this test case to test the hotness
-;; reconciliation implementation.
-
-; RUN: not llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \
+;; A minimal test case. Subsequent PRs will expand on this test case
+;; (e.g., with more functions, variables and profiles) and test the hotness
+;; reconciliation implementation.
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic \
; RUN: -partition-static-data-sections=true \
; RUN: -data-sections=true -unique-section-names=false \
-; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=ERR
+; RUN: %s -o - 2>&1 | FileCheck %s --check-prefix=IR
-; ERR: Global variable hot_bss already has a section prefix hot
+; IR: .section .bss.hot.,"aw"
@hot_bss = internal global i32 0, !section_prefix !17
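
The !17 node's definition is elided from this hunk; judging by the checks in the data-access-profile test below, a hot section prefix attachment has the shape sketched here (the metadata slot number is illustrative):

@hot_bss = internal global i32 0, !section_prefix !17
!17 = !{!"section_prefix", !"hot"}

Under -partition-static-data-sections this prefix is what steers @hot_bss into the .bss.hot. section the IR check expects, instead of crashing as the old ERR run line required.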
diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll
index ce06d17..604b4fd 100644
--- a/llvm/test/CodeGen/X86/global-variable-partition.ll
+++ b/llvm/test/CodeGen/X86/global-variable-partition.ll
@@ -106,23 +106,31 @@ target triple = "x86_64-unknown-linux-gnu"
; UNIQ-NEXT: .section .data.unlikely.,"aw",@progbits,unique,8
; AGG-NEXT: .section .data.unlikely.,"aw",@progbits
+;; The `.section` directive is omitted for .data with -unique-section-names=false.
+; See MCSectionELF::shouldOmitSectionDirective for the implementation details.
+
; For @data_with_unknown_hotness
; SYM: .type .Ldata_with_unknown_hotness,@object # @data_with_unknown_hotness
; SYM: .section .data..Ldata_with_unknown_hotness,"aw",@progbits
; UNIQ: .section .data,"aw",@progbits,unique,9
-; The `.section` directive is omitted for .data with -unique-section-names=false.
-; See MCSectionELF::shouldOmitSectionDirective for the implementation details.
+
; AGG: .data
; COMMON: .Ldata_with_unknown_hotness:
-; For @hot_data_custom_bar_section
-; It has an explicit section attribute 'var' and shouldn't have hot or unlikely suffix.
+; For variables that are not eligible for section prefix annotation
; COMMON: .type hot_data_custom_bar_section,@object
; SYM-NEXT: .section bar,"aw",@progbits
; SYM: hot_data_custom_bar_section
; UNIQ: .section bar,"aw",@progbits
; AGG: .section bar,"aw",@progbits
+; SYM: .section .data.llvm.fake_var,"aw"
+; UNIQ: .section .data,"aw"
+; AGG: .data
+
+;; No section is emitted for a linker declaration
+; COMMON-NOT: qux
+
@.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1
@.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1
@hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3]
@@ -137,6 +145,8 @@ target triple = "x86_64-unknown-linux-gnu"
@data3 = internal global i32 3
@data_with_unknown_hotness = private global i32 5
@hot_data_custom_bar_section = internal global i32 101 #0
+@llvm.fake_var = internal global i32 123
+@qux = external global i64
define void @cold_func(i32 %0) !prof !15 {
%2 = load i32, ptr @cold_bss
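
The two new globals exercise the skip paths: names beginning with `llvm.` are compiler-reserved, and an external global is a linker declaration with no definition to place. A sketch of the expected placement, per the SYM/COMMON checks above:

@llvm.fake_var = internal global i32 123   ; gets a plain .data section, no hot/unlikely suffix
@qux = external global i64                 ; declaration only; no section directive at all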
diff --git a/llvm/test/CodeGen/X86/setcc-wide-types.ll b/llvm/test/CodeGen/X86/setcc-wide-types.ll
index 5aa266d..69abf6e 100644
--- a/llvm/test/CodeGen/X86/setcc-wide-types.ll
+++ b/llvm/test/CodeGen/X86/setcc-wide-types.ll
@@ -1447,3 +1447,158 @@ define i1 @eq_i512_load_arg(ptr%p, i512 %b) {
%r = icmp eq i512 %a, %b
ret i1 %r
}
+
+; Tests for any/allbits from memory.
+
+define i1 @anybits_i128_load_arg(ptr %w) {
+; ANY-LABEL: anybits_i128_load_arg:
+; ANY: # %bb.0:
+; ANY-NEXT: movq (%rdi), %rax
+; ANY-NEXT: orq 8(%rdi), %rax
+; ANY-NEXT: setne %al
+; ANY-NEXT: retq
+ %ld = load i128, ptr %w
+ %cmp = icmp ne i128 %ld, 0
+ ret i1 %cmp
+}
+
+define i1 @allbits_i128_load_arg(ptr %w) {
+; SSE2-LABEL: allbits_i128_load_arg:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pcmpeqb (%rdi), %xmm0
+; SSE2-NEXT: pmovmskb %xmm0, %eax
+; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; SSE2-NEXT: sete %al
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: allbits_i128_load_arg:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa (%rdi), %xmm0
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE41-NEXT: ptest %xmm1, %xmm0
+; SSE41-NEXT: setb %al
+; SSE41-NEXT: retq
+;
+; AVXANY-LABEL: allbits_i128_load_arg:
+; AVXANY: # %bb.0:
+; AVXANY-NEXT: vmovdqa (%rdi), %xmm0
+; AVXANY-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVXANY-NEXT: vptest %xmm1, %xmm0
+; AVXANY-NEXT: setb %al
+; AVXANY-NEXT: retq
+ %ld = load i128, ptr %w
+ %cmp = icmp eq i128 %ld, -1
+ ret i1 %cmp
+}
+
+define i1 @anybits_i256_load_arg(ptr %w) {
+; ANY-LABEL: anybits_i256_load_arg:
+; ANY: # %bb.0:
+; ANY-NEXT: movq (%rdi), %rax
+; ANY-NEXT: movq 8(%rdi), %rcx
+; ANY-NEXT: orq 24(%rdi), %rcx
+; ANY-NEXT: orq 16(%rdi), %rax
+; ANY-NEXT: orq %rcx, %rax
+; ANY-NEXT: setne %al
+; ANY-NEXT: retq
+ %ld = load i256, ptr %w
+ %cmp = icmp ne i256 %ld, 0
+ ret i1 %cmp
+}
+
+define i1 @allbits_i256_load_arg(ptr %w) {
+; SSE-LABEL: allbits_i256_load_arg:
+; SSE: # %bb.0:
+; SSE-NEXT: movq (%rdi), %rax
+; SSE-NEXT: movq 8(%rdi), %rcx
+; SSE-NEXT: andq 24(%rdi), %rcx
+; SSE-NEXT: andq 16(%rdi), %rax
+; SSE-NEXT: andq %rcx, %rax
+; SSE-NEXT: cmpq $-1, %rax
+; SSE-NEXT: sete %al
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: allbits_i256_load_arg:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqu (%rdi), %ymm0
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
+; AVX1-NEXT: vptest %ymm1, %ymm0
+; AVX1-NEXT: setb %al
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: allbits_i256_load_arg:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovdqu (%rdi), %ymm0
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX2-NEXT: vptest %ymm1, %ymm0
+; AVX2-NEXT: setb %al
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: allbits_i256_load_arg:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovdqu (%rdi), %ymm0
+; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; AVX512-NEXT: vptest %ymm1, %ymm0
+; AVX512-NEXT: setb %al
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %ld = load i256, ptr %w
+ %cmp = icmp eq i256 %ld, -1
+ ret i1 %cmp
+}
+
+define i1 @anybits_i512_load_arg(ptr %w) {
+; ANY-LABEL: anybits_i512_load_arg:
+; ANY: # %bb.0:
+; ANY-NEXT: movq 16(%rdi), %rax
+; ANY-NEXT: movq (%rdi), %rcx
+; ANY-NEXT: movq 8(%rdi), %rdx
+; ANY-NEXT: movq 24(%rdi), %rsi
+; ANY-NEXT: orq 56(%rdi), %rsi
+; ANY-NEXT: orq 40(%rdi), %rdx
+; ANY-NEXT: orq %rsi, %rdx
+; ANY-NEXT: orq 48(%rdi), %rax
+; ANY-NEXT: orq 32(%rdi), %rcx
+; ANY-NEXT: orq %rax, %rcx
+; ANY-NEXT: orq %rdx, %rcx
+; ANY-NEXT: setne %al
+; ANY-NEXT: retq
+ %ld = load i512, ptr %w
+ %cmp = icmp ne i512 %ld, 0
+ ret i1 %cmp
+}
+
+define i1 @allbits_i512_load_arg(ptr %w) {
+; NO512-LABEL: allbits_i512_load_arg:
+; NO512: # %bb.0:
+; NO512-NEXT: movq 16(%rdi), %rax
+; NO512-NEXT: movq (%rdi), %rcx
+; NO512-NEXT: movq 8(%rdi), %rdx
+; NO512-NEXT: movq 24(%rdi), %rsi
+; NO512-NEXT: andq 56(%rdi), %rsi
+; NO512-NEXT: andq 40(%rdi), %rdx
+; NO512-NEXT: andq %rsi, %rdx
+; NO512-NEXT: andq 48(%rdi), %rax
+; NO512-NEXT: andq 32(%rdi), %rcx
+; NO512-NEXT: andq %rax, %rcx
+; NO512-NEXT: andq %rdx, %rcx
+; NO512-NEXT: cmpq $-1, %rcx
+; NO512-NEXT: sete %al
+; NO512-NEXT: retq
+;
+; AVX512-LABEL: allbits_i512_load_arg:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpternlogd {{.*#+}} zmm0 = -1
+; AVX512-NEXT: vpcmpneqd (%rdi), %zmm0, %k0
+; AVX512-NEXT: kortestw %k0, %k0
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
+ %ld = load i512, ptr %w
+ %cmp = icmp eq i512 %ld, -1
+ ret i1 %cmp
+}
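
The new tests cover the wide-integer any-bits/all-bits idioms when the value is loaded from memory. The IR shape is identical at every width; a minimal sketch at i128 (mirroring the tests above):

define i1 @allbits_sketch(ptr %p) {
  ; all-ones test; lowered to pcmpeq/ptest sequences depending on subtarget
  %ld = load i128, ptr %p
  %cmp = icmp eq i128 %ld, -1
  ret i1 %cmp
}

The any-bits form is the corresponding icmp ne against 0, which lowers to the scalar or-chains shown for the ANY prefix.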
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s
index 48aec4b..57da338 100644
--- a/llvm/test/MC/WebAssembly/simd-encodings.s
+++ b/llvm/test/MC/WebAssembly/simd-encodings.s
@@ -917,11 +917,11 @@ main:
# CHECK: f16x8.nearest # encoding: [0xfd,0xb6,0x02]
f16x8.nearest
- # CHECK: f16x8.relaxed_madd # encoding: [0xfd,0xce,0x02]
- f16x8.relaxed_madd
+ # CHECK: f16x8.madd # encoding: [0xfd,0xce,0x02]
+ f16x8.madd
- # CHECK: f16x8.relaxed_nmadd # encoding: [0xfd,0xcf,0x02]
- f16x8.relaxed_nmadd
+ # CHECK: f16x8.nmadd # encoding: [0xfd,0xcf,0x02]
+ f16x8.nmadd
# CHECK: i16x8.trunc_sat_f16x8_s # encoding: [0xfd,0xc5,0x02]
i16x8.trunc_sat_f16x8_s
diff --git a/llvm/test/TableGen/listsplat.td b/llvm/test/TableGen/listsplat.td
index 5a93a4c..43803d6 100644
--- a/llvm/test/TableGen/listsplat.td
+++ b/llvm/test/TableGen/listsplat.td
@@ -1,4 +1,5 @@
// RUN: llvm-tblgen %s | FileCheck %s
+// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s
// CHECK: ------------- Classes -----------------
// CHECK-NEXT: class X<int X:a = ?, int X:b = ?> {
@@ -73,3 +74,8 @@ def DYa1 : Y<"a", 1>;
def DYa2 : Y<"a", 2>;
def DZ : X<42, !size([1, 2, 3])>;
+
+#ifdef ERROR1
+// ERROR1: !listsplat count -1 is negative
+defvar E = !listsplat("", -1);
+#endif
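
!listsplat(value, count) builds a list of count copies of value, so a negative count is meaningless; the new ERROR1 run line pins down the diagnostic text. A short TableGen sketch of both sides (the defvar names are illustrative):

// OK: yields [0, 0, 0]
defvar Zeros = !listsplat(0, 3);
// Diagnosed: !listsplat count -1 is negative
defvar Bad = !listsplat("", -1);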
diff --git a/llvm/test/Transforms/PGOProfile/data-access-profile.ll b/llvm/test/Transforms/PGOProfile/data-access-profile.ll
index 29198f34..205184b 100644
--- a/llvm/test/Transforms/PGOProfile/data-access-profile.ll
+++ b/llvm/test/Transforms/PGOProfile/data-access-profile.ll
@@ -3,55 +3,72 @@
; RUN: rm -rf %t && split-file %s %t && cd %t
-;; Read a text profile and merge it into indexed profile.
+;; Read text profiles and merge them into indexed profiles.
; RUN: llvm-profdata merge --memprof-version=4 memprof.yaml -o memprof.profdata
+; RUN: llvm-profdata merge --memprof-version=4 memprof-no-dap.yaml -o memprof-no-dap.profdata
;; Run optimizer pass on an IR module without IR functions, and test that global
;; variables in the module could be annotated (i.e., no early return).
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \
-; RUN: -debug-only=memprof -stats -S funcless-module.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT
+; RUN: -debug-only=memprof -stats -S funcless-module.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,IR,STAT
;; Run optimizer pass on the IR, and check the section prefix.
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' -memprof-annotate-static-data-prefix \
-; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,PREFIX,STAT
+; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefixes=LOG,IR,STAT
-;; Run optimizer pass without explicitly setting -memprof-annotate-static-data-prefix.
-;; The output text IR shouldn't have `section_prefix`
+;; Run memprof without providing memprof data. Test that IR has module flag
+;; `EnableDataAccessProf` as 0.
+; RUN: opt -passes='memprof-use<profile-filename=memprof-no-dap.profdata>' -memprof-annotate-static-data-prefix \
+; RUN: -debug-only=memprof -stats -S input.ll -o - 2>&1 | FileCheck %s --check-prefix=FLAG
+
+;; Run memprof without explicitly setting -memprof-annotate-static-data-prefix.
+;; The output text IR shouldn't have `section_prefix` or EnableDataAccessProf module flag.
; RUN: opt -passes='memprof-use<profile-filename=memprof.profdata>' \
-; RUN: -debug-only=memprof -stats -S input.ll -o - | FileCheck %s --implicit-check-not="section_prefix"
+; RUN: -debug-only=memprof -stats -S input.ll -o - | FileCheck %s --check-prefix=FLAGLESS --implicit-check-not="section_prefix"
; LOG: Skip annotating string literal .str
; LOG: Global variable var1 is annotated as hot
; LOG: Global variable var2.llvm.125 is annotated as hot
; LOG: Global variable bar is not annotated
; LOG: Global variable foo is annotated as unlikely
-; LOG: Global variable var3 has explicit section name. Skip annotating.
-; LOG: Global variable var4 has explicit section name. Skip annotating.
+; LOG: Skip annotation for var3 due to explicit section name.
+; LOG: Skip annotation for var4 due to explicit section name.
+; LOG: Skip annotation for llvm.fake_var due to name starts with `llvm.`.
+; LOG: Skip annotation for qux due to linker declaration.
;; String literals are not annotated.
-; PREFIX: @.str = unnamed_addr constant [5 x i8] c"abcde"
-; PREFIX-NOT: section_prefix
-; PREFIX: @var1 = global i32 123, !section_prefix !0
+; IR: @.str = unnamed_addr constant [5 x i8] c"abcde"
+; IR-NOT: section_prefix
+; IR: @var1 = global i32 123, !section_prefix !0
;; @var.llvm.125 will be canonicalized to @var2 for profile look-up.
-; PREFIX-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0
+; IR-NEXT: @var2.llvm.125 = global i64 0, !section_prefix !0
;; @bar is not seen in hot symbol or known symbol set, so it won't get a section
;; prefix. Test this by testing that there is no section_prefix between @bar and
;; @foo.
-; PREFIX-NEXT: @bar = global i16 3
-; PREFIX-NOT: !section_prefix
+; IR-NEXT: @bar = global i16 3
+; IR-NOT: !section_prefix
;; @foo is unlikely.
-; PREFIX-NEXT: @foo = global i8 2, !section_prefix !1
+; IR-NEXT: @foo = global i8 2, !section_prefix !1
+
+; IR-NEXT: @var3 = constant [2 x i32] [i32 12345, i32 6789], section "sec1"
+; IR-NEXT: @var4 = constant [1 x i64] [i64 98765] #0
+
+; IR: @llvm.fake_var = global i32 123
+; IR-NOT: !section_prefix
+; IR: @qux = external global i64
+; IR-NOT: !section_prefix
-; PREFIX-NEXT: @var3 = constant [2 x i32] [i32 12345, i32 6789], section "sec1"
-; PREFIX-NEXT: @var4 = constant [1 x i64] [i64 98765] #0
+; IR: attributes #0 = { "rodata-section"="sec2" }
-; PREFIX: attributes #0 = { "rodata-section"="sec2" }
+; IR: !0 = !{!"section_prefix", !"hot"}
+; IR-NEXT: !1 = !{!"section_prefix", !"unlikely"}
+; IR-NEXT: !2 = !{i32 2, !"EnableDataAccessProf", i32 1}
-; PREFIX: !0 = !{!"section_prefix", !"hot"}
-; PREFIX-NEXT: !1 = !{!"section_prefix", !"unlikely"}
+; FLAG: !{i32 2, !"EnableDataAccessProf", i32 0}
+; FLAGLESS-NOT: EnableDataAccessProf
; STAT: 1 memprof - Number of global vars annotated with 'unlikely' section prefix.
; STAT: 2 memprof - Number of global vars with user-specified section (not annotated).
@@ -72,6 +89,24 @@ DataAccessProfiles:
- foo
KnownColdStrHashes: [ 999, 1001 ]
...
+;--- memprof-no-dap.yaml
+---
+# A memprof file without data access profiles. The heap records are simplified
+# to pass profile parsing and don't need to match the IR.
+HeapProfileRecords:
+ - GUID: 0xdeadbeef12345678
+ AllocSites:
+ - Callstack:
+ - { Function: 0x1111111111111111, LineOffset: 11, Column: 10, IsInlineFrame: true }
+ MemInfoBlock:
+ AllocCount: 111
+ TotalSize: 222
+ TotalLifetime: 333
+ TotalLifetimeAccessDensity: 444
+ CallSites:
+ - Frames:
+ - { Function: 0x5555555555555555, LineOffset: 55, Column: 50, IsInlineFrame: true }
+...
;--- input.ll
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
@@ -84,11 +119,14 @@ target triple = "x86_64-unknown-linux-gnu"
@foo = global i8 2
@var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1"
@var4 = constant [1 x i64][i64 98765] #0
+@llvm.fake_var = global i32 123
+@qux = external global i64
define i32 @func() {
%a = load i32, ptr @var1
%b = load i32, ptr @var2.llvm.125
- %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b)
+ %c = load i32, ptr @llvm.fake_var
+ %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b, i32 %c)
ret i32 %ret
}
@@ -108,5 +146,8 @@ target triple = "x86_64-unknown-linux-gnu"
@foo = global i8 2
@var3 = constant [2 x i32][i32 12345, i32 6789], section "sec1"
@var4 = constant [1 x i64][i64 98765] #0
+@llvm.fake_var = global i32 123
+@qux = external global i64
+
attributes #0 = { "rodata-section"="sec2" }
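
For reference, the EnableDataAccessProf module flag that the IR/FLAG/FLAGLESS checks distinguish is a plain module-level metadata flag; when annotation runs with data access profiles present it takes the shape sketched here (mirroring the !2 check above):

!llvm.module.flags = !{!2}
!2 = !{i32 2, !"EnableDataAccessProf", i32 1}

With a profile that lacks data access records (memprof-no-dap.profdata) the flag is emitted with value 0, and without -memprof-annotate-static-data-prefix it is not emitted at all.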