aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx1250-w32.mir
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx1250-w32.mir')
-rw-r--r--llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx1250-w32.mir1646
1 files changed, 1646 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx1250-w32.mir b/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx1250-w32.mir
new file mode 100644
index 0000000..fa3b9244
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/wmma-hazards-gfx1250-w32.mir
@@ -0,0 +1,1646 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GFX1250 %s
+
+# For two conscutive wmma instructions, we need to insert one V_NOP instruction between
+# them if matrix A, B or index of the second wmma are the same or overlap with previous
+# wmma instruction’s D-matrix.
+
+---
+name: test_wmma_f32_16x16x4_f32_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x4_f32_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = V_WMMA_F32_16X16X4_F32_w32_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 8, killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F32_16X16X4_F32_w32_twoaddr 8, killed $vgpr4_vgpr5, 8, killed $vgpr16_vgpr17, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = V_WMMA_F32_16X16X4_F32_w32_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 8, killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec
+ $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F32_16X16X4_F32_w32_twoaddr 8, killed $vgpr4_vgpr5, 8, killed $vgpr16_vgpr17, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x4_f32_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x4_f32_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = V_WMMA_F32_16X16X4_F32_w32_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 8, killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F32_16X16X4_F32_w32_twoaddr 8, killed $vgpr14_vgpr15, 8, killed $vgpr4_vgpr5, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = V_WMMA_F32_16X16X4_F32_w32_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 8, killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec
+ $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F32_16X16X4_F32_w32_twoaddr 8, killed $vgpr14_vgpr15, 8, killed $vgpr4_vgpr5, 8, killed $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x4_f32_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x4_f32_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = V_WMMA_F32_16X16X4_F32_w32_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 8, killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr4_vgpr5, 0, 0, 0, implicit $exec
+ $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = V_WMMA_F32_16X16X4_F32_w32_twoaddr 8, killed $vgpr0_vgpr1, 8, killed $vgpr2_vgpr3, 8, killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr4_vgpr5, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x32_bf16_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x32_bf16_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x32_bf16_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x32_bf16_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x32_bf16_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x32_bf16_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr16_vgpr17, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr16_vgpr17, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_bf16_16x16x32_bf16_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_bf16_16x16x32_bf16_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_BF16_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_BF16_16X16X32_BF16_w32_twoaddr 8, killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+ $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_BF16_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_BF16_16X16X32_BF16_w32_twoaddr 8, killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_bf16_16x16x32_bf16_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_bf16_16x16x32_bf16_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr30_vgpr31_vgpr32_vgpr33 = V_WMMA_BF16_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_BF16_16X16X32_BF16_w32_twoaddr 8, killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+ $vgpr30_vgpr31_vgpr32_vgpr33 = V_WMMA_BF16_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, 0, implicit $exec
+ $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_BF16_16X16X32_BF16_w32_twoaddr 8, killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_bf16_16x16x32_bf16_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_bf16_16x16x32_bf16_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_BF16_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr22_vgpr23, 0, 0, 0, implicit $exec
+ $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_BF16_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr22_vgpr23, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_bf16f32_16x16x32_bf16_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_bf16f32_16x16x32_bf16_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr26_vgpr27_vgpr28_vgpr29 = V_WMMA_BF16F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_BF16F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 8, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 9, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+ $vgpr26_vgpr27_vgpr28_vgpr29 = V_WMMA_BF16F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_BF16F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 8, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 9, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_bf16f32_16x16x32_bf16_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_bf16f32_16x16x32_bf16_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr34_vgpr35_vgpr36_vgpr37 = V_WMMA_BF16F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_BF16F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 8, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 9, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+ $vgpr34_vgpr35_vgpr36_vgpr37 = V_WMMA_BF16F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr0_vgpr1_vgpr2_vgpr3 = V_WMMA_BF16F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 8, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 9, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_bf16f32_16x16x32_bf16_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_bf16f32_16x16x32_bf16_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr26_vgpr27_vgpr28_vgpr29 = V_WMMA_BF16F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr26_vgpr27, 0, 0, 0, implicit $exec
+ $vgpr26_vgpr27_vgpr28_vgpr29 = V_WMMA_BF16F32_16X16X32_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr26_vgpr27, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x64_fp8_fp8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x64_fp8_fp8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x64_fp8_fp8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x64_fp8_fp8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x64_fp8_fp8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x64_fp8_fp8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr16_vgpr17, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr16_vgpr17, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x64_fp8_bf8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x64_fp8_bf8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x64_fp8_bf8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x64_fp8_bf8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x64_fp8_bf8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x64_fp8_bf8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr16_vgpr17, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr16_vgpr17, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x64_bf8_fp8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x64_bf8_fp8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x64_bf8_fp8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x64_bf8_fp8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x64_bf8_fp8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x64_bf8_fp8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr16_vgpr17, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr16_vgpr17, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x64_bf8_bf8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x64_bf8_bf8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x64_bf8_bf8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x64_bf8_bf8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x64_bf8_bf8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x64_bf8_bf8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr16_vgpr17, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr16_vgpr17, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f16_16x16x64_fp8_fp8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f16_16x16x64_fp8_fp8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+ $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f16_16x16x64_fp8_fp8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f16_16x16x64_fp8_fp8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr30_vgpr31_vgpr32_vgpr33 = V_WMMA_F16_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+ $vgpr30_vgpr31_vgpr32_vgpr33 = V_WMMA_F16_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, 0, implicit $exec
+ $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f16_16x16x64_fp8_fp8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f16_16x16x64_fp8_fp8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr22_vgpr23, 0, 0, 0, implicit $exec
+ $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr22_vgpr23, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f16_16x16x64_fp8_bf8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f16_16x16x64_fp8_bf8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+ $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f16_16x16x64_fp8_bf8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f16_16x16x64_fp8_bf8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr30_vgpr31_vgpr32_vgpr33 = V_WMMA_F16_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+ $vgpr30_vgpr31_vgpr32_vgpr33 = V_WMMA_F16_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, 0, implicit $exec
+ $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f16_16x16x64_fp8_bf8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f16_16x16x64_fp8_bf8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr22_vgpr23, 0, 0, 0, implicit $exec
+ $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr22_vgpr23, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f16_16x16x64_fb8_fp8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f16_16x16x64_fb8_fp8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+ $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f16_16x16x64_bf8_fp8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f16_16x16x64_bf8_fp8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr30_vgpr31_vgpr32_vgpr33 = V_WMMA_F16_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+ $vgpr30_vgpr31_vgpr32_vgpr33 = V_WMMA_F16_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, 0, implicit $exec
+ $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f16_16x16x64_fb8_fp8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f16_16x16x64_fb8_fp8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr22_vgpr23, 0, 0, 0, implicit $exec
+ $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr22_vgpr23, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f16_16x16x64_bf8_bf8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f16_16x16x64_bf8_bf8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+ $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f16_16x16x64_bf8_bf8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f16_16x16x64_bf8_bf8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr30_vgpr31_vgpr32_vgpr33 = V_WMMA_F16_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+ $vgpr30_vgpr31_vgpr32_vgpr33 = V_WMMA_F16_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, 0, implicit $exec
+ $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f16_16x16x64_bf8_bf8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f16_16x16x64_bf8_bf8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr22_vgpr23, 0, 0, 0, implicit $exec
+ $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X64_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr22_vgpr23, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_I32_16x16x64_IU8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_I32_16x16x64_IU8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_I32_16X16X64_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_I32_16X16X64_IU8_w32_twoaddr 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_I32_16X16X64_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, implicit $exec
+ $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_I32_16X16X64_IU8_w32_twoaddr 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_I32_16x16x64_IU8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_I32_16x16x64_IU8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_I32_16X16X64_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_I32_16X16X64_IU8_w32_twoaddr 8, killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_I32_16X16X64_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, implicit $exec
+ $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_I32_16X16X64_IU8_w32_twoaddr 8, killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_I32_16x16x64_IU8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_I32_16x16x64_IU8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_I32_16X16X64_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr16_vgpr17, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_I32_16X16X64_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr16_vgpr17, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x32_f16_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x32_f16_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X32_F16_w32_twoaddr 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X32_F16_w32_twoaddr 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x32_f16_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x32_f16_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X32_F16_w32_twoaddr 8, killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49 = V_WMMA_F32_16X16X32_F16_w32_twoaddr 8, killed $vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 8, killed $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f32_16x16x32_f16_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f32_16x16x32_f16_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr16_vgpr17, 0, 0, 0, implicit $exec
+ $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23 = V_WMMA_F32_16X16X32_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr16_vgpr17, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f16_16x16x32_f16_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f16_16x16x32_f16_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X32_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X32_F16_w32_twoaddr 8, killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+ $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X32_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X32_F16_w32_twoaddr 8, killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f16_16x16x32_f16_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f16_16x16x32_f16_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr30_vgpr31_vgpr32_vgpr33 = V_WMMA_F16_16X16X32_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X32_F16_w32_twoaddr 8, killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+ $vgpr30_vgpr31_vgpr32_vgpr33 = V_WMMA_F16_16X16X32_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, 0, implicit $exec
+ $vgpr38_vgpr39_vgpr40_vgpr41 = V_WMMA_F16_16X16X32_F16_w32_twoaddr 8, killed $vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29, 8, killed $vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37, 8, killed $vgpr38_vgpr39_vgpr40_vgpr41, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_f16_16x16x32_f16_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_f16_16x16x32_f16_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X32_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr22_vgpr23, 0, 0, 0, implicit $exec
+ $vgpr22_vgpr23_vgpr24_vgpr25 = V_WMMA_F16_16X16X32_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 8, killed $vgpr22_vgpr23_vgpr24_vgpr25, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr22_vgpr23, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_F32_16x16x128_F8F6F4_F8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_F32_16x16x128_F8F6F4_F8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_F32_16x16x128_F8F6F4_F8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_F32_16x16x128_F8F6F4_F8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_F32_16x16x128_F8F6F4_F8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_F32_16x16x128_F8F6F4_F8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_F32_16x16x128_F8F6F4_F6F4_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_F32_16x16x128_F8F6F4_F6F4_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 2, 2, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 2, 2, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_F32_16x16x128_F8F6F4_F6F4_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_F32_16x16x128_F8F6F4_F6F4_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 2, 2, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 2, 2, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_F32_16x16x128_F8F6F4_F6F4_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_F32_16x16x128_F8F6F4_F6F4_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 2, 2, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, 2, 2, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_scale_F32_16x16x128_F8F6F4_F8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_scale_F32_16x16x128_F8F6F4_F8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr41, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr41, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_scale_F32_16x16x128_F8F6F4_F8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_scale_F32_16x16x128_F8F6F4_F8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr41, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr41, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_scale_F32_16x16x128_F8F6F4_F8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_scale_F32_16x16x128_F8F6F4_F8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr81, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr81, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_scale_F32_16x16x128_F8F6F4_F6F4_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_scale_F32_16x16x128_F8F6F4_F6F4_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr41, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr41, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_scale_F32_16x16x128_F8F6F4_F6F4_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_scale_F32_16x16x128_F8F6F4_F6F4_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr41, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr41, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_scale_F32_16x16x128_F8F6F4_F6F4_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_scale_F32_16x16x128_F8F6F4_F6F4_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr81, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr81, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_scale16_F3216_16x16x128_F8F6F4_F8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_scale16_F3216_16x16x128_F8F6F4_F8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41, killed $vgpr42_vgpr43, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41, killed $vgpr42_vgpr43, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_scale16_F32_16x16x128_F8F6F4_F8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_scale16_F32_16x16x128_F8F6F4_F8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41, killed $vgpr42_vgpr43, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41, killed $vgpr42_vgpr43, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_scale16_F32_16x16x128_F8F6F4_F8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_scale16_F32_16x16x128_F8F6F4_F8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr41, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr41, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_scale16_F3216_16x16x128_F8F6F4_F6f4_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_scale16_F3216_16x16x128_F8F6F4_F6f4_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41, killed $vgpr42_vgpr43, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41, killed $vgpr42_vgpr43, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_scale16_F32_16x16x128_F8F6F4_F6f4_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_scale16_F32_16x16x128_F8F6F4_F6f4_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41, killed $vgpr42_vgpr43, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE16_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41, killed $vgpr42_vgpr43, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_WMMA_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 8, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, 1, 2, 0, 0, implicit $exec
+...
+
+---
+name: test_wmma_scale16_F32_16x16x128_F8F6F4_F6f4_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_wmma_scale16_F32_16x16x128_F8F6F4_F6f4_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr41, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_WMMA_SCALE_F32_16X16X128_F8F6F4_f8_f8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40, killed $vgpr41, 2, 2, 0, 0, 1, 1, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr32_vgpr33, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x64_bf16_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x64_bf16_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_SWMMAC_F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_SWMMAC_F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x64_bf16_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x64_bf16_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_SWMMAC_F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_SWMMAC_F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x64_bf16_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x64_bf16_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_SWMMAC_F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr24, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_SWMMAC_F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr24, 0, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_bf16_16x16x64_bf16_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_bf16_16x16x64_bf16_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_BF16_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35 = V_SWMMAC_BF16_16X16X64_BF16_w32_twoaddr 8, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_BF16_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35 = V_SWMMAC_BF16_16X16X64_BF16_w32_twoaddr 8, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_bf16_16x16x64_bf16_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_bf16_16x16x64_bf16_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_BF16_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35 = V_SWMMAC_BF16_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr32_vgpr33_vgpr34_vgpr35, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_BF16_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35 = V_SWMMAC_BF16_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr32_vgpr33_vgpr34_vgpr35, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_bf16_16x16x64_bf16_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_bf16_16x16x64_bf16_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_BF16_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35 = V_SWMMAC_BF16_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35, killed $vgpr24, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_BF16_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35 = V_SWMMAC_BF16_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35, killed $vgpr24, 0, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_bf16f32_16x16x64_bf16_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_bf16f32_16x16x64_bf16_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_BF16F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 = V_SWMMAC_BF16F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_BF16F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 = V_SWMMAC_BF16F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_bf16f32_16x16x64_bf16_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_bf16f32_16x16x64_bf16_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_BF16F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 = V_SWMMAC_BF16F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_BF16F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 = V_SWMMAC_BF16F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_bf16f32_16x16x64_bf16_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_bf16f32_16x16x64_bf16_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_BF16F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 = V_SWMMAC_BF16F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr24, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_BF16F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 = V_SWMMAC_BF16F32_16X16X64_BF16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr24, 0, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x128_fp8_fp8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x128_fp8_fp8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x128_fp8_fp8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x128_fp8_fp8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x128_fp8_fp8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x128_fp8_fp8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x128_fp8_bf8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x128_fp8_bf8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x128_fp8_bf8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x128_fp8_bf8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x128_fp8_bf8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x128_fp8_bf8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x128_bf8_fp8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x128_bf8_fp8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x128_bf8_fp8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x128_bf8_fp8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x128_bf8_fp8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x128_bf8_fp8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x128_bf8_bf8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x128_bf8_bf8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x128_bf8_bf8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x128_bf8_bf8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x128_bf8_bf8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x128_bf8_bf8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 = V_SWMMAC_F32_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f16_16x16x128_fp8_fp8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f16_16x16x128_fp8_fp8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f16_16x16x128_fp8_fp8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f16_16x16x128_fp8_fp8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+
+ $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f16_16x16x128_fp8_fp8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f16_16x16x128_fp8_fp8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f16_16x16x128_fp8_bf8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f16_16x16x128_fp8_bf8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f16_16x16x128_fp8_bf8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f16_16x16x128_fp8_bf8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+
+ $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f16_16x16x128_fp8_bf8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f16_16x16x128_fp8_bf8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_BF8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f16_16x16x128_bf8_fp8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f16_16x16x128_bf8_fp8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f16_16x16x128_bf8_fp8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f16_16x16x128_bf8_fp8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f16_16x16x128_bf8_fp8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f16_16x16x128_bf8_fp8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_BF8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f16_16x16x128_bf8_bf8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f16_16x16x128_bf8_bf8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f16_16x16x128_bf8_bf8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f16_16x16x128_bf8_bf8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr48_vgpr49_vgpr50_vgpr51 = V_SWMMAC_F16_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr48_vgpr49_vgpr50_vgpr51, killed $vgpr90_vgpr91, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f16_16x16x128_bf8_bf8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f16_16x16x128_bf8_bf8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88_vgpr89, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_BF8_BF8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_i32_16x16x128_iu8_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_i32_16x16x128_iu8_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_I32_16X16X128_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 = V_SWMMAC_I32_16X16X128_IU8_w32_twoaddr 8, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr90_vgpr91, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_I32_16X16X128_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, 0, implicit $exec
+ $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 = V_SWMMAC_I32_16X16X128_IU8_w32_twoaddr 8, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr90_vgpr91, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_i32_16x16x128_iu8_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_i32_16x16x128_iu8_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_I32_16X16X128_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 = V_SWMMAC_I32_16X16X128_IU8_w32_twoaddr 8, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 8, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr90_vgpr91, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_I32_16X16X128_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, 0, implicit $exec
+ $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55 = V_SWMMAC_I32_16X16X128_IU8_w32_twoaddr 8, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 8, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr90_vgpr91, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_i32_16x16x128_iu8_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_i32_16x16x128_iu8_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_I32_16X16X128_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_I32_16X16X128_IU8_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88_vgpr89, 0, 0, 0, 0, implicit $exec
+ $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 = V_SWMMAC_F32_16X16X128_FP8_FP8_w32_twoaddr killed $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, killed $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55, killed $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, killed $vgpr24_vgpr25, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x64_f16_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x64_f16_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_SWMMAC_F32_16X16X64_F16_w32_twoaddr 8, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_SWMMAC_F32_16X16X64_F16_w32_twoaddr 8, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x64_f16_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x64_f16_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_SWMMAC_F32_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_SWMMAC_F32_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f32_16x16x64_f16_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f32_16x16x64_f16_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_SWMMAC_F32_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr24, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_SWMMAC_F32_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_SWMMAC_F32_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, killed $vgpr24, 0, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f16_16x16x64_f16_D0_overlaps_A1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f16_16x16x64_f16_D0_overlaps_A1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67 = V_SWMMAC_F16_16X16X64_F16_w32_twoaddr 8, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr64_vgpr65_vgpr66_vgpr67, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67 = V_SWMMAC_F16_16X16X64_F16_w32_twoaddr 8, killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr64_vgpr65_vgpr66_vgpr67, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f16_16x16x64_f16_D0_overlaps_B1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f16_16x16x64_f16_D0_overlaps_B1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67 = V_SWMMAC_F16_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr64_vgpr65_vgpr66_vgpr67, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67 = V_SWMMAC_F16_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr64_vgpr65_vgpr66_vgpr67, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+...
+
+---
+name: test_swmmac_f16_16x16x64_f16_D0_overlaps_Index1
+body: |
+ bb.0:
+ ; GFX1250-LABEL: name: test_swmmac_f16_16x16x64_f16_D0_overlaps_Index1
+ ; GFX1250: early-clobber $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: V_NOP_e32 implicit $exec
+ ; GFX1250-NEXT: early-clobber $vgpr64_vgpr65_vgpr66_vgpr67 = V_SWMMAC_F16_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr64_vgpr65_vgpr66_vgpr67, killed $vgpr24, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr24_vgpr25_vgpr26_vgpr27, killed $vgpr88, 0, 0, 0, 0, 0, implicit $exec
+ $vgpr64_vgpr65_vgpr66_vgpr67 = V_SWMMAC_F16_16X16X64_F16_w32_twoaddr 8, killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, killed $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, killed $vgpr64_vgpr65_vgpr66_vgpr67, killed $vgpr24, 0, 0, 0, 0, 0, implicit $exec
+...