# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -start-before=amdgpu-insert-delay-alu %s -o - | FileCheck %s

# Each function below has the same three-instruction shape:
#   1. a WMMA that writes a VGPR tuple,
#   2. a V_EXP_F32 on an unrelated register,
#   3. a V_ADD_U32 that reads the first register of the WMMA result.
# The directives pin the s_delay_alu that is expected immediately before the
# final add.

# WMMA whose accumulator input and result share the tuple v[8:15].
# NOTE(review): the expected TRANS32_DEP_2 suggests the WMMA is counted
# together with V_EXP_F32 as a TRANS-class producer -- confirm against the
# delay-ALU insertion pass.
---
name: wmma_xdl_twoaddr_trans
tracksRegLiveness: true
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}wmma_xdl_twoaddr_trans:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_wmma_f32_16x16x64_fp8_fp8 v[8:15], v[0:7], v[0:7], v[8:15]
    ; CHECK-NEXT: v_exp_f32_e32 v16, v16
    ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_2)
    ; CHECK-NEXT: v_add_nc_u32_e32 v17, v17, v8
    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16, $vgpr17

    ; Producer: result tuple v[8:15] is also passed as the accumulator operand.
    $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_WMMA_F32_16X16X64_FP8_FP8_w32_twoaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, 0, implicit $exec
    ; Intervening instruction: touches only v16.
    $vgpr16 = V_EXP_F32_e32 $vgpr16, implicit $exec, implicit $mode
    ; Consumer: reads v8, the first register of the WMMA result.
    $vgpr17 = V_ADD_U32_e32 $vgpr17, $vgpr8, implicit $exec
...

# Same scenario, but the accumulator input v[16:23] is a different tuple from
# the result v[8:15]. The expected delay annotation is unchanged.
---
name: wmma_xdl_threeaddr_trans
tracksRegLiveness: true
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}wmma_xdl_threeaddr_trans:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_wmma_f32_16x16x64_fp8_fp8 v[8:15], v[0:7], v[0:7], v[16:23]
    ; CHECK-NEXT: v_exp_f32_e32 v24, v24
    ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_2)
    ; CHECK-NEXT: v_add_nc_u32_e32 v25, v25, v8
    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24, $vgpr25

    ; Producer: writes v[8:15]; accumulator comes from v[16:23].
    $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_WMMA_F32_16X16X64_FP8_FP8_w32_threeaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 8, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, 0, 0, 0, 0, implicit $exec
    ; Intervening instruction: touches only v24.
    $vgpr24 = V_EXP_F32_e32 $vgpr24, implicit $exec, implicit $mode
    ; Consumer: reads v8, the first register of the WMMA result.
    $vgpr25 = V_ADD_U32_e32 $vgpr25, $vgpr8, implicit $exec
...
# SWMMAC variant: the instruction writes v[24:27] (which is also one of its
# inputs), a V_EXP_F32 on v30 follows, and the final add reads v24. The
# expected annotation before the add is TRANS32_DEP_2.
# NOTE(review): this suggests the SWMMAC is counted together with V_EXP_F32 as
# a TRANS-class producer -- confirm against the delay-ALU insertion pass.
---
name: swmmac_xdl_twoaddr_trans
tracksRegLiveness: true
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}swmmac_xdl_twoaddr_trans:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_swmmac_f16_16x16x128_bf8_bf8 v[24:27], v[0:7], v[8:23], v[28:29]
    ; CHECK-NEXT: v_exp_f32_e32 v30, v30
    ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_2)
    ; CHECK-NEXT: v_add_nc_u32_e32 v31, v31, v24
    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31

    ; Producer: writes v[24:27]; v28 and v29 are consumed together as v[28:29].
    $vgpr24_vgpr25_vgpr26_vgpr27 = V_SWMMAC_F16_16X16X128_BF8_BF8_w32_twoaddr $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27, $vgpr28_vgpr29, 0, 0, 0, implicit $exec
    ; Intervening instruction: touches only v30.
    $vgpr30 = V_EXP_F32_e32 $vgpr30, implicit $exec, implicit $mode
    ; Consumer: reads v24, the first register of the SWMMAC result.
    $vgpr31 = V_ADD_U32_e32 $vgpr31, $vgpr24, implicit $exec
...

# F32 16x16x4 WMMA writing v[4:11], followed by a V_EXP_F32 on v12 and an add
# that reads v8 (a register in the middle of the result tuple). Unlike the
# SWMMAC case above, the expected annotation before the add is VALU_DEP_1.
# NOTE(review): this suggests the WMMA here is counted as an ordinary VALU
# producer and the V_EXP_F32 is not counted in that distance -- confirm
# against the delay-ALU insertion pass.
# NOTE(review): the live-in list names $vgpr4_vgpr5_vgpr6_vgpr7 and also the
# 8-register tuple starting at $vgpr4 that contains it; left unchanged.
---
name: wmma_non_xdl_large_data_valu
tracksRegLiveness: true
body: |
  bb.0:
    ; CHECK-LABEL: {{^}}wmma_non_xdl_large_data_valu:
    ; CHECK: %bb.0:
    ; CHECK-NEXT: v_wmma_f32_16x16x4_f32 v[4:11], v[0:1], v[2:3], v[4:11] matrix_b_reuse
    ; CHECK-NEXT: v_exp_f32_e32 v12, v12
    ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
    ; CHECK-NEXT: v_add_nc_u32_e32 v13, v13, v8
    liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7, $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, $vgpr12, $vgpr13

    ; Producer: result tuple v[4:11] is also passed as the accumulator operand.
    $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11 = V_WMMA_F32_16X16X4_F32_w32_twoaddr 8, $vgpr0_vgpr1, 8, $vgpr2_vgpr3, 8, $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, 0, -1, 0, 0, implicit $exec
    ; Intervening instruction: touches only v12.
    $vgpr12 = V_EXP_F32_e32 $vgpr12, implicit $exec, implicit $mode
    ; Consumer: reads v8, which lies inside the WMMA result tuple.
    $vgpr13 = V_ADD_U32_e32 $vgpr13, $vgpr8, implicit $exec
...