aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll340
1 files changed, 170 insertions, 170 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
index 8056881..b25fe83 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
@@ -17,24 +17,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_f16__vgpr(ptr addrspace(1) %
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
-; SDAG-NEXT: v_mov_b32_e32 v12, 0
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: global_load_dwordx4 v[14:17], v0, s[6:7]
+; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
-; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
-; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
+; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[2:3]
+; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[0:1]
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; SDAG-NEXT: v_mov_b32_e32 v13, s16
+; SDAG-NEXT: v_mov_b32_e32 v17, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 v[14:17], v[8:11], v[0:7], v13 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v12, v[14:17], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_smfmac_f32_16x16x64_f16__vgpr:
@@ -547,24 +547,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_bf16__vgpr(ptr addrspace(1)
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0
-; GCN-NEXT: v_mov_b32_e32 v12, 0
+; GCN-NEXT: v_mov_b32_e32 v16, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: global_load_dwordx4 v[14:17], v0, s[6:7]
+; GCN-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
; GCN-NEXT: s_load_dword s16, s[4:5], 0x64
-; GCN-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
-; GCN-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
+; GCN-NEXT: v_mov_b64_e32 v[14:15], s[2:3]
+; GCN-NEXT: v_mov_b64_e32 v[12:13], s[0:1]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; GCN-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; GCN-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
; GCN-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; GCN-NEXT: v_mov_b32_e32 v13, s16
+; GCN-NEXT: v_mov_b32_e32 v17, s16
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_nop 0
-; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 v[14:17], v[8:11], v[0:7], v13 cbsz:1 abid:2
+; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
; GCN-NEXT: s_nop 7
-; GCN-NEXT: global_store_dwordx4 v12, v[14:17], s[6:7]
+; GCN-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
; GCN-NEXT: s_endpgm
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
@@ -855,30 +855,30 @@ define amdgpu_kernel void @test_smfmac_i32_16x16x128_i8__vgpr(ptr addrspace(1) %
; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7]
+; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SDAG-NEXT: v_mov_b32_e32 v14, s8
-; SDAG-NEXT: v_mov_b32_e32 v15, s9
-; SDAG-NEXT: v_mov_b32_e32 v16, s10
-; SDAG-NEXT: v_mov_b32_e32 v17, s11
-; SDAG-NEXT: v_mov_b32_e32 v2, s12
-; SDAG-NEXT: v_mov_b32_e32 v3, s13
-; SDAG-NEXT: v_mov_b32_e32 v4, s14
-; SDAG-NEXT: v_mov_b32_e32 v5, s15
+; SDAG-NEXT: v_mov_b32_e32 v12, s8
+; SDAG-NEXT: v_mov_b32_e32 v13, s9
+; SDAG-NEXT: v_mov_b32_e32 v14, s10
+; SDAG-NEXT: v_mov_b32_e32 v15, s11
+; SDAG-NEXT: v_mov_b32_e32 v0, s12
+; SDAG-NEXT: v_mov_b32_e32 v1, s13
+; SDAG-NEXT: v_mov_b32_e32 v2, s14
+; SDAG-NEXT: v_mov_b32_e32 v3, s15
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v6, s0
-; SDAG-NEXT: v_mov_b32_e32 v7, s1
-; SDAG-NEXT: v_mov_b32_e32 v8, s2
-; SDAG-NEXT: v_mov_b32_e32 v9, s3
-; SDAG-NEXT: v_mov_b32_e32 v1, s16
+; SDAG-NEXT: v_mov_b32_e32 v4, s0
+; SDAG-NEXT: v_mov_b32_e32 v5, s1
+; SDAG-NEXT: v_mov_b32_e32 v6, s2
+; SDAG-NEXT: v_mov_b32_e32 v7, s3
+; SDAG-NEXT: v_mov_b32_e32 v17, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_smfmac_i32_16x16x128_i8__vgpr:
@@ -1032,22 +1032,22 @@ define amdgpu_kernel void @test_smfmac_i32_32x32x64_i8__vgpr(ptr addrspace(1) %a
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v26, s8
-; SDAG-NEXT: v_mov_b32_e32 v27, s9
-; SDAG-NEXT: v_mov_b32_e32 v28, s10
-; SDAG-NEXT: v_mov_b32_e32 v29, s11
-; SDAG-NEXT: v_mov_b32_e32 v18, s12
-; SDAG-NEXT: v_mov_b32_e32 v19, s13
-; SDAG-NEXT: v_mov_b32_e32 v20, s14
-; SDAG-NEXT: v_mov_b32_e32 v21, s15
-; SDAG-NEXT: v_mov_b32_e32 v22, s0
-; SDAG-NEXT: v_mov_b32_e32 v23, s1
-; SDAG-NEXT: v_mov_b32_e32 v24, s2
-; SDAG-NEXT: v_mov_b32_e32 v25, s3
-; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v24, s8
+; SDAG-NEXT: v_mov_b32_e32 v25, s9
+; SDAG-NEXT: v_mov_b32_e32 v26, s10
+; SDAG-NEXT: v_mov_b32_e32 v27, s11
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: v_mov_b32_e32 v20, s0
+; SDAG-NEXT: v_mov_b32_e32 v21, s1
+; SDAG-NEXT: v_mov_b32_e32 v22, s2
+; SDAG-NEXT: v_mov_b32_e32 v23, s3
+; SDAG-NEXT: v_mov_b32_e32 v28, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_i32_32x32x64_i8 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_i32_32x32x64_i8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
@@ -1397,30 +1397,30 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_bf8__vgpr(ptr addrspace
; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7]
+; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SDAG-NEXT: v_mov_b32_e32 v14, s8
-; SDAG-NEXT: v_mov_b32_e32 v15, s9
-; SDAG-NEXT: v_mov_b32_e32 v16, s10
-; SDAG-NEXT: v_mov_b32_e32 v17, s11
-; SDAG-NEXT: v_mov_b32_e32 v2, s12
-; SDAG-NEXT: v_mov_b32_e32 v3, s13
-; SDAG-NEXT: v_mov_b32_e32 v4, s14
-; SDAG-NEXT: v_mov_b32_e32 v5, s15
+; SDAG-NEXT: v_mov_b32_e32 v12, s8
+; SDAG-NEXT: v_mov_b32_e32 v13, s9
+; SDAG-NEXT: v_mov_b32_e32 v14, s10
+; SDAG-NEXT: v_mov_b32_e32 v15, s11
+; SDAG-NEXT: v_mov_b32_e32 v0, s12
+; SDAG-NEXT: v_mov_b32_e32 v1, s13
+; SDAG-NEXT: v_mov_b32_e32 v2, s14
+; SDAG-NEXT: v_mov_b32_e32 v3, s15
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v6, s0
-; SDAG-NEXT: v_mov_b32_e32 v7, s1
-; SDAG-NEXT: v_mov_b32_e32 v8, s2
-; SDAG-NEXT: v_mov_b32_e32 v9, s3
-; SDAG-NEXT: v_mov_b32_e32 v1, s16
+; SDAG-NEXT: v_mov_b32_e32 v4, s0
+; SDAG-NEXT: v_mov_b32_e32 v5, s1
+; SDAG-NEXT: v_mov_b32_e32 v6, s2
+; SDAG-NEXT: v_mov_b32_e32 v7, s3
+; SDAG-NEXT: v_mov_b32_e32 v17, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_smfmac_f32_16x16x128_bf8_bf8__vgpr:
@@ -1566,30 +1566,30 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_fp8__vgpr(ptr addrspace
; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7]
+; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SDAG-NEXT: v_mov_b32_e32 v14, s8
-; SDAG-NEXT: v_mov_b32_e32 v15, s9
-; SDAG-NEXT: v_mov_b32_e32 v16, s10
-; SDAG-NEXT: v_mov_b32_e32 v17, s11
-; SDAG-NEXT: v_mov_b32_e32 v2, s12
-; SDAG-NEXT: v_mov_b32_e32 v3, s13
-; SDAG-NEXT: v_mov_b32_e32 v4, s14
-; SDAG-NEXT: v_mov_b32_e32 v5, s15
+; SDAG-NEXT: v_mov_b32_e32 v12, s8
+; SDAG-NEXT: v_mov_b32_e32 v13, s9
+; SDAG-NEXT: v_mov_b32_e32 v14, s10
+; SDAG-NEXT: v_mov_b32_e32 v15, s11
+; SDAG-NEXT: v_mov_b32_e32 v0, s12
+; SDAG-NEXT: v_mov_b32_e32 v1, s13
+; SDAG-NEXT: v_mov_b32_e32 v2, s14
+; SDAG-NEXT: v_mov_b32_e32 v3, s15
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v6, s0
-; SDAG-NEXT: v_mov_b32_e32 v7, s1
-; SDAG-NEXT: v_mov_b32_e32 v8, s2
-; SDAG-NEXT: v_mov_b32_e32 v9, s3
-; SDAG-NEXT: v_mov_b32_e32 v1, s16
+; SDAG-NEXT: v_mov_b32_e32 v4, s0
+; SDAG-NEXT: v_mov_b32_e32 v5, s1
+; SDAG-NEXT: v_mov_b32_e32 v6, s2
+; SDAG-NEXT: v_mov_b32_e32 v7, s3
+; SDAG-NEXT: v_mov_b32_e32 v17, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_smfmac_f32_16x16x128_bf8_fp8__vgpr:
@@ -1735,30 +1735,30 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_bf8__vgpr(ptr addrspace
; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7]
+; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SDAG-NEXT: v_mov_b32_e32 v14, s8
-; SDAG-NEXT: v_mov_b32_e32 v15, s9
-; SDAG-NEXT: v_mov_b32_e32 v16, s10
-; SDAG-NEXT: v_mov_b32_e32 v17, s11
-; SDAG-NEXT: v_mov_b32_e32 v2, s12
-; SDAG-NEXT: v_mov_b32_e32 v3, s13
-; SDAG-NEXT: v_mov_b32_e32 v4, s14
-; SDAG-NEXT: v_mov_b32_e32 v5, s15
+; SDAG-NEXT: v_mov_b32_e32 v12, s8
+; SDAG-NEXT: v_mov_b32_e32 v13, s9
+; SDAG-NEXT: v_mov_b32_e32 v14, s10
+; SDAG-NEXT: v_mov_b32_e32 v15, s11
+; SDAG-NEXT: v_mov_b32_e32 v0, s12
+; SDAG-NEXT: v_mov_b32_e32 v1, s13
+; SDAG-NEXT: v_mov_b32_e32 v2, s14
+; SDAG-NEXT: v_mov_b32_e32 v3, s15
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v6, s0
-; SDAG-NEXT: v_mov_b32_e32 v7, s1
-; SDAG-NEXT: v_mov_b32_e32 v8, s2
-; SDAG-NEXT: v_mov_b32_e32 v9, s3
-; SDAG-NEXT: v_mov_b32_e32 v1, s16
+; SDAG-NEXT: v_mov_b32_e32 v4, s0
+; SDAG-NEXT: v_mov_b32_e32 v5, s1
+; SDAG-NEXT: v_mov_b32_e32 v6, s2
+; SDAG-NEXT: v_mov_b32_e32 v7, s3
+; SDAG-NEXT: v_mov_b32_e32 v17, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_smfmac_f32_16x16x128_fp8_bf8__vgpr:
@@ -1904,30 +1904,30 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_fp8__vgpr(ptr addrspace
; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7]
+; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SDAG-NEXT: v_mov_b32_e32 v14, s8
-; SDAG-NEXT: v_mov_b32_e32 v15, s9
-; SDAG-NEXT: v_mov_b32_e32 v16, s10
-; SDAG-NEXT: v_mov_b32_e32 v17, s11
-; SDAG-NEXT: v_mov_b32_e32 v2, s12
-; SDAG-NEXT: v_mov_b32_e32 v3, s13
-; SDAG-NEXT: v_mov_b32_e32 v4, s14
-; SDAG-NEXT: v_mov_b32_e32 v5, s15
+; SDAG-NEXT: v_mov_b32_e32 v12, s8
+; SDAG-NEXT: v_mov_b32_e32 v13, s9
+; SDAG-NEXT: v_mov_b32_e32 v14, s10
+; SDAG-NEXT: v_mov_b32_e32 v15, s11
+; SDAG-NEXT: v_mov_b32_e32 v0, s12
+; SDAG-NEXT: v_mov_b32_e32 v1, s13
+; SDAG-NEXT: v_mov_b32_e32 v2, s14
+; SDAG-NEXT: v_mov_b32_e32 v3, s15
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v6, s0
-; SDAG-NEXT: v_mov_b32_e32 v7, s1
-; SDAG-NEXT: v_mov_b32_e32 v8, s2
-; SDAG-NEXT: v_mov_b32_e32 v9, s3
-; SDAG-NEXT: v_mov_b32_e32 v1, s16
+; SDAG-NEXT: v_mov_b32_e32 v4, s0
+; SDAG-NEXT: v_mov_b32_e32 v5, s1
+; SDAG-NEXT: v_mov_b32_e32 v6, s2
+; SDAG-NEXT: v_mov_b32_e32 v7, s3
+; SDAG-NEXT: v_mov_b32_e32 v17, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_smfmac_f32_16x16x128_fp8_fp8__vgpr:
@@ -2081,22 +2081,22 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_bf8_bf8__vgpr(ptr addrspace(
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v26, s8
-; SDAG-NEXT: v_mov_b32_e32 v27, s9
-; SDAG-NEXT: v_mov_b32_e32 v28, s10
-; SDAG-NEXT: v_mov_b32_e32 v29, s11
-; SDAG-NEXT: v_mov_b32_e32 v18, s12
-; SDAG-NEXT: v_mov_b32_e32 v19, s13
-; SDAG-NEXT: v_mov_b32_e32 v20, s14
-; SDAG-NEXT: v_mov_b32_e32 v21, s15
-; SDAG-NEXT: v_mov_b32_e32 v22, s0
-; SDAG-NEXT: v_mov_b32_e32 v23, s1
-; SDAG-NEXT: v_mov_b32_e32 v24, s2
-; SDAG-NEXT: v_mov_b32_e32 v25, s3
-; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v24, s8
+; SDAG-NEXT: v_mov_b32_e32 v25, s9
+; SDAG-NEXT: v_mov_b32_e32 v26, s10
+; SDAG-NEXT: v_mov_b32_e32 v27, s11
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: v_mov_b32_e32 v20, s0
+; SDAG-NEXT: v_mov_b32_e32 v21, s1
+; SDAG-NEXT: v_mov_b32_e32 v22, s2
+; SDAG-NEXT: v_mov_b32_e32 v23, s3
+; SDAG-NEXT: v_mov_b32_e32 v28, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
@@ -2454,22 +2454,22 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_bf8_fp8__vgpr(ptr addrspace(
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v26, s8
-; SDAG-NEXT: v_mov_b32_e32 v27, s9
-; SDAG-NEXT: v_mov_b32_e32 v28, s10
-; SDAG-NEXT: v_mov_b32_e32 v29, s11
-; SDAG-NEXT: v_mov_b32_e32 v18, s12
-; SDAG-NEXT: v_mov_b32_e32 v19, s13
-; SDAG-NEXT: v_mov_b32_e32 v20, s14
-; SDAG-NEXT: v_mov_b32_e32 v21, s15
-; SDAG-NEXT: v_mov_b32_e32 v22, s0
-; SDAG-NEXT: v_mov_b32_e32 v23, s1
-; SDAG-NEXT: v_mov_b32_e32 v24, s2
-; SDAG-NEXT: v_mov_b32_e32 v25, s3
-; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v24, s8
+; SDAG-NEXT: v_mov_b32_e32 v25, s9
+; SDAG-NEXT: v_mov_b32_e32 v26, s10
+; SDAG-NEXT: v_mov_b32_e32 v27, s11
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: v_mov_b32_e32 v20, s0
+; SDAG-NEXT: v_mov_b32_e32 v21, s1
+; SDAG-NEXT: v_mov_b32_e32 v22, s2
+; SDAG-NEXT: v_mov_b32_e32 v23, s3
+; SDAG-NEXT: v_mov_b32_e32 v28, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
@@ -2827,22 +2827,22 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_fp8_bf8__vgpr(ptr addrspace(
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v26, s8
-; SDAG-NEXT: v_mov_b32_e32 v27, s9
-; SDAG-NEXT: v_mov_b32_e32 v28, s10
-; SDAG-NEXT: v_mov_b32_e32 v29, s11
-; SDAG-NEXT: v_mov_b32_e32 v18, s12
-; SDAG-NEXT: v_mov_b32_e32 v19, s13
-; SDAG-NEXT: v_mov_b32_e32 v20, s14
-; SDAG-NEXT: v_mov_b32_e32 v21, s15
-; SDAG-NEXT: v_mov_b32_e32 v22, s0
-; SDAG-NEXT: v_mov_b32_e32 v23, s1
-; SDAG-NEXT: v_mov_b32_e32 v24, s2
-; SDAG-NEXT: v_mov_b32_e32 v25, s3
-; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v24, s8
+; SDAG-NEXT: v_mov_b32_e32 v25, s9
+; SDAG-NEXT: v_mov_b32_e32 v26, s10
+; SDAG-NEXT: v_mov_b32_e32 v27, s11
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: v_mov_b32_e32 v20, s0
+; SDAG-NEXT: v_mov_b32_e32 v21, s1
+; SDAG-NEXT: v_mov_b32_e32 v22, s2
+; SDAG-NEXT: v_mov_b32_e32 v23, s3
+; SDAG-NEXT: v_mov_b32_e32 v28, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
@@ -3200,22 +3200,22 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_fp8_fp8__vgpr(ptr addrspace(
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v26, s8
-; SDAG-NEXT: v_mov_b32_e32 v27, s9
-; SDAG-NEXT: v_mov_b32_e32 v28, s10
-; SDAG-NEXT: v_mov_b32_e32 v29, s11
-; SDAG-NEXT: v_mov_b32_e32 v18, s12
-; SDAG-NEXT: v_mov_b32_e32 v19, s13
-; SDAG-NEXT: v_mov_b32_e32 v20, s14
-; SDAG-NEXT: v_mov_b32_e32 v21, s15
-; SDAG-NEXT: v_mov_b32_e32 v22, s0
-; SDAG-NEXT: v_mov_b32_e32 v23, s1
-; SDAG-NEXT: v_mov_b32_e32 v24, s2
-; SDAG-NEXT: v_mov_b32_e32 v25, s3
-; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v24, s8
+; SDAG-NEXT: v_mov_b32_e32 v25, s9
+; SDAG-NEXT: v_mov_b32_e32 v26, s10
+; SDAG-NEXT: v_mov_b32_e32 v27, s11
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: v_mov_b32_e32 v20, s0
+; SDAG-NEXT: v_mov_b32_e32 v21, s1
+; SDAG-NEXT: v_mov_b32_e32 v22, s2
+; SDAG-NEXT: v_mov_b32_e32 v23, s3
+; SDAG-NEXT: v_mov_b32_e32 v28, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
@@ -3552,4 +3552,4 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_fp8__sgpr(<4 x i32> inreg %arg
ret <16 x float> %result
}
-attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
+attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-agpr-alloc"="0,0" }