diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll | 340 |
1 files changed, 170 insertions, 170 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll index 8056881..b25fe83 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll @@ -17,24 +17,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_f16__vgpr(ptr addrspace(1) % ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 ; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0 -; SDAG-NEXT: v_mov_b32_e32 v12, 0 +; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: global_load_dwordx4 v[14:17], v0, s[6:7] +; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7] ; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 ; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64 -; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[2:3] +; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[0:1] ; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9] ; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; SDAG-NEXT: v_mov_b32_e32 v13, s16 +; SDAG-NEXT: v_mov_b32_e32 v17, s16 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 v[14:17], v[8:11], v[0:7], v13 cbsz:1 abid:2 +; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: global_store_dwordx4 v12, v[14:17], s[6:7] +; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_smfmac_f32_16x16x64_f16__vgpr: @@ -547,24 +547,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_bf16__vgpr(ptr addrspace(1) ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 ; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0 -; GCN-NEXT: v_mov_b32_e32 v12, 0 +; GCN-NEXT: v_mov_b32_e32 v16, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: global_load_dwordx4 v[14:17], v0, s[6:7] +; GCN-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7] ; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 ; GCN-NEXT: s_load_dword s16, s[4:5], 0x64 -; GCN-NEXT: v_mov_b64_e32 v[10:11], s[2:3] -; GCN-NEXT: v_mov_b64_e32 v[8:9], s[0:1] +; GCN-NEXT: v_mov_b64_e32 v[14:15], s[2:3] +; GCN-NEXT: v_mov_b64_e32 v[12:13], s[0:1] ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_mov_b64_e32 v[0:1], s[8:9] ; GCN-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; GCN-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; GCN-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; GCN-NEXT: v_mov_b32_e32 v13, s16 +; GCN-NEXT: v_mov_b32_e32 v17, s16 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 v[14:17], v[8:11], v[0:7], v13 cbsz:1 abid:2 +; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 ; GCN-NEXT: s_nop 7 -; GCN-NEXT: global_store_dwordx4 v12, v[14:17], s[6:7] +; GCN-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] ; GCN-NEXT: s_endpgm bb: %id = call i32 @llvm.amdgcn.workitem.id.x() @@ -855,30 +855,30 @@ define amdgpu_kernel void @test_smfmac_i32_16x16x128_i8__vgpr(ptr addrspace(1) % ; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7] +; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7] ; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 -; SDAG-NEXT: v_mov_b32_e32 v0, 0 -; SDAG-NEXT: v_mov_b32_e32 v14, s8 -; SDAG-NEXT: v_mov_b32_e32 v15, s9 -; SDAG-NEXT: v_mov_b32_e32 v16, s10 -; SDAG-NEXT: v_mov_b32_e32 v17, s11 -; SDAG-NEXT: v_mov_b32_e32 v2, s12 -; SDAG-NEXT: v_mov_b32_e32 v3, s13 -; SDAG-NEXT: v_mov_b32_e32 v4, s14 -; SDAG-NEXT: v_mov_b32_e32 v5, s15 +; SDAG-NEXT: v_mov_b32_e32 v12, s8 +; SDAG-NEXT: v_mov_b32_e32 v13, s9 +; SDAG-NEXT: v_mov_b32_e32 v14, s10 +; SDAG-NEXT: v_mov_b32_e32 v15, s11 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v6, s0 -; SDAG-NEXT: v_mov_b32_e32 v7, s1 -; SDAG-NEXT: v_mov_b32_e32 v8, s2 -; SDAG-NEXT: v_mov_b32_e32 v9, s3 -; SDAG-NEXT: v_mov_b32_e32 v1, s16 +; SDAG-NEXT: v_mov_b32_e32 v4, s0 +; SDAG-NEXT: v_mov_b32_e32 v5, s1 +; SDAG-NEXT: v_mov_b32_e32 v6, s2 +; SDAG-NEXT: v_mov_b32_e32 v7, s3 +; SDAG-NEXT: v_mov_b32_e32 v17, s16 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2 +; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7] +; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_smfmac_i32_16x16x128_i8__vgpr: @@ -1032,22 +1032,22 @@ define amdgpu_kernel void @test_smfmac_i32_32x32x64_i8__vgpr(ptr addrspace(1) %a ; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v26, s8 -; SDAG-NEXT: v_mov_b32_e32 v27, s9 -; SDAG-NEXT: v_mov_b32_e32 v28, s10 -; SDAG-NEXT: v_mov_b32_e32 v29, s11 -; SDAG-NEXT: v_mov_b32_e32 v18, s12 -; SDAG-NEXT: v_mov_b32_e32 v19, s13 -; SDAG-NEXT: v_mov_b32_e32 v20, s14 -; SDAG-NEXT: v_mov_b32_e32 v21, s15 -; SDAG-NEXT: v_mov_b32_e32 v22, s0 -; SDAG-NEXT: v_mov_b32_e32 v23, s1 -; SDAG-NEXT: v_mov_b32_e32 v24, s2 -; SDAG-NEXT: v_mov_b32_e32 v25, s3 -; SDAG-NEXT: v_mov_b32_e32 v16, s16 +; SDAG-NEXT: v_mov_b32_e32 v24, s8 +; SDAG-NEXT: v_mov_b32_e32 v25, s9 +; SDAG-NEXT: v_mov_b32_e32 v26, s10 +; SDAG-NEXT: v_mov_b32_e32 v27, s11 +; SDAG-NEXT: v_mov_b32_e32 v16, s12 +; SDAG-NEXT: v_mov_b32_e32 v17, s13 +; SDAG-NEXT: v_mov_b32_e32 v18, s14 +; SDAG-NEXT: v_mov_b32_e32 v19, s15 +; SDAG-NEXT: v_mov_b32_e32 v20, s0 +; SDAG-NEXT: v_mov_b32_e32 v21, s1 +; SDAG-NEXT: v_mov_b32_e32 v22, s2 +; SDAG-NEXT: v_mov_b32_e32 v23, s3 +; SDAG-NEXT: v_mov_b32_e32 v28, s16 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_smfmac_i32_32x32x64_i8 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2 +; SDAG-NEXT: v_smfmac_i32_32x32x64_i8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 2 @@ -1397,30 +1397,30 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_bf8__vgpr(ptr addrspace ; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7] +; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7] ; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 -; SDAG-NEXT: v_mov_b32_e32 v0, 0 -; SDAG-NEXT: v_mov_b32_e32 v14, s8 -; SDAG-NEXT: v_mov_b32_e32 v15, s9 -; SDAG-NEXT: v_mov_b32_e32 v16, s10 -; SDAG-NEXT: v_mov_b32_e32 v17, s11 -; SDAG-NEXT: v_mov_b32_e32 v2, s12 -; SDAG-NEXT: v_mov_b32_e32 v3, s13 -; SDAG-NEXT: v_mov_b32_e32 v4, s14 -; SDAG-NEXT: v_mov_b32_e32 v5, s15 +; SDAG-NEXT: v_mov_b32_e32 v12, s8 +; SDAG-NEXT: v_mov_b32_e32 v13, s9 +; SDAG-NEXT: v_mov_b32_e32 v14, s10 +; SDAG-NEXT: v_mov_b32_e32 v15, s11 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v6, s0 -; SDAG-NEXT: v_mov_b32_e32 v7, s1 -; SDAG-NEXT: v_mov_b32_e32 v8, s2 -; SDAG-NEXT: v_mov_b32_e32 v9, s3 -; SDAG-NEXT: v_mov_b32_e32 v1, s16 +; SDAG-NEXT: v_mov_b32_e32 v4, s0 +; SDAG-NEXT: v_mov_b32_e32 v5, s1 +; SDAG-NEXT: v_mov_b32_e32 v6, s2 +; SDAG-NEXT: v_mov_b32_e32 v7, s3 +; SDAG-NEXT: v_mov_b32_e32 v17, s16 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2 +; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7] +; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_smfmac_f32_16x16x128_bf8_bf8__vgpr: @@ -1566,30 +1566,30 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_fp8__vgpr(ptr addrspace ; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7] +; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7] ; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 -; SDAG-NEXT: v_mov_b32_e32 v0, 0 -; SDAG-NEXT: v_mov_b32_e32 v14, s8 -; SDAG-NEXT: v_mov_b32_e32 v15, s9 -; SDAG-NEXT: v_mov_b32_e32 v16, s10 -; SDAG-NEXT: v_mov_b32_e32 v17, s11 -; SDAG-NEXT: v_mov_b32_e32 v2, s12 -; SDAG-NEXT: v_mov_b32_e32 v3, s13 -; SDAG-NEXT: v_mov_b32_e32 v4, s14 -; SDAG-NEXT: v_mov_b32_e32 v5, s15 +; SDAG-NEXT: v_mov_b32_e32 v12, s8 +; SDAG-NEXT: v_mov_b32_e32 v13, s9 +; SDAG-NEXT: v_mov_b32_e32 v14, s10 +; SDAG-NEXT: v_mov_b32_e32 v15, s11 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v6, s0 -; SDAG-NEXT: v_mov_b32_e32 v7, s1 -; SDAG-NEXT: v_mov_b32_e32 v8, s2 -; SDAG-NEXT: v_mov_b32_e32 v9, s3 -; SDAG-NEXT: v_mov_b32_e32 v1, s16 +; SDAG-NEXT: v_mov_b32_e32 v4, s0 +; SDAG-NEXT: v_mov_b32_e32 v5, s1 +; SDAG-NEXT: v_mov_b32_e32 v6, s2 +; SDAG-NEXT: v_mov_b32_e32 v7, s3 +; SDAG-NEXT: v_mov_b32_e32 v17, s16 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2 +; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7] +; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_smfmac_f32_16x16x128_bf8_fp8__vgpr: @@ -1735,30 +1735,30 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_bf8__vgpr(ptr addrspace ; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7] +; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7] ; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 -; SDAG-NEXT: v_mov_b32_e32 v0, 0 -; SDAG-NEXT: v_mov_b32_e32 v14, s8 -; SDAG-NEXT: v_mov_b32_e32 v15, s9 -; SDAG-NEXT: v_mov_b32_e32 v16, s10 -; SDAG-NEXT: v_mov_b32_e32 v17, s11 -; SDAG-NEXT: v_mov_b32_e32 v2, s12 -; SDAG-NEXT: v_mov_b32_e32 v3, s13 -; SDAG-NEXT: v_mov_b32_e32 v4, s14 -; SDAG-NEXT: v_mov_b32_e32 v5, s15 +; SDAG-NEXT: v_mov_b32_e32 v12, s8 +; SDAG-NEXT: v_mov_b32_e32 v13, s9 +; SDAG-NEXT: v_mov_b32_e32 v14, s10 +; SDAG-NEXT: v_mov_b32_e32 v15, s11 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v6, s0 -; SDAG-NEXT: v_mov_b32_e32 v7, s1 -; SDAG-NEXT: v_mov_b32_e32 v8, s2 -; SDAG-NEXT: v_mov_b32_e32 v9, s3 -; SDAG-NEXT: v_mov_b32_e32 v1, s16 +; SDAG-NEXT: v_mov_b32_e32 v4, s0 +; SDAG-NEXT: v_mov_b32_e32 v5, s1 +; SDAG-NEXT: v_mov_b32_e32 v6, s2 +; SDAG-NEXT: v_mov_b32_e32 v7, s3 +; SDAG-NEXT: v_mov_b32_e32 v17, s16 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2 +; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7] +; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_smfmac_f32_16x16x128_fp8_bf8__vgpr: @@ -1904,30 +1904,30 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_fp8__vgpr(ptr addrspace ; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 ; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34 +; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7] +; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7] ; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 -; SDAG-NEXT: v_mov_b32_e32 v0, 0 -; SDAG-NEXT: v_mov_b32_e32 v14, s8 -; SDAG-NEXT: v_mov_b32_e32 v15, s9 -; SDAG-NEXT: v_mov_b32_e32 v16, s10 -; SDAG-NEXT: v_mov_b32_e32 v17, s11 -; SDAG-NEXT: v_mov_b32_e32 v2, s12 -; SDAG-NEXT: v_mov_b32_e32 v3, s13 -; SDAG-NEXT: v_mov_b32_e32 v4, s14 -; SDAG-NEXT: v_mov_b32_e32 v5, s15 +; SDAG-NEXT: v_mov_b32_e32 v12, s8 +; SDAG-NEXT: v_mov_b32_e32 v13, s9 +; SDAG-NEXT: v_mov_b32_e32 v14, s10 +; SDAG-NEXT: v_mov_b32_e32 v15, s11 +; SDAG-NEXT: v_mov_b32_e32 v0, s12 +; SDAG-NEXT: v_mov_b32_e32 v1, s13 +; SDAG-NEXT: v_mov_b32_e32 v2, s14 +; SDAG-NEXT: v_mov_b32_e32 v3, s15 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v6, s0 -; SDAG-NEXT: v_mov_b32_e32 v7, s1 -; SDAG-NEXT: v_mov_b32_e32 v8, s2 -; SDAG-NEXT: v_mov_b32_e32 v9, s3 -; SDAG-NEXT: v_mov_b32_e32 v1, s16 +; SDAG-NEXT: v_mov_b32_e32 v4, s0 +; SDAG-NEXT: v_mov_b32_e32 v5, s1 +; SDAG-NEXT: v_mov_b32_e32 v6, s2 +; SDAG-NEXT: v_mov_b32_e32 v7, s3 +; SDAG-NEXT: v_mov_b32_e32 v17, s16 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2 +; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2 ; SDAG-NEXT: s_nop 7 -; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7] +; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7] ; SDAG-NEXT: s_endpgm ; ; GISEL-LABEL: test_smfmac_f32_16x16x128_fp8_fp8__vgpr: @@ -2081,22 +2081,22 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_bf8_bf8__vgpr(ptr addrspace( ; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v26, s8 -; SDAG-NEXT: v_mov_b32_e32 v27, s9 -; SDAG-NEXT: v_mov_b32_e32 v28, s10 -; SDAG-NEXT: v_mov_b32_e32 v29, s11 -; SDAG-NEXT: v_mov_b32_e32 v18, s12 -; SDAG-NEXT: v_mov_b32_e32 v19, s13 -; SDAG-NEXT: v_mov_b32_e32 v20, s14 -; SDAG-NEXT: v_mov_b32_e32 v21, s15 -; SDAG-NEXT: v_mov_b32_e32 v22, s0 -; SDAG-NEXT: v_mov_b32_e32 v23, s1 -; SDAG-NEXT: v_mov_b32_e32 v24, s2 -; SDAG-NEXT: v_mov_b32_e32 v25, s3 -; SDAG-NEXT: v_mov_b32_e32 v16, s16 +; SDAG-NEXT: v_mov_b32_e32 v24, s8 +; SDAG-NEXT: v_mov_b32_e32 v25, s9 +; SDAG-NEXT: v_mov_b32_e32 v26, s10 +; SDAG-NEXT: v_mov_b32_e32 v27, s11 +; SDAG-NEXT: v_mov_b32_e32 v16, s12 +; SDAG-NEXT: v_mov_b32_e32 v17, s13 +; SDAG-NEXT: v_mov_b32_e32 v18, s14 +; SDAG-NEXT: v_mov_b32_e32 v19, s15 +; SDAG-NEXT: v_mov_b32_e32 v20, s0 +; SDAG-NEXT: v_mov_b32_e32 v21, s1 +; SDAG-NEXT: v_mov_b32_e32 v22, s2 +; SDAG-NEXT: v_mov_b32_e32 v23, s3 +; SDAG-NEXT: v_mov_b32_e32 v28, s16 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2 +; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 2 @@ -2454,22 +2454,22 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_bf8_fp8__vgpr(ptr addrspace( ; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v26, s8 -; SDAG-NEXT: v_mov_b32_e32 v27, s9 -; SDAG-NEXT: v_mov_b32_e32 v28, s10 -; SDAG-NEXT: v_mov_b32_e32 v29, s11 -; SDAG-NEXT: v_mov_b32_e32 v18, s12 -; SDAG-NEXT: v_mov_b32_e32 v19, s13 -; SDAG-NEXT: v_mov_b32_e32 v20, s14 -; SDAG-NEXT: v_mov_b32_e32 v21, s15 -; SDAG-NEXT: v_mov_b32_e32 v22, s0 -; SDAG-NEXT: v_mov_b32_e32 v23, s1 -; SDAG-NEXT: v_mov_b32_e32 v24, s2 -; SDAG-NEXT: v_mov_b32_e32 v25, s3 -; SDAG-NEXT: v_mov_b32_e32 v16, s16 +; SDAG-NEXT: v_mov_b32_e32 v24, s8 +; SDAG-NEXT: v_mov_b32_e32 v25, s9 +; SDAG-NEXT: v_mov_b32_e32 v26, s10 +; SDAG-NEXT: v_mov_b32_e32 v27, s11 +; SDAG-NEXT: v_mov_b32_e32 v16, s12 +; SDAG-NEXT: v_mov_b32_e32 v17, s13 +; SDAG-NEXT: v_mov_b32_e32 v18, s14 +; SDAG-NEXT: v_mov_b32_e32 v19, s15 +; SDAG-NEXT: v_mov_b32_e32 v20, s0 +; SDAG-NEXT: v_mov_b32_e32 v21, s1 +; SDAG-NEXT: v_mov_b32_e32 v22, s2 +; SDAG-NEXT: v_mov_b32_e32 v23, s3 +; SDAG-NEXT: v_mov_b32_e32 v28, s16 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2 +; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 2 @@ -2827,22 +2827,22 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_fp8_bf8__vgpr(ptr addrspace( ; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v26, s8 -; SDAG-NEXT: v_mov_b32_e32 v27, s9 -; SDAG-NEXT: v_mov_b32_e32 v28, s10 -; SDAG-NEXT: v_mov_b32_e32 v29, s11 -; SDAG-NEXT: v_mov_b32_e32 v18, s12 -; SDAG-NEXT: v_mov_b32_e32 v19, s13 -; SDAG-NEXT: v_mov_b32_e32 v20, s14 -; SDAG-NEXT: v_mov_b32_e32 v21, s15 -; SDAG-NEXT: v_mov_b32_e32 v22, s0 -; SDAG-NEXT: v_mov_b32_e32 v23, s1 -; SDAG-NEXT: v_mov_b32_e32 v24, s2 -; SDAG-NEXT: v_mov_b32_e32 v25, s3 -; SDAG-NEXT: v_mov_b32_e32 v16, s16 +; SDAG-NEXT: v_mov_b32_e32 v24, s8 +; SDAG-NEXT: v_mov_b32_e32 v25, s9 +; SDAG-NEXT: v_mov_b32_e32 v26, s10 +; SDAG-NEXT: v_mov_b32_e32 v27, s11 +; SDAG-NEXT: v_mov_b32_e32 v16, s12 +; SDAG-NEXT: v_mov_b32_e32 v17, s13 +; SDAG-NEXT: v_mov_b32_e32 v18, s14 +; SDAG-NEXT: v_mov_b32_e32 v19, s15 +; SDAG-NEXT: v_mov_b32_e32 v20, s0 +; SDAG-NEXT: v_mov_b32_e32 v21, s1 +; SDAG-NEXT: v_mov_b32_e32 v22, s2 +; SDAG-NEXT: v_mov_b32_e32 v23, s3 +; SDAG-NEXT: v_mov_b32_e32 v28, s16 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2 +; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 2 @@ -3200,22 +3200,22 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_fp8_fp8__vgpr(ptr addrspace( ; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64 ; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54 ; SDAG-NEXT: s_waitcnt lgkmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v26, s8 -; SDAG-NEXT: v_mov_b32_e32 v27, s9 -; SDAG-NEXT: v_mov_b32_e32 v28, s10 -; SDAG-NEXT: v_mov_b32_e32 v29, s11 -; SDAG-NEXT: v_mov_b32_e32 v18, s12 -; SDAG-NEXT: v_mov_b32_e32 v19, s13 -; SDAG-NEXT: v_mov_b32_e32 v20, s14 -; SDAG-NEXT: v_mov_b32_e32 v21, s15 -; SDAG-NEXT: v_mov_b32_e32 v22, s0 -; SDAG-NEXT: v_mov_b32_e32 v23, s1 -; SDAG-NEXT: v_mov_b32_e32 v24, s2 -; SDAG-NEXT: v_mov_b32_e32 v25, s3 -; SDAG-NEXT: v_mov_b32_e32 v16, s16 +; SDAG-NEXT: v_mov_b32_e32 v24, s8 +; SDAG-NEXT: v_mov_b32_e32 v25, s9 +; SDAG-NEXT: v_mov_b32_e32 v26, s10 +; SDAG-NEXT: v_mov_b32_e32 v27, s11 +; SDAG-NEXT: v_mov_b32_e32 v16, s12 +; SDAG-NEXT: v_mov_b32_e32 v17, s13 +; SDAG-NEXT: v_mov_b32_e32 v18, s14 +; SDAG-NEXT: v_mov_b32_e32 v19, s15 +; SDAG-NEXT: v_mov_b32_e32 v20, s0 +; SDAG-NEXT: v_mov_b32_e32 v21, s1 +; SDAG-NEXT: v_mov_b32_e32 v22, s2 +; SDAG-NEXT: v_mov_b32_e32 v23, s3 +; SDAG-NEXT: v_mov_b32_e32 v28, s16 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: s_nop 0 -; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2 +; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2 ; SDAG-NEXT: v_mov_b32_e32 v16, 0 ; SDAG-NEXT: s_nop 7 ; SDAG-NEXT: s_nop 2 @@ -3552,4 +3552,4 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_fp8__sgpr(<4 x i32> inreg %arg ret <16 x float> %result } -attributes #0 = { "amdgpu-flat-work-group-size"="1,256" } +attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-agpr-alloc"="0,0" } |