aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll67
1 files changed, 67 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll
index 9802144a..bf8308b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma.imod.gfx1250.w32.ll
@@ -1126,6 +1126,72 @@ bb:
ret void
}
+define amdgpu_ps void @test_wmma_f32_16x16x128_f8f6f4_negC(<16 x i32> %A, <16 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_wmma_f32_16x16x128_f8f6f4_negC:
+; GFX1250: ; %bb.0: ; %bb
+; GFX1250-NEXT: v_wmma_f32_16x16x128_f8f6f4 v[32:39], v[0:15], v[16:31], v[32:39] neg_lo:[0,0,1]
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: global_store_b128 v[40:41], v[36:39], off offset:16
+; GFX1250-NEXT: global_store_b128 v[40:41], v[32:35], off
+; GFX1250-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_wmma_f32_16x16x128_f8f6f4_negC:
+; GISEL: ; %bb.0: ; %bb
+; GISEL-NEXT: v_wmma_f32_16x16x128_f8f6f4 v[32:39], v[0:15], v[16:31], v[32:39] neg_lo:[0,0,1]
+; GISEL-NEXT: s_clause 0x1
+; GISEL-NEXT: global_store_b128 v[40:41], v[32:35], off
+; GISEL-NEXT: global_store_b128 v[40:41], v[36:39], off offset:16
+; GISEL-NEXT: s_endpgm
+bb:
+ %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x128.f8f6f4.v8f32.v16i32.v16i32(i32 0, <16 x i32> %A, i32 0, <16 x i32> %B, i16 1, <8 x float> %C)
+ store <8 x float> %res, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @test_wmma_f32_16x16x128_f8f6f4_neg_absC(<16 x i32> %A, <16 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_wmma_f32_16x16x128_f8f6f4_neg_absC:
+; GFX1250: ; %bb.0: ; %bb
+; GFX1250-NEXT: v_wmma_f32_16x16x128_f8f6f4 v[32:39], v[0:15], v[16:31], v[32:39] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: global_store_b128 v[40:41], v[36:39], off offset:16
+; GFX1250-NEXT: global_store_b128 v[40:41], v[32:35], off
+; GFX1250-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_wmma_f32_16x16x128_f8f6f4_neg_absC:
+; GISEL: ; %bb.0: ; %bb
+; GISEL-NEXT: v_wmma_f32_16x16x128_f8f6f4 v[32:39], v[0:15], v[16:31], v[32:39] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GISEL-NEXT: s_clause 0x1
+; GISEL-NEXT: global_store_b128 v[40:41], v[32:35], off
+; GISEL-NEXT: global_store_b128 v[40:41], v[36:39], off offset:16
+; GISEL-NEXT: s_endpgm
+bb:
+ %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x128.f8f6f4.v8f32.v16i32.v16i32(i32 0, <16 x i32> %A, i32 0, <16 x i32> %B, i16 3, <8 x float> %C)
+ store <8 x float> %res, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_ps void @test_wmma_f32_16x16x128_f8f6f4_ignoreC(<16 x i32> %A, <16 x i32> %B, <8 x float> %C, ptr addrspace(1) %out) {
+; GFX1250-LABEL: test_wmma_f32_16x16x128_f8f6f4_ignoreC:
+; GFX1250: ; %bb.0: ; %bb
+; GFX1250-NEXT: v_wmma_f32_16x16x128_f8f6f4 v[32:39], v[0:15], v[16:31], v[32:39]
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: global_store_b128 v[40:41], v[36:39], off offset:16
+; GFX1250-NEXT: global_store_b128 v[40:41], v[32:35], off
+; GFX1250-NEXT: s_endpgm
+;
+; GISEL-LABEL: test_wmma_f32_16x16x128_f8f6f4_ignoreC:
+; GISEL: ; %bb.0: ; %bb
+; GISEL-NEXT: v_wmma_f32_16x16x128_f8f6f4 v[32:39], v[0:15], v[16:31], v[32:39]
+; GISEL-NEXT: s_clause 0x1
+; GISEL-NEXT: global_store_b128 v[40:41], v[32:35], off
+; GISEL-NEXT: global_store_b128 v[40:41], v[36:39], off offset:16
+; GISEL-NEXT: s_endpgm
+bb:
+ %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x128.f8f6f4.v8f32.v16i32.v16i32(i32 0, <16 x i32> %A, i32 0, <16 x i32> %B, i16 4, <8 x float> %C)
+ store <8 x float> %res, ptr addrspace(1) %out
+ ret void
+}
+
define amdgpu_ps void @test_wmma_f16_16x16x128_fp8_fp8_negC(<16 x i32> %A, <16 x i32> %B, <8 x half> %C, ptr addrspace(1) %out) {
; GFX1250-LABEL: test_wmma_f16_16x16x128_fp8_fp8_negC:
; GFX1250: ; %bb.0: ; %bb
@@ -1967,6 +2033,7 @@ declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x64.bf8.bf8.v8f16.v8i32(<8 x i32>,
declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x64.iu8.v8i32.v8i32(i1 immarg, <8 x i32>, i1 immarg, <8 x i32>, <8 x i32>, i1, i1)
declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x32.f16.v8f32.v16f16(i1, <16 x half>, i1, <16 x half>, i16, <8 x float>, i1, i1)
declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x32.f16.v8f16.v16f16(i1, <16 x half>, i1, <16 x half>, i16, <8 x half>, i1, i1)
+declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x128.f8f6f4.v8f32.v16i32.v16i32(i32, <16 x i32>, i32, <16 x i32>, i16, <8 x float>)
declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x128.fp8.fp8.v8f16.v16i32(<16 x i32>, <16 x i32>, i16, <8 x half>, i1, i1)
declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x128.fp8.bf8.v8f16.v16i32(<16 x i32>, <16 x i32>, i16, <8 x half>, i1, i1)
declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x128.bf8.fp8.v8f16.v16i32(<16 x i32>, <16 x i32>, i16, <8 x half>, i1, i1)