aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/bf16.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/bf16.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/bf16.ll119
1 files changed, 83 insertions, 36 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 7859fcdf..52e697c 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -468,15 +468,28 @@ define <16 x bfloat> @v_load_global_v16bf16(ptr addrspace(1) %ptr) {
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_load_global_v16bf16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v9, v1
-; GFX9-NEXT: v_mov_b32_e32 v8, v0
-; GFX9-NEXT: global_load_dwordx4 v[0:3], v[8:9], off
-; GFX9-NEXT: global_load_dwordx4 v[4:7], v[8:9], off offset:16
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_load_global_v16bf16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v9, v1
+; GFX900-NEXT: v_mov_b32_e32 v8, v0
+; GFX900-NEXT: global_load_dwordx4 v[0:3], v[8:9], off
+; GFX900-NEXT: global_load_dwordx4 v[4:7], v[8:9], off offset:16
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_load_global_v16bf16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: global_load_dwordx4 v[8:11], v[0:1], off
+; GFX950-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
+; GFX950-NEXT: s_waitcnt vmcnt(1)
+; GFX950-NEXT: v_mov_b32_e32 v0, v8
+; GFX950-NEXT: v_mov_b32_e32 v1, v9
+; GFX950-NEXT: v_mov_b32_e32 v2, v10
+; GFX950-NEXT: v_mov_b32_e32 v3, v11
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_load_global_v16bf16:
; GFX10: ; %bb.0:
@@ -619,17 +632,32 @@ define <32 x bfloat> @v_load_global_v32bf16(ptr addrspace(1) %ptr) {
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_load_global_v32bf16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v17, v1
-; GFX9-NEXT: v_mov_b32_e32 v16, v0
-; GFX9-NEXT: global_load_dwordx4 v[0:3], v[16:17], off
-; GFX9-NEXT: global_load_dwordx4 v[4:7], v[16:17], off offset:16
-; GFX9-NEXT: global_load_dwordx4 v[8:11], v[16:17], off offset:32
-; GFX9-NEXT: global_load_dwordx4 v[12:15], v[16:17], off offset:48
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_load_global_v32bf16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v17, v1
+; GFX900-NEXT: v_mov_b32_e32 v16, v0
+; GFX900-NEXT: global_load_dwordx4 v[0:3], v[16:17], off
+; GFX900-NEXT: global_load_dwordx4 v[4:7], v[16:17], off offset:16
+; GFX900-NEXT: global_load_dwordx4 v[8:11], v[16:17], off offset:32
+; GFX900-NEXT: global_load_dwordx4 v[12:15], v[16:17], off offset:48
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_load_global_v32bf16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: global_load_dwordx4 v[16:19], v[0:1], off
+; GFX950-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
+; GFX950-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32
+; GFX950-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48
+; GFX950-NEXT: s_waitcnt vmcnt(3)
+; GFX950-NEXT: v_mov_b32_e32 v0, v16
+; GFX950-NEXT: v_mov_b32_e32 v1, v17
+; GFX950-NEXT: v_mov_b32_e32 v2, v18
+; GFX950-NEXT: v_mov_b32_e32 v3, v19
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_load_global_v32bf16:
; GFX10: ; %bb.0:
@@ -877,22 +905,41 @@ define <64 x bfloat> @v_load_global_v64bf16(ptr addrspace(1) %ptr) {
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-LABEL: v_load_global_v64bf16:
-; GFX9: ; %bb.0:
-; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT: v_mov_b32_e32 v29, v1
-; GFX9-NEXT: v_mov_b32_e32 v28, v0
-; GFX9-NEXT: global_load_dwordx4 v[0:3], v[28:29], off
-; GFX9-NEXT: global_load_dwordx4 v[4:7], v[28:29], off offset:16
-; GFX9-NEXT: global_load_dwordx4 v[8:11], v[28:29], off offset:32
-; GFX9-NEXT: global_load_dwordx4 v[12:15], v[28:29], off offset:48
-; GFX9-NEXT: global_load_dwordx4 v[16:19], v[28:29], off offset:64
-; GFX9-NEXT: global_load_dwordx4 v[20:23], v[28:29], off offset:80
-; GFX9-NEXT: global_load_dwordx4 v[24:27], v[28:29], off offset:96
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: global_load_dwordx4 v[28:31], v[28:29], off offset:112
-; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: s_setpc_b64 s[30:31]
+; GFX900-LABEL: v_load_global_v64bf16:
+; GFX900: ; %bb.0:
+; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-NEXT: v_mov_b32_e32 v29, v1
+; GFX900-NEXT: v_mov_b32_e32 v28, v0
+; GFX900-NEXT: global_load_dwordx4 v[0:3], v[28:29], off
+; GFX900-NEXT: global_load_dwordx4 v[4:7], v[28:29], off offset:16
+; GFX900-NEXT: global_load_dwordx4 v[8:11], v[28:29], off offset:32
+; GFX900-NEXT: global_load_dwordx4 v[12:15], v[28:29], off offset:48
+; GFX900-NEXT: global_load_dwordx4 v[16:19], v[28:29], off offset:64
+; GFX900-NEXT: global_load_dwordx4 v[20:23], v[28:29], off offset:80
+; GFX900-NEXT: global_load_dwordx4 v[24:27], v[28:29], off offset:96
+; GFX900-NEXT: s_nop 0
+; GFX900-NEXT: global_load_dwordx4 v[28:31], v[28:29], off offset:112
+; GFX900-NEXT: s_waitcnt vmcnt(0)
+; GFX900-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX950-LABEL: v_load_global_v64bf16:
+; GFX950: ; %bb.0:
+; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX950-NEXT: global_load_dwordx4 v[32:35], v[0:1], off
+; GFX950-NEXT: global_load_dwordx4 v[4:7], v[0:1], off offset:16
+; GFX950-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:32
+; GFX950-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:48
+; GFX950-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:64
+; GFX950-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:80
+; GFX950-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:96
+; GFX950-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:112
+; GFX950-NEXT: s_waitcnt vmcnt(7)
+; GFX950-NEXT: v_mov_b32_e32 v0, v32
+; GFX950-NEXT: v_mov_b32_e32 v1, v33
+; GFX950-NEXT: v_mov_b32_e32 v2, v34
+; GFX950-NEXT: v_mov_b32_e32 v3, v35
+; GFX950-NEXT: s_waitcnt vmcnt(0)
+; GFX950-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_load_global_v64bf16:
; GFX10: ; %bb.0: