aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir')
-rw-r--r--llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir133
1 files changed, 133 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir
new file mode 100644
index 0000000..675a1c9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir
@@ -0,0 +1,133 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GCN %s
+
+
+# Expected vmcnt(0) since the direct load is the only load.
+---
+name: dma_then_fence
+body: |
+ bb.0:
+ ; GCN-LABEL: name: dma_then_fence
+ ; GCN: S_WAITCNT 0
+ ; GCN-NEXT: $m0 = S_MOV_B32 0
+ ; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3)
+ ; GCN-NEXT: S_WAITCNT 3952
+ ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $m0 = S_MOV_B32 0
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
+ S_WAITCNT_lds_direct
+ $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+
+...
+
+# Expected vmcnt(1) since the global load is not processed by SIInsertWaitcnts.
+
+---
+name: dma_then_global_load
+body: |
+ bb.0:
+ ; GCN-LABEL: name: dma_then_global_load
+ ; GCN: S_WAITCNT 0
+ ; GCN-NEXT: $m0 = S_MOV_B32 0
+ ; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3)
+ ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
+ ; GCN-NEXT: S_WAITCNT 3953
+ ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $m0 = S_MOV_B32 0
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
+ S_WAITCNT_lds_direct
+ $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+
+...
+
+# Expected no vmcnt since there is no direct load to LDS, and the global load is not processed by SIInsertWaitcnts.
+
+---
+name: no_dma_just_fence
+body: |
+ bb.0:
+ ; GCN-LABEL: name: no_dma_just_fence
+ ; GCN: S_WAITCNT 0
+ ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
+ ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
+ S_WAITCNT_lds_direct
+ $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+
+...
+
+# Expected vmcnt(1) since the global load is not processed by SIInsertWaitcnts.
+
+---
+name: dma_then_system_fence
+body: |
+ bb.0:
+ ; GCN-LABEL: name: dma_then_system_fence
+ ; GCN: S_WAITCNT 0
+ ; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3)
+ ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
+ ; GCN-NEXT: S_WAITCNT 3953
+ ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
+ S_WAITCNT_lds_direct
+ $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+
+...
+
+# The computed vmcnt(1) gets merged with the existing vmcnt(0).
+
+---
+name: merge_with_prev_wait
+body: |
+ bb.0:
+ ; GCN-LABEL: name: merge_with_prev_wait
+ ; GCN: S_WAITCNT 0
+ ; GCN-NEXT: $m0 = S_MOV_B32 0
+ ; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3)
+ ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
+ ; GCN-NEXT: S_WAITCNT 3952
+ ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $m0 = S_MOV_B32 0
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
+ S_WAITCNT 3952
+ S_WAITCNT_lds_direct
+ $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+
+...
+
+# The computed vmcnt(1) gets merged with the existing vmcnt(0).
+
+---
+name: merge_with_next_wait
+body: |
+ bb.0:
+ ; GCN-LABEL: name: merge_with_next_wait
+ ; GCN: S_WAITCNT 0
+ ; GCN-NEXT: $m0 = S_MOV_B32 0
+ ; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3)
+ ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
+ ; GCN-NEXT: S_WAITCNT 3952
+ ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0
+ $m0 = S_MOV_B32 0
+ BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4)
+ $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec
+ S_WAITCNT_lds_direct
+ S_WAITCNT 3952
+ $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec
+ S_ENDPGM 0
+
+...