diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir | 133 |
1 files changed, 133 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir new file mode 100644 index 0000000..675a1c9 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-fence-soft.mir @@ -0,0 +1,133 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GCN %s + + +# Expected vmcnt(0) since the direct load is the only load. +--- +name: dma_then_fence +body: | + bb.0: + ; GCN-LABEL: name: dma_then_fence + ; GCN: S_WAITCNT 0 + ; GCN-NEXT: $m0 = S_MOV_B32 0 + ; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3) + ; GCN-NEXT: S_WAITCNT 3952 + ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $m0 = S_MOV_B32 0 + BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) + S_WAITCNT_lds_direct + $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + S_ENDPGM 0 + +... + +# Expected vmcnt(1) since the global load is not processed by SIInsertWaitcnts. + +--- +name: dma_then_global_load +body: | + bb.0: + ; GCN-LABEL: name: dma_then_global_load + ; GCN: S_WAITCNT 0 + ; GCN-NEXT: $m0 = S_MOV_B32 0 + ; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3) + ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec + ; GCN-NEXT: S_WAITCNT 3953 + ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $m0 = S_MOV_B32 0 + BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) + $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec + S_WAITCNT_lds_direct + $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + S_ENDPGM 0 + +... + +# Expected no vmcnt since there is no direct load to LDS, and the global load is not processed by SIInsertWaitcnts. + +--- +name: no_dma_just_fence +body: | + bb.0: + ; GCN-LABEL: name: no_dma_just_fence + ; GCN: S_WAITCNT 0 + ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec + ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec + S_WAITCNT_lds_direct + $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + S_ENDPGM 0 + +... + +# Expected vmcnt(1) since the global load is not processed by SIInsertWaitcnts. + +--- +name: dma_then_system_fence +body: | + bb.0: + ; GCN-LABEL: name: dma_then_system_fence + ; GCN: S_WAITCNT 0 + ; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3) + ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec + ; GCN-NEXT: S_WAITCNT 3953 + ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) + $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec + S_WAITCNT_lds_direct + $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + S_ENDPGM 0 + +... + +# The computed vmcnt(1) gets merged with the existing vmcnt(0). + +--- +name: merge_with_prev_wait +body: | + bb.0: + ; GCN-LABEL: name: merge_with_prev_wait + ; GCN: S_WAITCNT 0 + ; GCN-NEXT: $m0 = S_MOV_B32 0 + ; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3) + ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec + ; GCN-NEXT: S_WAITCNT 3952 + ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $m0 = S_MOV_B32 0 + BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) + $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec + S_WAITCNT 3952 + S_WAITCNT_lds_direct + $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + S_ENDPGM 0 + +... + +# The computed vmcnt(1) gets merged with the existing vmcnt(0). + +--- +name: merge_with_next_wait +body: | + bb.0: + ; GCN-LABEL: name: merge_with_next_wait + ; GCN: S_WAITCNT 0 + ; GCN-NEXT: $m0 = S_MOV_B32 0 + ; GCN-NEXT: BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4, addrspace 1), (store (s32) into `ptr addrspace(3) poison` + 4, addrspace 3) + ; GCN-NEXT: $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec + ; GCN-NEXT: S_WAITCNT 3952 + ; GCN-NEXT: $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + ; GCN-NEXT: S_ENDPGM 0 + $m0 = S_MOV_B32 0 + BUFFER_LOAD_DWORD_LDS_IDXEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, implicit $exec, implicit $m0 :: (load (s32) from `ptr addrspace(1) poison` + 4), (store (s32) into `ptr addrspace(3) poison` + 4) + $vgpr2 = GLOBAL_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec + S_WAITCNT_lds_direct + S_WAITCNT 3952 + $vgpr1 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $mode, implicit $exec + S_ENDPGM 0 + +... |