# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefix=GFX12 %s

# Test that we can optimize away s_wait_loadcnt at function boundaries when
# the only pending LOAD_CNT events are from GLOBAL_INV (which doesn't write
# to VGPRs).
#
# When a function contains only GLOBAL_INV with no actual VMEM loads pending
# to VGPRs, we should not need to emit s_wait_loadcnt 0 before the return.

---
# Test 1: Only GLOBAL_INV, no VGPR loads - should NOT need S_WAIT_LOADCNT
# before return because GLOBAL_INV doesn't write to VGPRs.
#
# The absence of the wait is enforced by the NEXT-anchored check on the return:
# the return must appear on the line immediately after GLOBAL_INV, so any
# inserted wait instruction in between makes the test fail. A separate negative
# check between those two lines would scan an empty region and is not needed.
name: func_global_inv_only
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: false
body: |
  bb.0:
    liveins: $sgpr30_sgpr31

    ; GFX12-LABEL: name: func_global_inv_only
    ; GFX12: liveins: $sgpr30_sgpr31
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
    ; GFX12-NEXT: S_WAIT_EXPCNT 0
    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
    ; GFX12-NEXT: S_WAIT_BVHCNT 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: GLOBAL_INV 16, implicit $exec
    ; GFX12-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
    GLOBAL_INV 16, implicit $exec
    S_SETPC_B64_return $sgpr30_sgpr31
...
---
# Test 2: GLOBAL_INV with actual VGPR load - MUST wait for loadcnt
#
# The loaded value in $vgpr0 is not read before the return, which lists it as
# an implicit use. The checks expect a single S_WAIT_LOADCNT 0 placed after
# GLOBAL_INV and directly before the return.
name: func_global_inv_with_vgpr_load
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: false
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31

    ; GFX12-LABEL: name: func_global_inv_with_vgpr_load
    ; GFX12: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
    ; GFX12-NEXT: S_WAIT_EXPCNT 0
    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
    ; GFX12-NEXT: S_WAIT_BVHCNT 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
    ; GFX12-NEXT: GLOBAL_INV 16, implicit $exec
    ; GFX12-NEXT: S_WAIT_LOADCNT 0
    ; GFX12-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
    renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
    GLOBAL_INV 16, implicit $exec
    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
...
---
# Test 3: Only VGPR load (no GLOBAL_INV) - MUST wait for loadcnt
#
# Baseline for the cases above: with a load into $vgpr0 still outstanding at
# the return and no GLOBAL_INV involved, the checks expect S_WAIT_LOADCNT 0
# directly before the return.
name: func_vgpr_load_no_global_inv
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: false
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31

    ; GFX12-LABEL: name: func_vgpr_load_no_global_inv
    ; GFX12: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
    ; GFX12-NEXT: S_WAIT_EXPCNT 0
    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
    ; GFX12-NEXT: S_WAIT_BVHCNT 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
    ; GFX12-NEXT: S_WAIT_LOADCNT 0
    ; GFX12-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
    renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
...
---
# Test 4: GLOBAL_INV with load already waited on - should NOT need S_WAIT_LOADCNT at return
# The load was waited on when $vgpr0 was used, so only GLOBAL_INV is pending at return.
#
# The checks expect exactly one S_WAIT_LOADCNT 0, placed before the V_MOV that
# reads $vgpr0. The absence of a second wait is enforced by the NEXT-anchored
# check on the return: it must appear on the line immediately after GLOBAL_INV,
# so any wait inserted in between makes the test fail. A separate negative
# check between those two lines would scan an empty region and is not needed.
name: func_global_inv_load_already_waited
tracksRegLiveness: true
machineFunctionInfo:
  isEntryFunction: false
body: |
  bb.0:
    liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31

    ; GFX12-LABEL: name: func_global_inv_load_already_waited
    ; GFX12: liveins: $vgpr0, $sgpr0_sgpr1, $sgpr30_sgpr31
    ; GFX12-NEXT: {{ $}}
    ; GFX12-NEXT: S_WAIT_LOADCNT_DSCNT 0
    ; GFX12-NEXT: S_WAIT_EXPCNT 0
    ; GFX12-NEXT: S_WAIT_SAMPLECNT 0
    ; GFX12-NEXT: S_WAIT_BVHCNT 0
    ; GFX12-NEXT: S_WAIT_KMCNT 0
    ; GFX12-NEXT: renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
    ; GFX12-NEXT: S_WAIT_LOADCNT 0
    ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
    ; GFX12-NEXT: GLOBAL_INV 16, implicit $exec
    ; GFX12-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
    renamable $vgpr0 = GLOBAL_LOAD_DWORD_SADDR renamable $sgpr0_sgpr1, killed $vgpr0, 0, 0, implicit $exec :: (load (s32), addrspace 1)
    $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
    GLOBAL_INV 16, implicit $exec
    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
...