diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll | 159 |
1 files changed, 76 insertions, 83 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll index 980141a..f7ac29e 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll @@ -145,6 +145,7 @@ define amdgpu_kernel void @local_volatile_load_0( ; ; GFX1250-LABEL: local_volatile_load_0: ; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 @@ -208,12 +209,11 @@ define amdgpu_kernel void @local_volatile_load_1( ; GFX10-WGP-LABEL: local_volatile_load_1: ; GFX10-WGP: ; %bb.0: ; %entry ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, v0 -; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x0 +; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0 ; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 ; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-WGP-NEXT: s_mov_b32 s6, 2 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: v_lshl_add_u32 v1, v1, s6, s7 +; GFX10-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s6 ; GFX10-WGP-NEXT: ds_read_b32 v1, v1 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5] @@ -222,12 +222,11 @@ define amdgpu_kernel void @local_volatile_load_1( ; GFX10-CU-LABEL: local_volatile_load_1: ; GFX10-CU: ; %bb.0: ; %entry ; GFX10-CU-NEXT: v_mov_b32_e32 v1, v0 -; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x0 +; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0 ; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8 ; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX10-CU-NEXT: s_mov_b32 s6, 2 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-CU-NEXT: v_lshl_add_u32 v1, v1, s6, s7 +; GFX10-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s6 ; GFX10-CU-NEXT: ds_read_b32 v1, v1 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5] @@ -259,14 +258,13 @@ define amdgpu_kernel void @local_volatile_load_1( ; GFX11-WGP-LABEL: local_volatile_load_1: ; GFX11-WGP: ; %bb.0: ; %entry ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, v0 -; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-WGP-NEXT: s_mov_b32 s2, 0x3ff -; GFX11-WGP-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX11-WGP-NEXT: s_mov_b32 s2, 2 +; GFX11-WGP-NEXT: s_mov_b32 s3, 0x3ff +; GFX11-WGP-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX11-WGP-NEXT: ds_load_b32 v1, v1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -275,14 +273,13 @@ define amdgpu_kernel void @local_volatile_load_1( ; GFX11-CU-LABEL: local_volatile_load_1: ; GFX11-CU: ; %bb.0: ; %entry ; GFX11-CU-NEXT: v_mov_b32_e32 v1, v0 -; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX11-CU-NEXT: s_mov_b32 s2, 0x3ff -; GFX11-CU-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX11-CU-NEXT: s_mov_b32 s2, 2 +; GFX11-CU-NEXT: s_mov_b32 s3, 0x3ff +; GFX11-CU-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX11-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX11-CU-NEXT: ds_load_b32 v1, v1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -291,15 +288,13 @@ define amdgpu_kernel void @local_volatile_load_1( ; GFX12-WGP-LABEL: local_volatile_load_1: ; GFX12-WGP: ; %bb.0: ; %entry ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, v0 -; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0 -; GFX12-WGP-NEXT: s_mov_b32 s2, 0x3ff -; GFX12-WGP-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX12-WGP-NEXT: s_mov_b32 s2, 2 +; GFX12-WGP-NEXT: s_mov_b32 s3, 0x3ff +; GFX12-WGP-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 -; GFX12-WGP-NEXT: s_wait_alu 0xfffe -; GFX12-WGP-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX12-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX12-WGP-NEXT: ds_load_b32 v1, v1 ; GFX12-WGP-NEXT: s_wait_dscnt 0x0 ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -308,15 +303,13 @@ define amdgpu_kernel void @local_volatile_load_1( ; GFX12-CU-LABEL: local_volatile_load_1: ; GFX12-CU: ; %bb.0: ; %entry ; GFX12-CU-NEXT: v_mov_b32_e32 v1, v0 -; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0 -; GFX12-CU-NEXT: s_mov_b32 s2, 0x3ff -; GFX12-CU-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX12-CU-NEXT: s_mov_b32 s2, 2 +; GFX12-CU-NEXT: s_mov_b32 s3, 0x3ff +; GFX12-CU-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 -; GFX12-CU-NEXT: s_wait_alu 0xfffe -; GFX12-CU-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX12-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX12-CU-NEXT: ds_load_b32 v1, v1 ; GFX12-CU-NEXT: s_wait_dscnt 0x0 ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -324,15 +317,15 @@ define amdgpu_kernel void @local_volatile_load_1( ; ; GFX1250-LABEL: local_volatile_load_1: ; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GFX1250-NEXT: v_mov_b32_e32 v1, v0 -; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x0 +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0 ; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8 ; GFX1250-NEXT: v_mov_b32_e32 v0, 0 -; GFX1250-NEXT: s_mov_b32 s2, 0x3ff -; GFX1250-NEXT: v_and_b32_e64 v1, v1, s2 -; GFX1250-NEXT: s_mov_b32 s2, 2 +; GFX1250-NEXT: s_mov_b32 s3, 0x3ff +; GFX1250-NEXT: v_and_b32_e64 v1, v1, s3 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: v_lshl_add_u32 v1, v1, s2, s3 +; GFX1250-NEXT: v_lshl_add_u32 v1, v1, 2, s2 ; GFX1250-NEXT: ds_load_b32 v1, v1 ; GFX1250-NEXT: s_wait_dscnt 0x0 ; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] @@ -461,6 +454,7 @@ define amdgpu_kernel void @local_volatile_store_0( ; ; GFX1250-LABEL: local_volatile_store_0: ; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 ; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 @@ -511,12 +505,11 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; GFX10-WGP-LABEL: local_volatile_store_1: ; GFX10-WGP: ; %bb.0: ; %entry -; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 -; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8 +; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-WGP-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX10-WGP-NEXT: s_mov_b32 s5, 2 -; GFX10-WGP-NEXT: v_lshl_add_u32 v0, v0, s5, s6 +; GFX10-WGP-NEXT: s_load_dword s4, s[6:7], 0x0 +; GFX10-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s5 ; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-WGP-NEXT: ds_write_b32 v0, v1 @@ -524,12 +517,11 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; GFX10-CU-LABEL: local_volatile_store_1: ; GFX10-CU: ; %bb.0: ; %entry -; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0 -; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8 +; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0 +; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-CU-NEXT: s_load_dword s4, s[4:5], 0x0 -; GFX10-CU-NEXT: s_mov_b32 s5, 2 -; GFX10-CU-NEXT: v_lshl_add_u32 v0, v0, s5, s6 +; GFX10-CU-NEXT: s_load_dword s4, s[6:7], 0x0 +; GFX10-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s5 ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 ; GFX10-CU-NEXT: ds_write_b32 v0, v1 @@ -552,14 +544,13 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; GFX11-WGP-LABEL: local_volatile_store_1: ; GFX11-WGP: ; %bb.0: ; %entry -; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX11-WGP-NEXT: s_mov_b32 s1, 0x3ff -; GFX11-WGP-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX11-WGP-NEXT: s_mov_b32 s1, 2 -; GFX11-WGP-NEXT: v_lshl_add_u32 v0, v0, s1, s2 +; GFX11-WGP-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX11-WGP-NEXT: s_mov_b32 s2, 0x3ff +; GFX11-WGP-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX11-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0 ; GFX11-WGP-NEXT: ds_store_b32 v0, v1 @@ -567,14 +558,13 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; GFX11-CU-LABEL: local_volatile_store_1: ; GFX11-CU: ; %bb.0: ; %entry -; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX11-CU-NEXT: s_mov_b32 s1, 0x3ff -; GFX11-CU-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX11-CU-NEXT: s_mov_b32 s1, 2 -; GFX11-CU-NEXT: v_lshl_add_u32 v0, v0, s1, s2 +; GFX11-CU-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX11-CU-NEXT: s_mov_b32 s2, 0x3ff +; GFX11-CU-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX11-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 @@ -582,15 +572,13 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; GFX12-WGP-LABEL: local_volatile_store_1: ; GFX12-WGP: ; %bb.0: ; %entry -; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 -; GFX12-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX12-WGP-NEXT: s_mov_b32 s1, 0x3ff -; GFX12-WGP-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX12-WGP-NEXT: s_mov_b32 s1, 2 -; GFX12-WGP-NEXT: s_wait_alu 0xfffe -; GFX12-WGP-NEXT: v_lshl_add_u32 v0, v0, s1, s2 +; GFX12-WGP-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX12-WGP-NEXT: s_mov_b32 s2, 0x3ff +; GFX12-WGP-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX12-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0 ; GFX12-WGP-NEXT: ds_store_b32 v0, v1 @@ -598,15 +586,13 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; GFX12-CU-LABEL: local_volatile_store_1: ; GFX12-CU: ; %bb.0: ; %entry -; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 -; GFX12-CU-NEXT: s_load_b32 s0, s[0:1], 0x0 -; GFX12-CU-NEXT: s_mov_b32 s1, 0x3ff -; GFX12-CU-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX12-CU-NEXT: s_mov_b32 s1, 2 -; GFX12-CU-NEXT: s_wait_alu 0xfffe -; GFX12-CU-NEXT: v_lshl_add_u32 v0, v0, s1, s2 +; GFX12-CU-NEXT: s_load_b32 s0, s[2:3], 0x0 +; GFX12-CU-NEXT: s_mov_b32 s2, 0x3ff +; GFX12-CU-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX12-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 ; GFX12-CU-NEXT: ds_store_b32 v0, v1 @@ -614,15 +600,15 @@ define amdgpu_kernel void @local_volatile_store_1( ; ; GFX1250-LABEL: local_volatile_store_1: ; GFX1250: ; %bb.0: ; %entry -; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 -; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8 +; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 +; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0 +; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8 ; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0 +; GFX1250-NEXT: s_load_b32 s0, s[2:3], 0x0 ; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: s_mov_b32 s1, 0x3ff -; GFX1250-NEXT: v_and_b32_e64 v0, v0, s1 -; GFX1250-NEXT: s_mov_b32 s1, 2 -; GFX1250-NEXT: v_lshl_add_u32 v0, v0, s1, s2 +; GFX1250-NEXT: s_mov_b32 s2, 0x3ff +; GFX1250-NEXT: v_and_b32_e64 v0, v0, s2 +; GFX1250-NEXT: v_lshl_add_u32 v0, v0, 2, s1 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: v_mov_b32_e32 v1, s0 ; GFX1250-NEXT: ds_store_b32 v0, v1 @@ -758,6 +744,7 @@ define amdgpu_kernel void @local_volatile_workgroup_acquire_load( ; ; GFX1250-LABEL: local_volatile_workgroup_acquire_load: ; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0 ; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4 ; GFX1250-NEXT: s_wait_kmcnt 0x0 @@ -819,7 +806,8 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store( ; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5 ; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4 -; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-CU-NEXT: ds_write_b32 v0, v1 ; GFX10-CU-NEXT: s_endpgm ; @@ -854,7 +842,8 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store( ; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-CU-NEXT: ds_store_b32 v0, v1 ; GFX11-CU-NEXT: s_endpgm ; @@ -879,12 +868,16 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store( ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0 -; GFX12-CU-NEXT: s_wait_dscnt 0x0 +; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 +; GFX12-CU-NEXT: s_wait_samplecnt 0x0 +; GFX12-CU-NEXT: s_wait_storecnt 0x0 +; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: ds_store_b32 v0, v1 ; GFX12-CU-NEXT: s_endpgm ; ; GFX1250-LABEL: local_volatile_workgroup_release_store: ; GFX1250: ; %bb.0: ; %entry +; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0 ; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4 ; GFX1250-NEXT: s_wait_kmcnt 0x0 |
