about summary refs log tree commit diff
path: root/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll 159
1 files changed, 76 insertions, 83 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
index 980141a..f7ac29e 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
@@ -145,6 +145,7 @@ define amdgpu_kernel void @local_volatile_load_0(
;
; GFX1250-LABEL: local_volatile_load_0:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
@@ -208,12 +209,11 @@ define amdgpu_kernel void @local_volatile_load_1(
; GFX10-WGP-LABEL: local_volatile_load_1:
; GFX10-WGP: ; %bb.0: ; %entry
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, v0
-; GFX10-WGP-NEXT: s_load_dword s7, s[8:9], 0x0
+; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-WGP-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-WGP-NEXT: s_mov_b32 s6, 2
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: v_lshl_add_u32 v1, v1, s6, s7
+; GFX10-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s6
; GFX10-WGP-NEXT: ds_read_b32 v1, v1
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: global_store_dword v0, v1, s[4:5]
@@ -222,12 +222,11 @@ define amdgpu_kernel void @local_volatile_load_1(
; GFX10-CU-LABEL: local_volatile_load_1:
; GFX10-CU: ; %bb.0: ; %entry
; GFX10-CU-NEXT: v_mov_b32_e32 v1, v0
-; GFX10-CU-NEXT: s_load_dword s7, s[8:9], 0x0
+; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x0
; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x8
; GFX10-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX10-CU-NEXT: s_mov_b32 s6, 2
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-CU-NEXT: v_lshl_add_u32 v1, v1, s6, s7
+; GFX10-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s6
; GFX10-CU-NEXT: ds_read_b32 v1, v1
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: global_store_dword v0, v1, s[4:5]
@@ -259,14 +258,13 @@ define amdgpu_kernel void @local_volatile_load_1(
; GFX11-WGP-LABEL: local_volatile_load_1:
; GFX11-WGP: ; %bb.0: ; %entry
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, v0
-; GFX11-WGP-NEXT: s_load_b32 s3, s[4:5], 0x0
+; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-WGP-NEXT: v_mov_b32_e32 v0, 0
-; GFX11-WGP-NEXT: s_mov_b32 s2, 0x3ff
-; GFX11-WGP-NEXT: v_and_b32_e64 v1, v1, s2
-; GFX11-WGP-NEXT: s_mov_b32 s2, 2
+; GFX11-WGP-NEXT: s_mov_b32 s3, 0x3ff
+; GFX11-WGP-NEXT: v_and_b32_e64 v1, v1, s3
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v1, s2, s3
+; GFX11-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s2
; GFX11-WGP-NEXT: ds_load_b32 v1, v1
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -275,14 +273,13 @@ define amdgpu_kernel void @local_volatile_load_1(
; GFX11-CU-LABEL: local_volatile_load_1:
; GFX11-CU: ; %bb.0: ; %entry
; GFX11-CU-NEXT: v_mov_b32_e32 v1, v0
-; GFX11-CU-NEXT: s_load_b32 s3, s[4:5], 0x0
+; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX11-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX11-CU-NEXT: s_mov_b32 s2, 0x3ff
-; GFX11-CU-NEXT: v_and_b32_e64 v1, v1, s2
-; GFX11-CU-NEXT: s_mov_b32 s2, 2
+; GFX11-CU-NEXT: s_mov_b32 s3, 0x3ff
+; GFX11-CU-NEXT: v_and_b32_e64 v1, v1, s3
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-CU-NEXT: v_lshl_add_u32 v1, v1, s2, s3
+; GFX11-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s2
; GFX11-CU-NEXT: ds_load_b32 v1, v1
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -291,15 +288,13 @@ define amdgpu_kernel void @local_volatile_load_1(
; GFX12-WGP-LABEL: local_volatile_load_1:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, v0
-; GFX12-WGP-NEXT: s_load_b32 s3, s[4:5], 0x0
+; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
-; GFX12-WGP-NEXT: s_mov_b32 s2, 0x3ff
-; GFX12-WGP-NEXT: v_and_b32_e64 v1, v1, s2
-; GFX12-WGP-NEXT: s_mov_b32 s2, 2
+; GFX12-WGP-NEXT: s_mov_b32 s3, 0x3ff
+; GFX12-WGP-NEXT: v_and_b32_e64 v1, v1, s3
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
-; GFX12-WGP-NEXT: s_wait_alu 0xfffe
-; GFX12-WGP-NEXT: v_lshl_add_u32 v1, v1, s2, s3
+; GFX12-WGP-NEXT: v_lshl_add_u32 v1, v1, 2, s2
; GFX12-WGP-NEXT: ds_load_b32 v1, v1
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -308,15 +303,13 @@ define amdgpu_kernel void @local_volatile_load_1(
; GFX12-CU-LABEL: local_volatile_load_1:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: v_mov_b32_e32 v1, v0
-; GFX12-CU-NEXT: s_load_b32 s3, s[4:5], 0x0
+; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX12-CU-NEXT: v_mov_b32_e32 v0, 0
-; GFX12-CU-NEXT: s_mov_b32 s2, 0x3ff
-; GFX12-CU-NEXT: v_and_b32_e64 v1, v1, s2
-; GFX12-CU-NEXT: s_mov_b32 s2, 2
+; GFX12-CU-NEXT: s_mov_b32 s3, 0x3ff
+; GFX12-CU-NEXT: v_and_b32_e64 v1, v1, s3
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
-; GFX12-CU-NEXT: s_wait_alu 0xfffe
-; GFX12-CU-NEXT: v_lshl_add_u32 v1, v1, s2, s3
+; GFX12-CU-NEXT: v_lshl_add_u32 v1, v1, 2, s2
; GFX12-CU-NEXT: ds_load_b32 v1, v1
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -324,15 +317,15 @@ define amdgpu_kernel void @local_volatile_load_1(
;
; GFX1250-LABEL: local_volatile_load_1:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: v_mov_b32_e32 v1, v0
-; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x0
; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x8
; GFX1250-NEXT: v_mov_b32_e32 v0, 0
-; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
-; GFX1250-NEXT: v_and_b32_e64 v1, v1, s2
-; GFX1250-NEXT: s_mov_b32 s2, 2
+; GFX1250-NEXT: s_mov_b32 s3, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v1, v1, s3
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshl_add_u32 v1, v1, s2, s3
+; GFX1250-NEXT: v_lshl_add_u32 v1, v1, 2, s2
; GFX1250-NEXT: ds_load_b32 v1, v1
; GFX1250-NEXT: s_wait_dscnt 0x0
; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
@@ -461,6 +454,7 @@ define amdgpu_kernel void @local_volatile_store_0(
;
; GFX1250-LABEL: local_volatile_store_0:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
@@ -511,12 +505,11 @@ define amdgpu_kernel void @local_volatile_store_1(
;
; GFX10-WGP-LABEL: local_volatile_store_1:
; GFX10-WGP: ; %bb.0: ; %entry
-; GFX10-WGP-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
-; GFX10-WGP-NEXT: s_load_dword s6, s[8:9], 0x8
+; GFX10-WGP-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
+; GFX10-WGP-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-WGP-NEXT: s_load_dword s4, s[4:5], 0x0
-; GFX10-WGP-NEXT: s_mov_b32 s5, 2
-; GFX10-WGP-NEXT: v_lshl_add_u32 v0, v0, s5, s6
+; GFX10-WGP-NEXT: s_load_dword s4, s[6:7], 0x0
+; GFX10-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s5
; GFX10-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-WGP-NEXT: v_mov_b32_e32 v1, s4
; GFX10-WGP-NEXT: ds_write_b32 v0, v1
@@ -524,12 +517,11 @@ define amdgpu_kernel void @local_volatile_store_1(
;
; GFX10-CU-LABEL: local_volatile_store_1:
; GFX10-CU: ; %bb.0: ; %entry
-; GFX10-CU-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
-; GFX10-CU-NEXT: s_load_dword s6, s[8:9], 0x8
+; GFX10-CU-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x0
+; GFX10-CU-NEXT: s_load_dword s5, s[8:9], 0x8
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-CU-NEXT: s_load_dword s4, s[4:5], 0x0
-; GFX10-CU-NEXT: s_mov_b32 s5, 2
-; GFX10-CU-NEXT: v_lshl_add_u32 v0, v0, s5, s6
+; GFX10-CU-NEXT: s_load_dword s4, s[6:7], 0x0
+; GFX10-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s5
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
; GFX10-CU-NEXT: ds_write_b32 v0, v1
@@ -552,14 +544,13 @@ define amdgpu_kernel void @local_volatile_store_1(
;
; GFX11-WGP-LABEL: local_volatile_store_1:
; GFX11-WGP: ; %bb.0: ; %entry
-; GFX11-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX11-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX11-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX11-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0
-; GFX11-WGP-NEXT: s_mov_b32 s1, 0x3ff
-; GFX11-WGP-NEXT: v_and_b32_e64 v0, v0, s1
-; GFX11-WGP-NEXT: s_mov_b32 s1, 2
-; GFX11-WGP-NEXT: v_lshl_add_u32 v0, v0, s1, s2
+; GFX11-WGP-NEXT: s_load_b32 s0, s[2:3], 0x0
+; GFX11-WGP-NEXT: s_mov_b32 s2, 0x3ff
+; GFX11-WGP-NEXT: v_and_b32_e64 v0, v0, s2
+; GFX11-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s1
; GFX11-WGP-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX11-WGP-NEXT: ds_store_b32 v0, v1
@@ -567,14 +558,13 @@ define amdgpu_kernel void @local_volatile_store_1(
;
; GFX11-CU-LABEL: local_volatile_store_1:
; GFX11-CU: ; %bb.0: ; %entry
-; GFX11-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX11-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX11-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX11-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
-; GFX11-CU-NEXT: s_mov_b32 s1, 0x3ff
-; GFX11-CU-NEXT: v_and_b32_e64 v0, v0, s1
-; GFX11-CU-NEXT: s_mov_b32 s1, 2
-; GFX11-CU-NEXT: v_lshl_add_u32 v0, v0, s1, s2
+; GFX11-CU-NEXT: s_load_b32 s0, s[2:3], 0x0
+; GFX11-CU-NEXT: s_mov_b32 s2, 0x3ff
+; GFX11-CU-NEXT: v_and_b32_e64 v0, v0, s2
+; GFX11-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s1
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX11-CU-NEXT: ds_store_b32 v0, v1
@@ -582,15 +572,13 @@ define amdgpu_kernel void @local_volatile_store_1(
;
; GFX12-WGP-LABEL: local_volatile_store_1:
; GFX12-WGP: ; %bb.0: ; %entry
-; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX12-WGP-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX12-WGP-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
-; GFX12-WGP-NEXT: s_load_b32 s0, s[0:1], 0x0
-; GFX12-WGP-NEXT: s_mov_b32 s1, 0x3ff
-; GFX12-WGP-NEXT: v_and_b32_e64 v0, v0, s1
-; GFX12-WGP-NEXT: s_mov_b32 s1, 2
-; GFX12-WGP-NEXT: s_wait_alu 0xfffe
-; GFX12-WGP-NEXT: v_lshl_add_u32 v0, v0, s1, s2
+; GFX12-WGP-NEXT: s_load_b32 s0, s[2:3], 0x0
+; GFX12-WGP-NEXT: s_mov_b32 s2, 0x3ff
+; GFX12-WGP-NEXT: v_and_b32_e64 v0, v0, s2
+; GFX12-WGP-NEXT: v_lshl_add_u32 v0, v0, 2, s1
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
@@ -598,15 +586,13 @@ define amdgpu_kernel void @local_volatile_store_1(
;
; GFX12-CU-LABEL: local_volatile_store_1:
; GFX12-CU: ; %bb.0: ; %entry
-; GFX12-CU-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX12-CU-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX12-CU-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX12-CU-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
-; GFX12-CU-NEXT: s_load_b32 s0, s[0:1], 0x0
-; GFX12-CU-NEXT: s_mov_b32 s1, 0x3ff
-; GFX12-CU-NEXT: v_and_b32_e64 v0, v0, s1
-; GFX12-CU-NEXT: s_mov_b32 s1, 2
-; GFX12-CU-NEXT: s_wait_alu 0xfffe
-; GFX12-CU-NEXT: v_lshl_add_u32 v0, v0, s1, s2
+; GFX12-CU-NEXT: s_load_b32 s0, s[2:3], 0x0
+; GFX12-CU-NEXT: s_mov_b32 s2, 0x3ff
+; GFX12-CU-NEXT: v_and_b32_e64 v0, v0, s2
+; GFX12-CU-NEXT: v_lshl_add_u32 v0, v0, 2, s1
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
@@ -614,15 +600,15 @@ define amdgpu_kernel void @local_volatile_store_1(
;
; GFX1250-LABEL: local_volatile_store_1:
; GFX1250: ; %bb.0: ; %entry
-; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x8
+; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
+; GFX1250-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
+; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x8
; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_load_b32 s0, s[0:1], 0x0
+; GFX1250-NEXT: s_load_b32 s0, s[2:3], 0x0
; GFX1250-NEXT: s_wait_xcnt 0x0
-; GFX1250-NEXT: s_mov_b32 s1, 0x3ff
-; GFX1250-NEXT: v_and_b32_e64 v0, v0, s1
-; GFX1250-NEXT: s_mov_b32 s1, 2
-; GFX1250-NEXT: v_lshl_add_u32 v0, v0, s1, s2
+; GFX1250-NEXT: s_mov_b32 s2, 0x3ff
+; GFX1250-NEXT: v_and_b32_e64 v0, v0, s2
+; GFX1250-NEXT: v_lshl_add_u32 v0, v0, 2, s1
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_mov_b32_e32 v1, s0
; GFX1250-NEXT: ds_store_b32 v0, v1
@@ -758,6 +744,7 @@ define amdgpu_kernel void @local_volatile_workgroup_acquire_load(
;
; GFX1250-LABEL: local_volatile_workgroup_acquire_load:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x0
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x4
; GFX1250-NEXT: s_wait_kmcnt 0x0
@@ -819,7 +806,8 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store(
; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-CU-NEXT: v_mov_b32_e32 v0, s5
; GFX10-CU-NEXT: v_mov_b32_e32 v1, s4
-; GFX10-CU-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX10-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-CU-NEXT: ds_write_b32 v0, v1
; GFX10-CU-NEXT: s_endpgm
;
@@ -854,7 +842,8 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store(
; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX11-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX11-CU-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX11-CU-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-CU-NEXT: ds_store_b32 v0, v1
; GFX11-CU-NEXT: s_endpgm
;
@@ -879,12 +868,16 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store(
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX12-CU-NEXT: s_wait_dscnt 0x0
+; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
+; GFX12-CU-NEXT: s_wait_samplecnt 0x0
+; GFX12-CU-NEXT: s_wait_storecnt 0x0
+; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
;
; GFX1250-LABEL: local_volatile_workgroup_release_store:
; GFX1250: ; %bb.0: ; %entry
+; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX1250-NEXT: s_load_b32 s0, s[4:5], 0x0
; GFX1250-NEXT: s_load_b32 s1, s[4:5], 0x4
; GFX1250-NEXT: s_wait_kmcnt 0x0