aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll304
1 files changed, 136 insertions, 168 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll b/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll
index bd4ee03..2ff66c9 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll
@@ -281,7 +281,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5]
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc_lo
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
@@ -322,7 +322,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[6:7]
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v6, vcc_lo
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[4:5], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[4:5], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
@@ -370,7 +370,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5]
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc_lo
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
@@ -414,7 +414,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[6:7]
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v6, vcc_lo
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[4:5], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[4:5], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
@@ -458,7 +458,7 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1]
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo
-; GFX1250-SDAG-NEXT: scratch_store_b64 v0, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v0, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_xchg_saddr_i64_nortn:
@@ -491,7 +491,7 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: .LBB12_4: ; %atomicrmw.private
; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[2:3]
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v2, vcc_lo
-; GFX1250-GISEL-NEXT: scratch_store_b64 v0, v[4:5], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v0, v[4:5], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -531,7 +531,7 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX1250-SDAG-NEXT: .LBB13_4: ; %atomicrmw.private
; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1]
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo
-; GFX1250-SDAG-NEXT: scratch_store_b64 v0, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v0, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_xchg_saddr_i64_nortn_neg128:
@@ -567,7 +567,7 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX1250-GISEL-NEXT: .LBB13_4: ; %atomicrmw.private
; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[2:3]
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v2, vcc_lo
-; GFX1250-GISEL-NEXT: scratch_store_b64 v0, v[4:5], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v0, v[4:5], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -673,7 +673,7 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[2:3], v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB18_5
@@ -715,7 +715,7 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[2:3], v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB18_5
@@ -764,7 +764,7 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[2:3], v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB19_5
@@ -809,7 +809,7 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[2:3], v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB19_5
@@ -855,7 +855,7 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_add_saddr_i64_nortn:
@@ -891,7 +891,7 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -934,7 +934,7 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_add_saddr_i64_nortn_neg128:
@@ -973,7 +973,7 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -1079,7 +1079,7 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_sub_nc_u64_e32 v[2:3], v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB26_5
@@ -1121,7 +1121,7 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_sub_nc_u64_e32 v[2:3], v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB26_5
@@ -1170,7 +1170,7 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_sub_nc_u64_e32 v[2:3], v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB27_5
@@ -1215,7 +1215,7 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_sub_nc_u64_e32 v[2:3], v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB27_5
@@ -1261,7 +1261,7 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_sub_nc_u64_e32 v[0:1], v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_sub_saddr_i64_nortn:
@@ -1297,7 +1297,7 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_sub_nc_u64_e32 v[0:1], v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -1340,7 +1340,7 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_sub_nc_u64_e32 v[0:1], v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_sub_saddr_i64_nortn_neg128:
@@ -1379,7 +1379,7 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_sub_nc_u64_e32 v[0:1], v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -1486,7 +1486,7 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_and_b32_e32 v3, v1, v3
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, v0, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB34_5
@@ -1529,7 +1529,7 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_and_b32_e32 v2, v0, v4
; GFX1250-GISEL-NEXT: v_and_b32_e32 v3, v1, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB34_5
@@ -1579,7 +1579,7 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_and_b32_e32 v3, v1, v3
; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, v0, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB35_5
@@ -1625,7 +1625,7 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_and_b32_e32 v2, v0, v4
; GFX1250-GISEL-NEXT: v_and_b32_e32 v3, v1, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB35_5
@@ -1672,7 +1672,7 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_and_b32_e32 v1, v1, v3
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, v0, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_and_saddr_i64_nortn:
@@ -1709,7 +1709,7 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, v0, v4
; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, v1, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -1753,7 +1753,7 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_and_b32_e32 v1, v1, v3
; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, v0, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_and_saddr_i64_nortn_neg128:
@@ -1793,7 +1793,7 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, v0, v4
; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, v1, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -1900,7 +1900,7 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn(ptr inreg %sbase, i32 %voffs
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_or_b32_e32 v3, v1, v3
; GFX1250-SDAG-NEXT: v_or_b32_e32 v2, v0, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB42_5
@@ -1943,7 +1943,7 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn(ptr inreg %sbase, i32 %voffs
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_or_b32_e32 v2, v0, v4
; GFX1250-GISEL-NEXT: v_or_b32_e32 v3, v1, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB42_5
@@ -1993,7 +1993,7 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn_neg128(ptr inreg %sbase, i32
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_or_b32_e32 v3, v1, v3
; GFX1250-SDAG-NEXT: v_or_b32_e32 v2, v0, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB43_5
@@ -2039,7 +2039,7 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn_neg128(ptr inreg %sbase, i32
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_or_b32_e32 v2, v0, v4
; GFX1250-GISEL-NEXT: v_or_b32_e32 v3, v1, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB43_5
@@ -2086,7 +2086,7 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, i
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_or_b32_e32 v1, v1, v3
; GFX1250-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_or_saddr_i64_nortn:
@@ -2123,7 +2123,7 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, i
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
; GFX1250-GISEL-NEXT: v_or_b32_e32 v1, v1, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -2167,7 +2167,7 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vof
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_or_b32_e32 v1, v1, v3
; GFX1250-SDAG-NEXT: v_or_b32_e32 v0, v0, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_or_saddr_i64_nortn_neg128:
@@ -2207,7 +2207,7 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vof
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_or_b32_e32 v0, v0, v4
; GFX1250-GISEL-NEXT: v_or_b32_e32 v1, v1, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -2314,7 +2314,7 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_xor_b32_e32 v3, v1, v3
; GFX1250-SDAG-NEXT: v_xor_b32_e32 v2, v0, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB50_5
@@ -2357,7 +2357,7 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_xor_b32_e32 v2, v0, v4
; GFX1250-GISEL-NEXT: v_xor_b32_e32 v3, v1, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB50_5
@@ -2407,7 +2407,7 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_xor_b32_e32 v3, v1, v3
; GFX1250-SDAG-NEXT: v_xor_b32_e32 v2, v0, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB51_5
@@ -2453,7 +2453,7 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_xor_b32_e32 v2, v0, v4
; GFX1250-GISEL-NEXT: v_xor_b32_e32 v3, v1, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB51_5
@@ -2500,7 +2500,7 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_xor_b32_e32 v1, v1, v3
; GFX1250-SDAG-NEXT: v_xor_b32_e32 v0, v0, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_xor_saddr_i64_nortn:
@@ -2537,7 +2537,7 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GFX1250-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -2581,7 +2581,7 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_xor_b32_e32 v1, v1, v3
; GFX1250-SDAG-NEXT: v_xor_b32_e32 v0, v0, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_xor_saddr_i64_nortn_neg128:
@@ -2621,7 +2621,7 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
; GFX1250-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -2721,9 +2721,8 @@ define amdgpu_ps <2 x float> @flat_max_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: v_max_i64 v[2:3], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB58_5
@@ -2765,9 +2764,8 @@ define amdgpu_ps <2 x float> @flat_max_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: v_max_i64 v[2:3], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB58_5
@@ -2816,9 +2814,8 @@ define amdgpu_ps <2 x float> @flat_max_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: v_max_i64 v[2:3], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB59_5
@@ -2863,9 +2860,8 @@ define amdgpu_ps <2 x float> @flat_max_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: v_max_i64 v[2:3], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB59_5
@@ -2909,9 +2905,8 @@ define amdgpu_ps void @flat_max_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: v_max_i64 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_max_saddr_i64_nortn:
@@ -2945,9 +2940,8 @@ define amdgpu_ps void @flat_max_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: v_max_i64 v[0:1], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -2988,9 +2982,8 @@ define amdgpu_ps void @flat_max_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: v_max_i64 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_max_saddr_i64_nortn_neg128:
@@ -3027,9 +3020,8 @@ define amdgpu_ps void @flat_max_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: v_max_i64 v[0:1], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -3129,9 +3121,8 @@ define amdgpu_ps <2 x float> @flat_min_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_le_i64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: v_min_i64 v[2:3], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB66_5
@@ -3173,9 +3164,8 @@ define amdgpu_ps <2 x float> @flat_min_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: v_min_i64 v[2:3], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB66_5
@@ -3224,9 +3214,8 @@ define amdgpu_ps <2 x float> @flat_min_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_le_i64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: v_min_i64 v[2:3], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB67_5
@@ -3271,9 +3260,8 @@ define amdgpu_ps <2 x float> @flat_min_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: v_min_i64 v[2:3], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB67_5
@@ -3317,9 +3305,8 @@ define amdgpu_ps void @flat_min_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_le_i64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: v_min_i64 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_min_saddr_i64_nortn:
@@ -3353,9 +3340,8 @@ define amdgpu_ps void @flat_min_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: v_min_i64 v[0:1], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -3396,9 +3382,8 @@ define amdgpu_ps void @flat_min_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_le_i64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: v_min_i64 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_min_saddr_i64_nortn_neg128:
@@ -3435,9 +3420,8 @@ define amdgpu_ps void @flat_min_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: v_min_i64 v[0:1], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -3537,9 +3521,8 @@ define amdgpu_ps <2 x float> @flat_umax_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: v_max_u64 v[2:3], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB74_5
@@ -3581,9 +3564,8 @@ define amdgpu_ps <2 x float> @flat_umax_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: v_max_u64 v[2:3], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB74_5
@@ -3632,9 +3614,8 @@ define amdgpu_ps <2 x float> @flat_umax_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: v_max_u64 v[2:3], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB75_5
@@ -3679,9 +3660,8 @@ define amdgpu_ps <2 x float> @flat_umax_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: v_max_u64 v[2:3], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB75_5
@@ -3725,9 +3705,8 @@ define amdgpu_ps void @flat_umax_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: v_max_u64 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_umax_saddr_i64_nortn:
@@ -3761,9 +3740,8 @@ define amdgpu_ps void @flat_umax_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: v_max_u64 v[0:1], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -3804,9 +3782,8 @@ define amdgpu_ps void @flat_umax_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: v_max_u64 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_umax_saddr_i64_nortn_neg128:
@@ -3843,9 +3820,8 @@ define amdgpu_ps void @flat_umax_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: v_max_u64 v[0:1], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -3945,9 +3921,8 @@ define amdgpu_ps <2 x float> @flat_umin_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_le_u64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: v_min_u64 v[2:3], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB82_5
@@ -3989,9 +3964,8 @@ define amdgpu_ps <2 x float> @flat_umin_saddr_i64_rtn(ptr inreg %sbase, i32 %vof
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: v_min_u64 v[2:3], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB82_5
@@ -4040,9 +4014,8 @@ define amdgpu_ps <2 x float> @flat_umin_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_le_u64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-SDAG-NEXT: v_min_u64 v[2:3], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB83_5
@@ -4087,9 +4060,8 @@ define amdgpu_ps <2 x float> @flat_umin_saddr_i64_rtn_neg128(ptr inreg %sbase, i
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: v_min_u64 v[2:3], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB83_5
@@ -4133,9 +4105,8 @@ define amdgpu_ps void @flat_umin_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_le_u64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: v_min_u64 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_umin_saddr_i64_nortn:
@@ -4169,9 +4140,8 @@ define amdgpu_ps void @flat_umin_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: v_min_u64 v[0:1], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -4212,9 +4182,8 @@ define amdgpu_ps void @flat_umin_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo
; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
-; GFX1250-SDAG-NEXT: v_cmp_le_u64_e32 vcc_lo, v[0:1], v[2:3]
-; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: v_min_u64 v[0:1], v[0:1], v[2:3]
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_umin_saddr_i64_nortn_neg128:
@@ -4251,9 +4220,8 @@ define amdgpu_ps void @flat_umin_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v
; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo
; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
-; GFX1250-GISEL-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[4:5]
-; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: v_min_u64 v[0:1], v[0:1], v[4:5]
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -4377,7 +4345,7 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn(ptr inreg %sbase, i32 %
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7]
; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v1, v5 :: v_dual_cndmask_b32 v2, v0, v4
-; GFX1250-SDAG-NEXT: scratch_store_b64 v8, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v8, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB90_5
@@ -4423,7 +4391,7 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn(ptr inreg %sbase, i32 %
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[8:9]
; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v0, v6 :: v_dual_cndmask_b32 v3, v1, v7
-; GFX1250-GISEL-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB90_5
@@ -4477,7 +4445,7 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn_neg128(ptr inreg %sbase
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7]
; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v1, v5 :: v_dual_cndmask_b32 v2, v0, v4
-; GFX1250-SDAG-NEXT: scratch_store_b64 v8, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v8, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB91_5
@@ -4526,7 +4494,7 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn_neg128(ptr inreg %sbase
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[8:9]
; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v0, v6 :: v_dual_cndmask_b32 v3, v1, v7
-; GFX1250-GISEL-NEXT: scratch_store_b64 v4, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB91_5
@@ -4577,7 +4545,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffs
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7]
; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
-; GFX1250-SDAG-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_cmpxchg_saddr_i64_nortn:
@@ -4617,7 +4585,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffs
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[8:9]
; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -4664,7 +4632,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32
; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX1250-SDAG-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7]
; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4
-; GFX1250-SDAG-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_cmpxchg_saddr_i64_nortn_neg128:
@@ -4707,7 +4675,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32
; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX1250-GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[8:9]
; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -4808,7 +4776,7 @@ define amdgpu_ps <2 x float> @flat_inc_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
-; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB98_5
@@ -4854,7 +4822,7 @@ define amdgpu_ps <2 x float> @flat_inc_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc_lo
; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc_lo
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB98_5
@@ -4906,7 +4874,7 @@ define amdgpu_ps <2 x float> @flat_inc_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4
-; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-SDAG-NEXT: s_branch .LBB99_5
@@ -4955,7 +4923,7 @@ define amdgpu_ps <2 x float> @flat_inc_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc_lo
; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc_lo
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX1250-GISEL-NEXT: s_branch .LBB99_5
@@ -5002,7 +4970,7 @@ define amdgpu_ps void @flat_inc_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, 0, v5 :: v_dual_cndmask_b32 v0, 0, v4
-; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_inc_saddr_i64_nortn:
@@ -5040,7 +5008,7 @@ define amdgpu_ps void @flat_inc_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -5084,7 +5052,7 @@ define amdgpu_ps void @flat_inc_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-SDAG-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3]
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, 0, v5 :: v_dual_cndmask_b32 v0, 0, v4
-; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_inc_saddr_i64_nortn_neg128:
@@ -5125,7 +5093,7 @@ define amdgpu_ps void @flat_inc_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo
; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -5229,7 +5197,7 @@ define amdgpu_ps <2 x float> @flat_dec_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-SDAG-NEXT: s_or_b32 vcc_lo, vcc_lo, s0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v5, v3 :: v_dual_cndmask_b32 v2, v4, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1250-SDAG-NEXT: s_branch .LBB106_5
@@ -5276,7 +5244,7 @@ define amdgpu_ps <2 x float> @flat_dec_saddr_i64_rtn(ptr inreg %sbase, i32 %voff
; GFX1250-GISEL-NEXT: s_or_b32 vcc_lo, vcc_lo, s0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v2, v4 :: v_dual_cndmask_b32 v3, v3, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1250-GISEL-NEXT: s_branch .LBB106_5
@@ -5330,7 +5298,7 @@ define amdgpu_ps <2 x float> @flat_dec_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-SDAG-NEXT: s_or_b32 vcc_lo, vcc_lo, s0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v5, v3 :: v_dual_cndmask_b32 v2, v4, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1250-SDAG-NEXT: s_branch .LBB107_5
@@ -5380,7 +5348,7 @@ define amdgpu_ps <2 x float> @flat_dec_saddr_i64_rtn_neg128(ptr inreg %sbase, i3
; GFX1250-GISEL-NEXT: s_or_b32 vcc_lo, vcc_lo, s0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v2, v4 :: v_dual_cndmask_b32 v3, v3, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0
; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1
; GFX1250-GISEL-NEXT: s_branch .LBB107_5
@@ -5429,7 +5397,7 @@ define amdgpu_ps void @flat_dec_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-SDAG-NEXT: s_or_b32 vcc_lo, vcc_lo, s0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_dec_saddr_i64_nortn:
@@ -5468,7 +5436,7 @@ define amdgpu_ps void @flat_dec_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset,
; GFX1250-GISEL-NEXT: s_or_b32 vcc_lo, vcc_lo, s0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset
@@ -5514,7 +5482,7 @@ define amdgpu_ps void @flat_dec_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-SDAG-NEXT: s_or_b32 vcc_lo, vcc_lo, s0
; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2
-; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off
+; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE
; GFX1250-SDAG-NEXT: s_endpgm
;
; GFX1250-GISEL-LABEL: flat_dec_saddr_i64_nortn_neg128:
@@ -5556,7 +5524,7 @@ define amdgpu_ps void @flat_dec_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo
; GFX1250-GISEL-NEXT: s_or_b32 vcc_lo, vcc_lo, s0
; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5
-; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off
+; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE
; GFX1250-GISEL-NEXT: s_endpgm
%zext.offset = zext i32 %voffset to i64
%gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset