diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll | 304 |
1 files changed, 136 insertions, 168 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll b/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll index bd4ee03..2ff66c9 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll @@ -281,7 +281,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn(ptr inreg %sbase, i32 %vof ; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5] ; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc_lo ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -322,7 +322,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn(ptr inreg %sbase, i32 %vof ; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[6:7] ; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v6, vcc_lo ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[4:5], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[4:5], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 @@ -370,7 +370,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn_neg128(ptr inreg %sbase, i ; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[4:5] ; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc_lo ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -414,7 +414,7 @@ define amdgpu_ps <2 x float> @flat_xchg_saddr_i64_rtn_neg128(ptr inreg %sbase, i ; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[6:7] ; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v6, vcc_lo ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[4:5], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[4:5], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 @@ -458,7 +458,7 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1] ; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo -; GFX1250-SDAG-NEXT: scratch_store_b64 v0, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v0, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_xchg_saddr_i64_nortn: @@ -491,7 +491,7 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-GISEL-NEXT: .LBB12_4: ; %atomicrmw.private ; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[2:3] ; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v2, vcc_lo -; GFX1250-GISEL-NEXT: scratch_store_b64 v0, v[4:5], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v0, v[4:5], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -531,7 +531,7 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v ; GFX1250-SDAG-NEXT: .LBB13_4: ; %atomicrmw.private ; GFX1250-SDAG-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[0:1] ; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc_lo -; GFX1250-SDAG-NEXT: scratch_store_b64 v0, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v0, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_xchg_saddr_i64_nortn_neg128: @@ -567,7 +567,7 @@ define amdgpu_ps void @flat_xchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v ; GFX1250-GISEL-NEXT: .LBB13_4: ; %atomicrmw.private ; GFX1250-GISEL-NEXT: v_cmp_ne_u64_e32 vcc_lo, 0, v[2:3] ; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v2, vcc_lo -; GFX1250-GISEL-NEXT: scratch_store_b64 v0, v[4:5], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v0, v[4:5], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -673,7 +673,7 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[2:3], v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB18_5 @@ -715,7 +715,7 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[2:3], v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB18_5 @@ -764,7 +764,7 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[2:3], v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB19_5 @@ -809,7 +809,7 @@ define amdgpu_ps <2 x float> @flat_add_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[2:3], v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB19_5 @@ -855,7 +855,7 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_add_saddr_i64_nortn: @@ -891,7 +891,7 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -934,7 +934,7 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_add_saddr_i64_nortn_neg128: @@ -973,7 +973,7 @@ define amdgpu_ps void @flat_add_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_add_nc_u64_e32 v[0:1], v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -1079,7 +1079,7 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_sub_nc_u64_e32 v[2:3], v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB26_5 @@ -1121,7 +1121,7 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_sub_nc_u64_e32 v[2:3], v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB26_5 @@ -1170,7 +1170,7 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_sub_nc_u64_e32 v[2:3], v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB27_5 @@ -1215,7 +1215,7 @@ define amdgpu_ps <2 x float> @flat_sub_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_sub_nc_u64_e32 v[2:3], v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB27_5 @@ -1261,7 +1261,7 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_sub_nc_u64_e32 v[0:1], v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_sub_saddr_i64_nortn: @@ -1297,7 +1297,7 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_sub_nc_u64_e32 v[0:1], v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -1340,7 +1340,7 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_sub_nc_u64_e32 v[0:1], v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_sub_saddr_i64_nortn_neg128: @@ -1379,7 +1379,7 @@ define amdgpu_ps void @flat_sub_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_sub_nc_u64_e32 v[0:1], v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -1486,7 +1486,7 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_and_b32_e32 v3, v1, v3 ; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, v0, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB34_5 @@ -1529,7 +1529,7 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_and_b32_e32 v2, v0, v4 ; GFX1250-GISEL-NEXT: v_and_b32_e32 v3, v1, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB34_5 @@ -1579,7 +1579,7 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_and_b32_e32 v3, v1, v3 ; GFX1250-SDAG-NEXT: v_and_b32_e32 v2, v0, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB35_5 @@ -1625,7 +1625,7 @@ define amdgpu_ps <2 x float> @flat_and_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_and_b32_e32 v2, v0, v4 ; GFX1250-GISEL-NEXT: v_and_b32_e32 v3, v1, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB35_5 @@ -1672,7 +1672,7 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_and_b32_e32 v1, v1, v3 ; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, v0, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_and_saddr_i64_nortn: @@ -1709,7 +1709,7 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, v0, v4 ; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, v1, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -1753,7 +1753,7 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_and_b32_e32 v1, v1, v3 ; GFX1250-SDAG-NEXT: v_and_b32_e32 v0, v0, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_and_saddr_i64_nortn_neg128: @@ -1793,7 +1793,7 @@ define amdgpu_ps void @flat_and_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_and_b32_e32 v0, v0, v4 ; GFX1250-GISEL-NEXT: v_and_b32_e32 v1, v1, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -1900,7 +1900,7 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn(ptr inreg %sbase, i32 %voffs ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_or_b32_e32 v3, v1, v3 ; GFX1250-SDAG-NEXT: v_or_b32_e32 v2, v0, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB42_5 @@ -1943,7 +1943,7 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn(ptr inreg %sbase, i32 %voffs ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_or_b32_e32 v2, v0, v4 ; GFX1250-GISEL-NEXT: v_or_b32_e32 v3, v1, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB42_5 @@ -1993,7 +1993,7 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn_neg128(ptr inreg %sbase, i32 ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_or_b32_e32 v3, v1, v3 ; GFX1250-SDAG-NEXT: v_or_b32_e32 v2, v0, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB43_5 @@ -2039,7 +2039,7 @@ define amdgpu_ps <2 x float> @flat_or_saddr_i64_rtn_neg128(ptr inreg %sbase, i32 ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_or_b32_e32 v2, v0, v4 ; GFX1250-GISEL-NEXT: v_or_b32_e32 v3, v1, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB43_5 @@ -2086,7 +2086,7 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, i ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX1250-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_or_saddr_i64_nortn: @@ -2123,7 +2123,7 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, i ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 ; GFX1250-GISEL-NEXT: v_or_b32_e32 v1, v1, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -2167,7 +2167,7 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vof ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX1250-SDAG-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_or_saddr_i64_nortn_neg128: @@ -2207,7 +2207,7 @@ define amdgpu_ps void @flat_or_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vof ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_or_b32_e32 v0, v0, v4 ; GFX1250-GISEL-NEXT: v_or_b32_e32 v1, v1, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -2314,7 +2314,7 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_xor_b32_e32 v3, v1, v3 ; GFX1250-SDAG-NEXT: v_xor_b32_e32 v2, v0, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB50_5 @@ -2357,7 +2357,7 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_xor_b32_e32 v2, v0, v4 ; GFX1250-GISEL-NEXT: v_xor_b32_e32 v3, v1, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB50_5 @@ -2407,7 +2407,7 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_xor_b32_e32 v3, v1, v3 ; GFX1250-SDAG-NEXT: v_xor_b32_e32 v2, v0, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB51_5 @@ -2453,7 +2453,7 @@ define amdgpu_ps <2 x float> @flat_xor_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_xor_b32_e32 v2, v0, v4 ; GFX1250-GISEL-NEXT: v_xor_b32_e32 v3, v1, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB51_5 @@ -2500,7 +2500,7 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_xor_b32_e32 v1, v1, v3 ; GFX1250-SDAG-NEXT: v_xor_b32_e32 v0, v0, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_xor_saddr_i64_nortn: @@ -2537,7 +2537,7 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 ; GFX1250-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -2581,7 +2581,7 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_xor_b32_e32 v1, v1, v3 ; GFX1250-SDAG-NEXT: v_xor_b32_e32 v0, v0, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_xor_saddr_i64_nortn_neg128: @@ -2621,7 +2621,7 @@ define amdgpu_ps void @flat_xor_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_xor_b32_e32 v0, v0, v4 ; GFX1250-GISEL-NEXT: v_xor_b32_e32 v1, v1, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -2721,9 +2721,8 @@ define amdgpu_ps <2 x float> @flat_max_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: v_max_i64 v[2:3], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB58_5 @@ -2765,9 +2764,8 @@ define amdgpu_ps <2 x float> @flat_max_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: v_max_i64 v[2:3], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB58_5 @@ -2816,9 +2814,8 @@ define amdgpu_ps <2 x float> @flat_max_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: v_max_i64 v[2:3], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB59_5 @@ -2863,9 +2860,8 @@ define amdgpu_ps <2 x float> @flat_max_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: v_max_i64 v[2:3], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB59_5 @@ -2909,9 +2905,8 @@ define amdgpu_ps void @flat_max_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: v_max_i64 v[0:1], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_max_saddr_i64_nortn: @@ -2945,9 +2940,8 @@ define amdgpu_ps void @flat_max_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: v_max_i64 v[0:1], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -2988,9 +2982,8 @@ define amdgpu_ps void @flat_max_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: v_max_i64 v[0:1], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_max_saddr_i64_nortn_neg128: @@ -3027,9 +3020,8 @@ define amdgpu_ps void @flat_max_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_gt_i64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: v_max_i64 v[0:1], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -3129,9 +3121,8 @@ define amdgpu_ps <2 x float> @flat_min_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_le_i64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: v_min_i64 v[2:3], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB66_5 @@ -3173,9 +3164,8 @@ define amdgpu_ps <2 x float> @flat_min_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: v_min_i64 v[2:3], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB66_5 @@ -3224,9 +3214,8 @@ define amdgpu_ps <2 x float> @flat_min_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_le_i64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: v_min_i64 v[2:3], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB67_5 @@ -3271,9 +3260,8 @@ define amdgpu_ps <2 x float> @flat_min_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: v_min_i64 v[2:3], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB67_5 @@ -3317,9 +3305,8 @@ define amdgpu_ps void @flat_min_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_le_i64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: v_min_i64 v[0:1], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_min_saddr_i64_nortn: @@ -3353,9 +3340,8 @@ define amdgpu_ps void @flat_min_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: v_min_i64 v[0:1], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -3396,9 +3382,8 @@ define amdgpu_ps void @flat_min_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_le_i64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: v_min_i64 v[0:1], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_min_saddr_i64_nortn_neg128: @@ -3435,9 +3420,8 @@ define amdgpu_ps void @flat_min_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: v_min_i64 v[0:1], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -3537,9 +3521,8 @@ define amdgpu_ps <2 x float> @flat_umax_saddr_i64_rtn(ptr inreg %sbase, i32 %vof ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: v_max_u64 v[2:3], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB74_5 @@ -3581,9 +3564,8 @@ define amdgpu_ps <2 x float> @flat_umax_saddr_i64_rtn(ptr inreg %sbase, i32 %vof ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: v_max_u64 v[2:3], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB74_5 @@ -3632,9 +3614,8 @@ define amdgpu_ps <2 x float> @flat_umax_saddr_i64_rtn_neg128(ptr inreg %sbase, i ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: v_max_u64 v[2:3], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB75_5 @@ -3679,9 +3660,8 @@ define amdgpu_ps <2 x float> @flat_umax_saddr_i64_rtn_neg128(ptr inreg %sbase, i ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: v_max_u64 v[2:3], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB75_5 @@ -3725,9 +3705,8 @@ define amdgpu_ps void @flat_umax_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: v_max_u64 v[0:1], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_umax_saddr_i64_nortn: @@ -3761,9 +3740,8 @@ define amdgpu_ps void @flat_umax_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: v_max_u64 v[0:1], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -3804,9 +3782,8 @@ define amdgpu_ps void @flat_umax_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v ; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: v_max_u64 v[0:1], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_umax_saddr_i64_nortn_neg128: @@ -3843,9 +3820,8 @@ define amdgpu_ps void @flat_umax_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v ; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: v_max_u64 v[0:1], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -3945,9 +3921,8 @@ define amdgpu_ps <2 x float> @flat_umin_saddr_i64_rtn(ptr inreg %sbase, i32 %vof ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_le_u64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: v_min_u64 v[2:3], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB82_5 @@ -3989,9 +3964,8 @@ define amdgpu_ps <2 x float> @flat_umin_saddr_i64_rtn(ptr inreg %sbase, i32 %vof ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: v_min_u64 v[2:3], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB82_5 @@ -4040,9 +4014,8 @@ define amdgpu_ps <2 x float> @flat_umin_saddr_i64_rtn_neg128(ptr inreg %sbase, i ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_le_u64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v3, v1 :: v_dual_cndmask_b32 v2, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-SDAG-NEXT: v_min_u64 v[2:3], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB83_5 @@ -4087,9 +4060,8 @@ define amdgpu_ps <2 x float> @flat_umin_saddr_i64_rtn_neg128(ptr inreg %sbase, i ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v6, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v4, v0 :: v_dual_cndmask_b32 v3, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: v_min_u64 v[2:3], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB83_5 @@ -4133,9 +4105,8 @@ define amdgpu_ps void @flat_umin_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_le_u64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: v_min_u64 v[0:1], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_umin_saddr_i64_nortn: @@ -4169,9 +4140,8 @@ define amdgpu_ps void @flat_umin_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: v_min_u64 v[0:1], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -4212,9 +4182,8 @@ define amdgpu_ps void @flat_umin_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v ; GFX1250-SDAG-NEXT: v_cndmask_b32_e32 v4, -1, v0, vcc_lo ; GFX1250-SDAG-NEXT: scratch_load_b64 v[0:1], v4, off ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1250-SDAG-NEXT: v_cmp_le_u64_e32 vcc_lo, v[0:1], v[2:3] -; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v3, v1 :: v_dual_cndmask_b32 v0, v2, v0 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: v_min_u64 v[0:1], v[0:1], v[2:3] +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_umin_saddr_i64_nortn_neg128: @@ -4251,9 +4220,8 @@ define amdgpu_ps void @flat_umin_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %v ; GFX1250-GISEL-NEXT: v_cndmask_b32_e32 v2, -1, v2, vcc_lo ; GFX1250-GISEL-NEXT: scratch_load_b64 v[0:1], v2, off ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1250-GISEL-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[4:5] -; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: v_min_u64 v[0:1], v[0:1], v[4:5] +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -4377,7 +4345,7 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn(ptr inreg %sbase, i32 % ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7] ; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v1, v5 :: v_dual_cndmask_b32 v2, v0, v4 -; GFX1250-SDAG-NEXT: scratch_store_b64 v8, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v8, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB90_5 @@ -4423,7 +4391,7 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn(ptr inreg %sbase, i32 % ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[8:9] ; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v0, v6 :: v_dual_cndmask_b32 v3, v1, v7 -; GFX1250-GISEL-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB90_5 @@ -4477,7 +4445,7 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn_neg128(ptr inreg %sbase ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7] ; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v1, v5 :: v_dual_cndmask_b32 v2, v0, v4 -; GFX1250-SDAG-NEXT: scratch_store_b64 v8, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v8, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB91_5 @@ -4526,7 +4494,7 @@ define amdgpu_ps <2 x float> @flat_cmpxchg_saddr_i64_rtn_neg128(ptr inreg %sbase ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[8:9] ; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v0, v6 :: v_dual_cndmask_b32 v3, v1, v7 -; GFX1250-GISEL-NEXT: scratch_store_b64 v4, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v4, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB91_5 @@ -4577,7 +4545,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffs ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7] ; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4 -; GFX1250-SDAG-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_cmpxchg_saddr_i64_nortn: @@ -4617,7 +4585,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i64_nortn(ptr inreg %sbase, i32 %voffs ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[8:9] ; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -4664,7 +4632,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 ; GFX1250-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1250-SDAG-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[6:7] ; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v1, v5 :: v_dual_cndmask_b32 v0, v0, v4 -; GFX1250-SDAG-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_cmpxchg_saddr_i64_nortn_neg128: @@ -4707,7 +4675,7 @@ define amdgpu_ps void @flat_cmpxchg_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 ; GFX1250-GISEL-NEXT: s_wait_loadcnt 0x0 ; GFX1250-GISEL-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[8:9] ; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v6 :: v_dual_cndmask_b32 v1, v1, v7 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -4808,7 +4776,7 @@ define amdgpu_ps <2 x float> @flat_inc_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-SDAG-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3] ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 -; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB98_5 @@ -4854,7 +4822,7 @@ define amdgpu_ps <2 x float> @flat_inc_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc_lo ; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc_lo -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB98_5 @@ -4906,7 +4874,7 @@ define amdgpu_ps <2 x float> @flat_inc_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-SDAG-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3] ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, 0, v5 :: v_dual_cndmask_b32 v2, 0, v4 -; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-SDAG-NEXT: s_branch .LBB99_5 @@ -4955,7 +4923,7 @@ define amdgpu_ps <2 x float> @flat_inc_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc_lo ; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc_lo -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 ; GFX1250-GISEL-NEXT: s_branch .LBB99_5 @@ -5002,7 +4970,7 @@ define amdgpu_ps void @flat_inc_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-SDAG-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3] ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, 0, v5 :: v_dual_cndmask_b32 v0, 0, v4 -; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_inc_saddr_i64_nortn: @@ -5040,7 +5008,7 @@ define amdgpu_ps void @flat_inc_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo ; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -5084,7 +5052,7 @@ define amdgpu_ps void @flat_inc_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-SDAG-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[0:1], v[2:3] ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, 0, v5 :: v_dual_cndmask_b32 v0, 0, v4 -; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_inc_saddr_i64_nortn_neg128: @@ -5125,7 +5093,7 @@ define amdgpu_ps void @flat_inc_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) ; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 0, vcc_lo ; GFX1250-GISEL-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc_lo -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -5229,7 +5197,7 @@ define amdgpu_ps <2 x float> @flat_dec_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-SDAG-NEXT: s_or_b32 vcc_lo, vcc_lo, s0 ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v5, v3 :: v_dual_cndmask_b32 v2, v4, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1250-SDAG-NEXT: s_branch .LBB106_5 @@ -5276,7 +5244,7 @@ define amdgpu_ps <2 x float> @flat_dec_saddr_i64_rtn(ptr inreg %sbase, i32 %voff ; GFX1250-GISEL-NEXT: s_or_b32 vcc_lo, vcc_lo, s0 ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v2, v4 :: v_dual_cndmask_b32 v3, v3, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1250-GISEL-NEXT: s_branch .LBB106_5 @@ -5330,7 +5298,7 @@ define amdgpu_ps <2 x float> @flat_dec_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-SDAG-NEXT: s_or_b32 vcc_lo, vcc_lo, s0 ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v3, v5, v3 :: v_dual_cndmask_b32 v2, v4, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0 ; GFX1250-SDAG-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1250-SDAG-NEXT: s_branch .LBB107_5 @@ -5380,7 +5348,7 @@ define amdgpu_ps <2 x float> @flat_dec_saddr_i64_rtn_neg128(ptr inreg %sbase, i3 ; GFX1250-GISEL-NEXT: s_or_b32 vcc_lo, vcc_lo, s0 ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v2, v2, v4 :: v_dual_cndmask_b32 v3, v3, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v6, v[2:3], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_wait_xcnt 0x0 ; GFX1250-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 ; GFX1250-GISEL-NEXT: s_branch .LBB107_5 @@ -5429,7 +5397,7 @@ define amdgpu_ps void @flat_dec_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-SDAG-NEXT: s_or_b32 vcc_lo, vcc_lo, s0 ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_dec_saddr_i64_nortn: @@ -5468,7 +5436,7 @@ define amdgpu_ps void @flat_dec_saddr_i64_nortn(ptr inreg %sbase, i32 %voffset, ; GFX1250-GISEL-NEXT: s_or_b32 vcc_lo, vcc_lo, s0 ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset @@ -5514,7 +5482,7 @@ define amdgpu_ps void @flat_dec_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-SDAG-NEXT: s_or_b32 vcc_lo, vcc_lo, s0 ; GFX1250-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-SDAG-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_cndmask_b32 v0, v0, v2 -; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off +; GFX1250-SDAG-NEXT: scratch_store_b64 v4, v[0:1], off scope:SCOPE_SE ; GFX1250-SDAG-NEXT: s_endpgm ; ; GFX1250-GISEL-LABEL: flat_dec_saddr_i64_nortn_neg128: @@ -5556,7 +5524,7 @@ define amdgpu_ps void @flat_dec_saddr_i64_nortn_neg128(ptr inreg %sbase, i32 %vo ; GFX1250-GISEL-NEXT: s_or_b32 vcc_lo, vcc_lo, s0 ; GFX1250-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1250-GISEL-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5 -; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off +; GFX1250-GISEL-NEXT: scratch_store_b64 v2, v[0:1], off scope:SCOPE_SE ; GFX1250-GISEL-NEXT: s_endpgm %zext.offset = zext i32 %voffset to i64 %gep0 = getelementptr inbounds i8, ptr %sbase, i64 %zext.offset |