aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll420
1 files changed, 203 insertions, 217 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
index 54b1554..df77e7d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -40,34 +40,33 @@ define amdgpu_kernel void @udiv_i32(ptr addrspace(1) %out, i32 %x, i32 %y) {
; GFX6-LABEL: udiv_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s3
-; GFX6-NEXT: s_sub_i32 s4, 0, s3
-; GFX6-NEXT: s_mov_b32 s5, s1
+; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s5
+; GFX6-NEXT: s_sub_i32 s2, 0, s5
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT: v_mul_lo_u32 v1, s4, v0
-; GFX6-NEXT: s_mov_b32 s4, s0
+; GFX6-NEXT: v_mul_lo_u32 v1, s2, v0
+; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
-; GFX6-NEXT: v_readfirstlane_b32 s0, v0
-; GFX6-NEXT: s_mul_i32 s0, s0, s3
-; GFX6-NEXT: s_sub_i32 s0, s2, s0
-; GFX6-NEXT: s_sub_i32 s1, s0, s3
+; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0
+; GFX6-NEXT: v_readfirstlane_b32 s6, v0
+; GFX6-NEXT: s_mul_i32 s6, s6, s5
+; GFX6-NEXT: s_sub_i32 s4, s4, s6
+; GFX6-NEXT: s_sub_i32 s6, s4, s5
; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0
-; GFX6-NEXT: s_cmp_ge_u32 s0, s3
+; GFX6-NEXT: s_cmp_ge_u32 s4, s5
; GFX6-NEXT: s_cselect_b64 vcc, -1, 0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT: s_cselect_b32 s0, s1, s0
+; GFX6-NEXT: s_cselect_b32 s4, s6, s4
; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0
-; GFX6-NEXT: s_cmp_ge_u32 s0, s3
+; GFX6-NEXT: s_cmp_ge_u32 s4, s5
; GFX6-NEXT: s_cselect_b64 vcc, -1, 0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX9-LABEL: udiv_i32:
@@ -138,31 +137,30 @@ define amdgpu_kernel void @urem_i32(ptr addrspace(1) %out, i32 %x, i32 %y) {
; GFX6-LABEL: urem_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s3
-; GFX6-NEXT: s_sub_i32 s4, 0, s3
-; GFX6-NEXT: s_mov_b32 s5, s1
+; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s5
+; GFX6-NEXT: s_sub_i32 s2, 0, s5
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT: v_mul_lo_u32 v1, s4, v0
-; GFX6-NEXT: s_mov_b32 s4, s0
+; GFX6-NEXT: v_mul_lo_u32 v1, s2, v0
+; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0
-; GFX6-NEXT: v_readfirstlane_b32 s0, v0
-; GFX6-NEXT: s_mul_i32 s0, s0, s3
-; GFX6-NEXT: s_sub_i32 s0, s2, s0
-; GFX6-NEXT: s_sub_i32 s1, s0, s3
-; GFX6-NEXT: s_cmp_ge_u32 s0, s3
-; GFX6-NEXT: s_cselect_b32 s0, s1, s0
-; GFX6-NEXT: s_sub_i32 s1, s0, s3
-; GFX6-NEXT: s_cmp_ge_u32 s0, s3
-; GFX6-NEXT: s_cselect_b32 s0, s1, s0
-; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0
+; GFX6-NEXT: v_readfirstlane_b32 s6, v0
+; GFX6-NEXT: s_mul_i32 s6, s6, s5
+; GFX6-NEXT: s_sub_i32 s4, s4, s6
+; GFX6-NEXT: s_sub_i32 s6, s4, s5
+; GFX6-NEXT: s_cmp_ge_u32 s4, s5
+; GFX6-NEXT: s_cselect_b32 s4, s6, s4
+; GFX6-NEXT: s_sub_i32 s6, s4, s5
+; GFX6-NEXT: s_cmp_ge_u32 s4, s5
+; GFX6-NEXT: s_cselect_b32 s4, s6, s4
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX9-LABEL: urem_i32:
@@ -242,40 +240,39 @@ define amdgpu_kernel void @sdiv_i32(ptr addrspace(1) %out, i32 %x, i32 %y) {
; GFX6-LABEL: sdiv_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_abs_i32 s8, s3
-; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s8
-; GFX6-NEXT: s_sub_i32 s4, 0, s8
-; GFX6-NEXT: s_mov_b32 s5, s1
-; GFX6-NEXT: s_xor_b32 s1, s2, s3
+; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX6-NEXT: s_abs_i32 s6, s5
+; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s6
+; GFX6-NEXT: s_sub_i32 s2, 0, s6
+; GFX6-NEXT: s_abs_i32 s7, s4
+; GFX6-NEXT: s_xor_b32 s4, s4, s5
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
-; GFX6-NEXT: s_ashr_i32 s1, s1, 31
+; GFX6-NEXT: s_ashr_i32 s4, s4, 31
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT: v_mul_lo_u32 v1, s4, v0
-; GFX6-NEXT: s_mov_b32 s4, s0
-; GFX6-NEXT: s_abs_i32 s0, s2
+; GFX6-NEXT: v_mul_lo_u32 v1, s2, v0
+; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GFX6-NEXT: v_mul_hi_u32 v0, s0, v0
-; GFX6-NEXT: v_readfirstlane_b32 s2, v0
-; GFX6-NEXT: s_mul_i32 s2, s2, s8
-; GFX6-NEXT: s_sub_i32 s0, s0, s2
-; GFX6-NEXT: s_sub_i32 s2, s0, s8
+; GFX6-NEXT: v_mul_hi_u32 v0, s7, v0
+; GFX6-NEXT: v_readfirstlane_b32 s5, v0
+; GFX6-NEXT: s_mul_i32 s5, s5, s6
+; GFX6-NEXT: s_sub_i32 s5, s7, s5
+; GFX6-NEXT: s_sub_i32 s7, s5, s6
; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0
-; GFX6-NEXT: s_cmp_ge_u32 s0, s8
+; GFX6-NEXT: s_cmp_ge_u32 s5, s6
; GFX6-NEXT: s_cselect_b64 vcc, -1, 0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT: s_cselect_b32 s0, s2, s0
+; GFX6-NEXT: s_cselect_b32 s5, s7, s5
; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0
-; GFX6-NEXT: s_cmp_ge_u32 s0, s8
+; GFX6-NEXT: s_cmp_ge_u32 s5, s6
; GFX6-NEXT: s_cselect_b64 vcc, -1, 0
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT: v_xor_b32_e32 v0, s1, v0
-; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s1, v0
-; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT: v_xor_b32_e32 v0, s4, v0
+; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0
+; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX9-LABEL: sdiv_i32:
@@ -360,36 +357,35 @@ define amdgpu_kernel void @srem_i32(ptr addrspace(1) %out, i32 %x, i32 %y) {
; GFX6-LABEL: srem_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_abs_i32 s3, s3
-; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s3
-; GFX6-NEXT: s_sub_i32 s4, 0, s3
-; GFX6-NEXT: s_abs_i32 s8, s2
-; GFX6-NEXT: s_mov_b32 s5, s1
+; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX6-NEXT: s_abs_i32 s5, s5
+; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s5
+; GFX6-NEXT: s_sub_i32 s2, 0, s5
+; GFX6-NEXT: s_abs_i32 s6, s4
+; GFX6-NEXT: s_ashr_i32 s4, s4, 31
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT: v_mul_lo_u32 v1, s4, v0
-; GFX6-NEXT: s_mov_b32 s4, s0
-; GFX6-NEXT: s_ashr_i32 s0, s2, 31
+; GFX6-NEXT: v_mul_lo_u32 v1, s2, v0
+; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GFX6-NEXT: v_mul_hi_u32 v0, s8, v0
-; GFX6-NEXT: v_readfirstlane_b32 s1, v0
-; GFX6-NEXT: s_mul_i32 s1, s1, s3
-; GFX6-NEXT: s_sub_i32 s1, s8, s1
-; GFX6-NEXT: s_sub_i32 s2, s1, s3
-; GFX6-NEXT: s_cmp_ge_u32 s1, s3
-; GFX6-NEXT: s_cselect_b32 s1, s2, s1
-; GFX6-NEXT: s_sub_i32 s2, s1, s3
-; GFX6-NEXT: s_cmp_ge_u32 s1, s3
-; GFX6-NEXT: s_cselect_b32 s1, s2, s1
-; GFX6-NEXT: s_xor_b32 s1, s1, s0
-; GFX6-NEXT: s_sub_i32 s0, s1, s0
-; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT: v_mul_hi_u32 v0, s6, v0
+; GFX6-NEXT: v_readfirstlane_b32 s7, v0
+; GFX6-NEXT: s_mul_i32 s7, s7, s5
+; GFX6-NEXT: s_sub_i32 s6, s6, s7
+; GFX6-NEXT: s_sub_i32 s7, s6, s5
+; GFX6-NEXT: s_cmp_ge_u32 s6, s5
+; GFX6-NEXT: s_cselect_b32 s6, s7, s6
+; GFX6-NEXT: s_sub_i32 s7, s6, s5
+; GFX6-NEXT: s_cmp_ge_u32 s6, s5
+; GFX6-NEXT: s_cselect_b32 s5, s7, s6
+; GFX6-NEXT: s_xor_b32 s5, s5, s4
+; GFX6-NEXT: s_sub_i32 s4, s5, s4
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX9-LABEL: srem_i32:
@@ -5462,15 +5458,14 @@ define amdgpu_kernel void @udiv_i32_pow2_shl_denom(ptr addrspace(1) %out, i32 %x
; GFX6-LABEL: udiv_i32_pow2_shl_denom:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_mov_b32 s4, s0
-; GFX6-NEXT: s_add_i32 s0, s3, 12
-; GFX6-NEXT: s_lshr_b32 s0, s2, s0
-; GFX6-NEXT: s_mov_b32 s5, s1
-; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX6-NEXT: s_add_i32 s5, s5, 12
+; GFX6-NEXT: s_lshr_b32 s4, s4, s5
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX9-LABEL: udiv_i32_pow2_shl_denom:
@@ -5503,16 +5498,15 @@ define amdgpu_kernel void @udiv_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3
; GFX6-LABEL: udiv_v2i32_pow2k_denom:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_mov_b32 s4, s0
-; GFX6-NEXT: s_mov_b32 s5, s1
-; GFX6-NEXT: s_lshr_b32 s0, s2, 12
-; GFX6-NEXT: s_lshr_b32 s1, s3, 12
-; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: v_mov_b32_e32 v1, s1
-; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX6-NEXT: s_lshr_b32 s4, s4, 12
+; GFX6-NEXT: s_lshr_b32 s5, s5, 12
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: v_mov_b32_e32 v1, s5
+; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX9-LABEL: udiv_v2i32_pow2k_denom:
@@ -5546,19 +5540,18 @@ define amdgpu_kernel void @udiv_v2i32_mixed_pow2k_denom(ptr addrspace(1) %out, <
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: v_mov_b32_e32 v0, 0x100101
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: v_mul_hi_u32 v0, s3, v0
-; GFX6-NEXT: s_mov_b32 s4, s0
-; GFX6-NEXT: s_lshr_b32 s0, s2, 12
-; GFX6-NEXT: s_mov_b32 s5, s1
-; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s3, v0
+; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0
+; GFX6-NEXT: s_lshr_b32 s4, s4, 12
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v1, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 11, v0
-; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX9-LABEL: udiv_v2i32_mixed_pow2k_denom:
@@ -5855,16 +5848,15 @@ define amdgpu_kernel void @urem_i32_pow2_shl_denom(ptr addrspace(1) %out, i32 %x
; GFX6-LABEL: urem_i32_pow2_shl_denom:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_mov_b32 s4, s0
-; GFX6-NEXT: s_lshl_b32 s0, 0x1000, s3
-; GFX6-NEXT: s_add_i32 s0, s0, -1
-; GFX6-NEXT: s_and_b32 s0, s2, s0
-; GFX6-NEXT: s_mov_b32 s5, s1
-; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX6-NEXT: s_lshl_b32 s5, 0x1000, s5
+; GFX6-NEXT: s_add_i32 s5, s5, -1
+; GFX6-NEXT: s_and_b32 s4, s4, s5
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX9-LABEL: urem_i32_pow2_shl_denom:
@@ -5898,16 +5890,15 @@ define amdgpu_kernel void @urem_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3
; GFX6-LABEL: urem_v2i32_pow2k_denom:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_mov_b32 s4, s0
-; GFX6-NEXT: s_mov_b32 s5, s1
-; GFX6-NEXT: s_and_b32 s0, s2, 0xfff
-; GFX6-NEXT: s_and_b32 s1, s3, 0xfff
-; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: v_mov_b32_e32 v1, s1
-; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX6-NEXT: s_and_b32 s4, s4, 0xfff
+; GFX6-NEXT: s_and_b32 s5, s5, 0xfff
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: v_mov_b32_e32 v1, s5
+; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX9-LABEL: urem_v2i32_pow2k_denom:
@@ -6187,41 +6178,40 @@ define amdgpu_kernel void @sdiv_i32_pow2_shl_denom(ptr addrspace(1) %out, i32 %x
; GFX6-LABEL: sdiv_i32_pow2_shl_denom:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_lshl_b32 s3, 0x1000, s3
-; GFX6-NEXT: s_abs_i32 s8, s3
-; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s8
-; GFX6-NEXT: s_sub_i32 s4, 0, s8
-; GFX6-NEXT: s_abs_i32 s9, s2
-; GFX6-NEXT: s_mov_b32 s5, s1
+; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX6-NEXT: s_lshl_b32 s5, 0x1000, s5
+; GFX6-NEXT: s_abs_i32 s6, s5
+; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s6
+; GFX6-NEXT: s_sub_i32 s2, 0, s6
+; GFX6-NEXT: s_abs_i32 s7, s4
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT: v_mul_lo_u32 v1, s4, v0
-; GFX6-NEXT: s_mov_b32 s4, s0
+; GFX6-NEXT: v_mul_lo_u32 v1, s2, v0
+; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GFX6-NEXT: v_mul_hi_u32 v0, s9, v0
-; GFX6-NEXT: v_readfirstlane_b32 s0, v0
-; GFX6-NEXT: s_mul_i32 s0, s0, s8
-; GFX6-NEXT: s_sub_i32 s0, s9, s0
-; GFX6-NEXT: s_sub_i32 s1, s0, s8
+; GFX6-NEXT: v_mul_hi_u32 v0, s7, v0
+; GFX6-NEXT: v_readfirstlane_b32 s8, v0
+; GFX6-NEXT: s_mul_i32 s8, s8, s6
+; GFX6-NEXT: s_sub_i32 s7, s7, s8
+; GFX6-NEXT: s_sub_i32 s8, s7, s6
; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0
-; GFX6-NEXT: s_cmp_ge_u32 s0, s8
+; GFX6-NEXT: s_cmp_ge_u32 s7, s6
; GFX6-NEXT: s_cselect_b64 vcc, -1, 0
-; GFX6-NEXT: s_cselect_b32 s0, s1, s0
+; GFX6-NEXT: s_cselect_b32 s7, s8, s7
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
; GFX6-NEXT: v_add_i32_e32 v1, vcc, 1, v0
-; GFX6-NEXT: s_cmp_ge_u32 s0, s8
+; GFX6-NEXT: s_cmp_ge_u32 s7, s6
; GFX6-NEXT: s_cselect_b64 vcc, -1, 0
-; GFX6-NEXT: s_xor_b32 s0, s2, s3
+; GFX6-NEXT: s_xor_b32 s4, s4, s5
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
-; GFX6-NEXT: s_ashr_i32 s0, s0, 31
-; GFX6-NEXT: v_xor_b32_e32 v0, s0, v0
-; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s0, v0
-; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT: s_ashr_i32 s4, s4, 31
+; GFX6-NEXT: v_xor_b32_e32 v0, s4, v0
+; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s4, v0
+; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX9-LABEL: sdiv_i32_pow2_shl_denom:
@@ -6279,22 +6269,21 @@ define amdgpu_kernel void @sdiv_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3
; GFX6-LABEL: sdiv_v2i32_pow2k_denom:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_mov_b32 s4, s0
-; GFX6-NEXT: s_mov_b32 s5, s1
-; GFX6-NEXT: s_ashr_i32 s0, s2, 31
-; GFX6-NEXT: s_ashr_i32 s1, s3, 31
-; GFX6-NEXT: s_lshr_b32 s0, s0, 20
-; GFX6-NEXT: s_lshr_b32 s1, s1, 20
-; GFX6-NEXT: s_add_i32 s0, s2, s0
-; GFX6-NEXT: s_add_i32 s1, s3, s1
-; GFX6-NEXT: s_ashr_i32 s0, s0, 12
-; GFX6-NEXT: s_ashr_i32 s1, s1, 12
-; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: v_mov_b32_e32 v1, s1
-; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX6-NEXT: s_ashr_i32 s6, s4, 31
+; GFX6-NEXT: s_ashr_i32 s7, s5, 31
+; GFX6-NEXT: s_lshr_b32 s6, s6, 20
+; GFX6-NEXT: s_lshr_b32 s7, s7, 20
+; GFX6-NEXT: s_add_i32 s4, s4, s6
+; GFX6-NEXT: s_add_i32 s5, s5, s7
+; GFX6-NEXT: s_ashr_i32 s4, s4, 12
+; GFX6-NEXT: s_ashr_i32 s5, s5, 12
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: v_mov_b32_e32 v1, s5
+; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX9-LABEL: sdiv_v2i32_pow2k_denom:
@@ -6334,22 +6323,21 @@ define amdgpu_kernel void @ssdiv_v2i32_mixed_pow2k_denom(ptr addrspace(1) %out,
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX6-NEXT: v_mov_b32_e32 v0, 0x80080081
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: v_mul_hi_i32 v0, s3, v0
-; GFX6-NEXT: s_mov_b32 s4, s0
-; GFX6-NEXT: s_ashr_i32 s0, s2, 31
-; GFX6-NEXT: s_lshr_b32 s0, s0, 20
-; GFX6-NEXT: s_add_i32 s0, s2, s0
-; GFX6-NEXT: v_add_i32_e32 v0, vcc, s3, v0
-; GFX6-NEXT: s_ashr_i32 s0, s0, 12
+; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX6-NEXT: v_mul_hi_i32 v0, s5, v0
+; GFX6-NEXT: s_ashr_i32 s6, s4, 31
+; GFX6-NEXT: s_lshr_b32 s6, s6, 20
+; GFX6-NEXT: s_add_i32 s4, s4, s6
+; GFX6-NEXT: v_add_i32_e32 v0, vcc, s5, v0
+; GFX6-NEXT: s_ashr_i32 s4, s4, 12
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 31, v0
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 11, v0
-; GFX6-NEXT: s_mov_b32 s5, s1
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v0, v1
-; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX9-LABEL: ssdiv_v2i32_mixed_pow2k_denom:
@@ -6700,37 +6688,36 @@ define amdgpu_kernel void @srem_i32_pow2_shl_denom(ptr addrspace(1) %out, i32 %x
; GFX6-LABEL: srem_i32_pow2_shl_denom:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_lshl_b32 s3, 0x1000, s3
-; GFX6-NEXT: s_abs_i32 s3, s3
-; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s3
-; GFX6-NEXT: s_sub_i32 s4, 0, s3
-; GFX6-NEXT: s_abs_i32 s8, s2
-; GFX6-NEXT: s_mov_b32 s5, s1
+; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX6-NEXT: s_lshl_b32 s2, 0x1000, s5
+; GFX6-NEXT: s_abs_i32 s5, s2
+; GFX6-NEXT: v_cvt_f32_u32_e32 v0, s5
+; GFX6-NEXT: s_sub_i32 s2, 0, s5
+; GFX6-NEXT: s_abs_i32 s6, s4
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT: v_mul_lo_u32 v1, s4, v0
-; GFX6-NEXT: s_mov_b32 s4, s0
+; GFX6-NEXT: v_mul_lo_u32 v1, s2, v0
+; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
-; GFX6-NEXT: v_mul_hi_u32 v0, s8, v0
-; GFX6-NEXT: v_readfirstlane_b32 s0, v0
-; GFX6-NEXT: s_mul_i32 s0, s0, s3
-; GFX6-NEXT: s_sub_i32 s0, s8, s0
-; GFX6-NEXT: s_sub_i32 s1, s0, s3
-; GFX6-NEXT: s_cmp_ge_u32 s0, s3
-; GFX6-NEXT: s_cselect_b32 s0, s1, s0
-; GFX6-NEXT: s_sub_i32 s1, s0, s3
-; GFX6-NEXT: s_cmp_ge_u32 s0, s3
-; GFX6-NEXT: s_cselect_b32 s0, s1, s0
-; GFX6-NEXT: s_ashr_i32 s1, s2, 31
-; GFX6-NEXT: s_xor_b32 s0, s0, s1
-; GFX6-NEXT: s_sub_i32 s0, s0, s1
-; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT: v_mul_hi_u32 v0, s6, v0
+; GFX6-NEXT: v_readfirstlane_b32 s7, v0
+; GFX6-NEXT: s_mul_i32 s7, s7, s5
+; GFX6-NEXT: s_sub_i32 s6, s6, s7
+; GFX6-NEXT: s_sub_i32 s7, s6, s5
+; GFX6-NEXT: s_cmp_ge_u32 s6, s5
+; GFX6-NEXT: s_cselect_b32 s6, s7, s6
+; GFX6-NEXT: s_sub_i32 s7, s6, s5
+; GFX6-NEXT: s_cmp_ge_u32 s6, s5
+; GFX6-NEXT: s_cselect_b32 s5, s7, s6
+; GFX6-NEXT: s_ashr_i32 s4, s4, 31
+; GFX6-NEXT: s_xor_b32 s5, s5, s4
+; GFX6-NEXT: s_sub_i32 s4, s5, s4
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX9-LABEL: srem_i32_pow2_shl_denom:
@@ -6785,24 +6772,23 @@ define amdgpu_kernel void @srem_v2i32_pow2k_denom(ptr addrspace(1) %out, <2 x i3
; GFX6-LABEL: srem_v2i32_pow2k_denom:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
-; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_mov_b32 s4, s0
-; GFX6-NEXT: s_mov_b32 s5, s1
-; GFX6-NEXT: s_ashr_i32 s0, s2, 31
-; GFX6-NEXT: s_ashr_i32 s1, s3, 31
-; GFX6-NEXT: s_lshr_b32 s0, s0, 20
-; GFX6-NEXT: s_lshr_b32 s1, s1, 20
-; GFX6-NEXT: s_add_i32 s0, s2, s0
-; GFX6-NEXT: s_add_i32 s1, s3, s1
-; GFX6-NEXT: s_and_b32 s0, s0, 0xfffff000
-; GFX6-NEXT: s_and_b32 s1, s1, 0xfffff000
-; GFX6-NEXT: s_sub_i32 s0, s2, s0
-; GFX6-NEXT: s_sub_i32 s1, s3, s1
-; GFX6-NEXT: v_mov_b32_e32 v0, s0
-; GFX6-NEXT: v_mov_b32_e32 v1, s1
-; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3]
+; GFX6-NEXT: s_ashr_i32 s6, s4, 31
+; GFX6-NEXT: s_lshr_b32 s6, s6, 20
+; GFX6-NEXT: s_ashr_i32 s7, s5, 31
+; GFX6-NEXT: s_add_i32 s6, s4, s6
+; GFX6-NEXT: s_lshr_b32 s7, s7, 20
+; GFX6-NEXT: s_and_b32 s6, s6, 0xfffff000
+; GFX6-NEXT: s_sub_i32 s4, s4, s6
+; GFX6-NEXT: s_add_i32 s6, s5, s7
+; GFX6-NEXT: s_and_b32 s6, s6, 0xfffff000
+; GFX6-NEXT: s_sub_i32 s5, s5, s6
+; GFX6-NEXT: s_mov_b32 s3, 0xf000
+; GFX6-NEXT: s_mov_b32 s2, -1
+; GFX6-NEXT: v_mov_b32_e32 v0, s4
+; GFX6-NEXT: v_mov_b32_e32 v1, s5
+; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX6-NEXT: s_endpgm
;
; GFX9-LABEL: srem_v2i32_pow2k_denom: