aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/srem64.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/srem64.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/srem64.ll88
1 files changed, 44 insertions, 44 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll
index 47dfa9f..33c2ce6 100644
--- a/llvm/test/CodeGen/AMDGPU/srem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem64.ll
@@ -921,45 +921,47 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: s_ashr_i64 s[10:11], s[2:3], 31
-; GCN-NEXT: s_ashr_i64 s[6:7], s[4:5], 31
-; GCN-NEXT: s_ashr_i32 s4, s5, 31
-; GCN-NEXT: s_add_u32 s6, s6, s4
-; GCN-NEXT: s_mov_b32 s5, s4
-; GCN-NEXT: s_addc_u32 s7, s7, s4
-; GCN-NEXT: s_xor_b64 s[8:9], s[6:7], s[4:5]
+; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], 31
+; GCN-NEXT: s_ashr_i64 s[4:5], s[4:5], 31
+; GCN-NEXT: s_ashr_i32 s6, s5, 31
+; GCN-NEXT: s_add_u32 s4, s4, s6
+; GCN-NEXT: s_mov_b32 s7, s6
+; GCN-NEXT: s_addc_u32 s5, s5, s6
+; GCN-NEXT: s_xor_b64 s[8:9], s[4:5], s[6:7]
; GCN-NEXT: v_cvt_f32_u32_e32 v0, s8
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s9
-; GCN-NEXT: s_sub_u32 s2, 0, s8
-; GCN-NEXT: s_subb_u32 s4, 0, s9
-; GCN-NEXT: s_ashr_i32 s12, s3, 31
+; GCN-NEXT: s_sub_u32 s4, 0, s8
+; GCN-NEXT: s_subb_u32 s5, 0, s9
+; GCN-NEXT: s_ashr_i32 s10, s3, 31
; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
-; GCN-NEXT: s_mov_b32 s13, s12
-; GCN-NEXT: s_mov_b32 s5, s1
-; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_add_u32 s2, s2, s10
+; GCN-NEXT: s_mov_b32 s11, s10
+; GCN-NEXT: s_addc_u32 s3, s3, s10
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
+; GCN-NEXT: s_xor_b64 s[12:13], s[2:3], s[10:11]
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: v_mul_lo_u32 v2, s4, v1
+; GCN-NEXT: v_mul_hi_u32 v3, s4, v0
+; GCN-NEXT: v_mul_lo_u32 v5, s5, v0
+; GCN-NEXT: v_mul_lo_u32 v4, s4, v0
; GCN-NEXT: s_mov_b32 s6, -1
-; GCN-NEXT: v_mul_lo_u32 v2, s2, v1
-; GCN-NEXT: v_mul_hi_u32 v3, s2, v0
-; GCN-NEXT: v_mul_lo_u32 v5, s4, v0
-; GCN-NEXT: v_mul_lo_u32 v4, s2, v0
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; GCN-NEXT: v_mul_hi_u32 v3, v0, v4
; GCN-NEXT: v_mul_lo_u32 v5, v0, v2
-; GCN-NEXT: v_mul_hi_u32 v6, v0, v2
-; GCN-NEXT: v_mul_hi_u32 v7, v1, v2
-; GCN-NEXT: v_mul_lo_u32 v2, v1, v2
-; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
-; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc
+; GCN-NEXT: v_mul_hi_u32 v7, v0, v2
; GCN-NEXT: v_mul_lo_u32 v6, v1, v4
; GCN-NEXT: v_mul_hi_u32 v4, v1, v4
+; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5
+; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v7, vcc
+; GCN-NEXT: v_mul_hi_u32 v7, v1, v2
+; GCN-NEXT: v_mul_lo_u32 v2, v1, v2
; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v6
; GCN-NEXT: v_addc_u32_e32 v3, vcc, v5, v4, vcc
; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v7, vcc
@@ -967,12 +969,12 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc
; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GCN-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; GCN-NEXT: v_mul_lo_u32 v2, s2, v1
-; GCN-NEXT: v_mul_hi_u32 v3, s2, v0
-; GCN-NEXT: v_mul_lo_u32 v4, s4, v0
-; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: v_mul_lo_u32 v2, s4, v1
+; GCN-NEXT: v_mul_hi_u32 v3, s4, v0
+; GCN-NEXT: v_mul_lo_u32 v4, s5, v0
+; GCN-NEXT: s_mov_b32 s5, s1
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3
-; GCN-NEXT: v_mul_lo_u32 v3, s2, v0
+; GCN-NEXT: v_mul_lo_u32 v3, s4, v0
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
; GCN-NEXT: v_mul_lo_u32 v6, v0, v2
; GCN-NEXT: v_mul_hi_u32 v7, v0, v3
@@ -988,20 +990,18 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc
; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GCN-NEXT: s_add_u32 s2, s10, s12
; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2
-; GCN-NEXT: s_addc_u32 s3, s11, s12
; GCN-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc
-; GCN-NEXT: s_xor_b64 s[10:11], s[2:3], s[12:13]
-; GCN-NEXT: v_mul_lo_u32 v2, s10, v1
-; GCN-NEXT: v_mul_hi_u32 v3, s10, v0
-; GCN-NEXT: v_mul_hi_u32 v4, s10, v1
-; GCN-NEXT: v_mul_hi_u32 v5, s11, v1
-; GCN-NEXT: v_mul_lo_u32 v1, s11, v1
+; GCN-NEXT: v_mul_lo_u32 v2, s12, v1
+; GCN-NEXT: v_mul_hi_u32 v3, s12, v0
+; GCN-NEXT: v_mul_hi_u32 v4, s12, v1
+; GCN-NEXT: v_mul_hi_u32 v5, s13, v1
+; GCN-NEXT: v_mul_lo_u32 v1, s13, v1
; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2
; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GCN-NEXT: v_mul_lo_u32 v4, s11, v0
-; GCN-NEXT: v_mul_hi_u32 v0, s11, v0
+; GCN-NEXT: v_mul_lo_u32 v4, s13, v0
+; GCN-NEXT: v_mul_hi_u32 v0, s13, v0
+; GCN-NEXT: s_mov_b32 s4, s0
; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4
; GCN-NEXT: v_addc_u32_e32 v0, vcc, v3, v0, vcc
; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc
@@ -1013,9 +1013,9 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
; GCN-NEXT: v_mul_lo_u32 v0, s8, v0
; GCN-NEXT: v_add_i32_e32 v1, vcc, v1, v2
; GCN-NEXT: v_add_i32_e32 v1, vcc, v3, v1
-; GCN-NEXT: v_sub_i32_e32 v2, vcc, s11, v1
+; GCN-NEXT: v_sub_i32_e32 v2, vcc, s13, v1
; GCN-NEXT: v_mov_b32_e32 v3, s9
-; GCN-NEXT: v_sub_i32_e32 v0, vcc, s10, v0
+; GCN-NEXT: v_sub_i32_e32 v0, vcc, s12, v0
; GCN-NEXT: v_subb_u32_e64 v2, s[0:1], v2, v3, vcc
; GCN-NEXT: v_subrev_i32_e64 v4, s[0:1], s8, v0
; GCN-NEXT: v_subbrev_u32_e64 v5, s[2:3], 0, v2, s[0:1]
@@ -1030,7 +1030,7 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
; GCN-NEXT: v_subbrev_u32_e64 v2, s[0:1], 0, v2, s[0:1]
; GCN-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v6
; GCN-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[0:1]
-; GCN-NEXT: v_mov_b32_e32 v4, s11
+; GCN-NEXT: v_mov_b32_e32 v4, s13
; GCN-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc
; GCN-NEXT: v_cmp_le_u32_e32 vcc, s9, v1
; GCN-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
@@ -1042,10 +1042,10 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GCN-NEXT: v_xor_b32_e32 v0, s12, v0
-; GCN-NEXT: v_xor_b32_e32 v1, s12, v1
-; GCN-NEXT: v_mov_b32_e32 v2, s12
-; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s12, v0
+; GCN-NEXT: v_xor_b32_e32 v0, s10, v0
+; GCN-NEXT: v_xor_b32_e32 v1, s10, v1
+; GCN-NEXT: v_mov_b32_e32 v2, s10
+; GCN-NEXT: v_subrev_i32_e32 v0, vcc, s10, v0
; GCN-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NEXT: s_endpgm