diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/urem64.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/urem64.ll | 146 |
1 files changed, 65 insertions, 81 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll index cdcc914..137dc1f 100644 --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -51,10 +51,9 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-NEXT: s_addc_u32 s13, 0, s14 ; GCN-NEXT: s_add_u32 s14, s0, s1 ; GCN-NEXT: v_mov_b32_e32 v0, s14 -; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0 ; GCN-NEXT: v_mul_hi_u32 v0, s10, v0 +; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0 ; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_cmp_lg_u32 s0, 0 ; GCN-NEXT: s_addc_u32 s12, s12, s13 ; GCN-NEXT: s_mul_i32 s0, s10, s12 ; GCN-NEXT: v_readfirstlane_b32 s1, v0 @@ -85,7 +84,6 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-NEXT: s_add_u32 s11, s14, s0 ; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0 ; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_cmp_lg_u32 s0, 0 ; GCN-NEXT: s_addc_u32 s1, s12, s10 ; GCN-NEXT: v_mov_b32_e32 v0, s1 ; GCN-NEXT: v_mul_hi_u32 v1, s6, v0 @@ -115,46 +113,43 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-NEXT: v_readfirstlane_b32 s10, v0 ; GCN-NEXT: s_add_i32 s5, s10, s5 ; GCN-NEXT: s_mul_i32 s10, s9, s4 -; GCN-NEXT: s_add_i32 s10, s5, s10 -; GCN-NEXT: s_sub_i32 s11, s7, s10 +; GCN-NEXT: s_add_i32 s12, s5, s10 +; GCN-NEXT: s_sub_i32 s10, s7, s12 ; GCN-NEXT: s_mul_i32 s4, s8, s4 ; GCN-NEXT: s_sub_u32 s6, s6, s4 ; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0 -; GCN-NEXT: s_or_b32 s12, s4, s5 -; GCN-NEXT: s_cmp_lg_u32 s12, 0 -; GCN-NEXT: s_subb_u32 s11, s11, s9 -; GCN-NEXT: s_sub_u32 s13, s6, s8 -; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0 +; GCN-NEXT: s_or_b32 s11, s4, s5 +; GCN-NEXT: s_subb_u32 s13, s10, s9 +; GCN-NEXT: s_sub_u32 s14, s6, s8 +; GCN-NEXT: s_cselect_b64 s[10:11], -1, 0 +; GCN-NEXT: s_or_b32 s15, s10, s11 +; GCN-NEXT: s_subb_u32 s15, s13, 0 +; GCN-NEXT: s_cmp_ge_u32 s15, s9 +; GCN-NEXT: s_cselect_b32 s16, -1, 0 +; GCN-NEXT: s_cmp_ge_u32 s14, s8 +; GCN-NEXT: s_cselect_b32 s17, -1, 0 +; GCN-NEXT: s_cmp_eq_u32 s15, s9 +; GCN-NEXT: s_cselect_b32 s16, s17, s16 +; GCN-NEXT: s_or_b32 s10, s10, s11 +; GCN-NEXT: s_subb_u32 s13, s13, s9 +; GCN-NEXT: s_sub_u32 s17, s14, s8 +; GCN-NEXT: s_cselect_b64 s[10:11], -1, 0 +; GCN-NEXT: s_or_b32 s10, s10, s11 +; GCN-NEXT: s_subb_u32 s10, s13, 0 +; GCN-NEXT: s_cmp_lg_u32 s16, 0 +; GCN-NEXT: s_cselect_b32 s11, s17, s14 +; GCN-NEXT: s_cselect_b32 s10, s10, s15 ; GCN-NEXT: s_or_b32 s4, s4, s5 -; GCN-NEXT: s_cmp_lg_u32 s4, 0 -; GCN-NEXT: s_subb_u32 s14, s11, 0 -; GCN-NEXT: s_cmp_ge_u32 s14, s9 +; GCN-NEXT: s_subb_u32 s4, s7, s12 +; GCN-NEXT: s_cmp_ge_u32 s4, s9 ; GCN-NEXT: s_cselect_b32 s5, -1, 0 -; GCN-NEXT: s_cmp_ge_u32 s13, s8 -; GCN-NEXT: s_cselect_b32 s15, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s14, s9 -; GCN-NEXT: s_cselect_b32 s15, s15, s5 -; GCN-NEXT: s_cmp_lg_u32 s4, 0 -; GCN-NEXT: s_subb_u32 s11, s11, s9 -; GCN-NEXT: s_sub_u32 s16, s13, s8 -; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0 -; GCN-NEXT: s_or_b32 s4, s4, s5 -; GCN-NEXT: s_cmp_lg_u32 s4, 0 -; GCN-NEXT: s_subb_u32 s4, s11, 0 -; GCN-NEXT: s_cmp_lg_u32 s15, 0 -; GCN-NEXT: s_cselect_b32 s5, s16, s13 -; GCN-NEXT: s_cselect_b32 s4, s4, s14 -; GCN-NEXT: s_cmp_lg_u32 s12, 0 -; GCN-NEXT: s_subb_u32 s7, s7, s10 -; GCN-NEXT: s_cmp_ge_u32 s7, s9 -; GCN-NEXT: s_cselect_b32 s10, -1, 0 ; GCN-NEXT: s_cmp_ge_u32 s6, s8 -; GCN-NEXT: s_cselect_b32 s8, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s7, s9 -; GCN-NEXT: s_cselect_b32 s8, s8, s10 -; GCN-NEXT: s_cmp_lg_u32 s8, 0 -; GCN-NEXT: s_cselect_b32 s4, s4, s7 -; GCN-NEXT: s_cselect_b32 s5, s5, s6 +; GCN-NEXT: s_cselect_b32 s7, -1, 0 +; GCN-NEXT: s_cmp_eq_u32 s4, s9 +; GCN-NEXT: s_cselect_b32 s5, s7, s5 +; GCN-NEXT: s_cmp_lg_u32 s5, 0 +; GCN-NEXT: s_cselect_b32 s4, s10, s4 +; GCN-NEXT: s_cselect_b32 s5, s11, s6 ; GCN-NEXT: v_mov_b32_e32 v0, s5 ; GCN-NEXT: v_mov_b32_e32 v1, s4 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -187,7 +182,6 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-IR-NEXT: s_add_u32 s14, s12, 1 ; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0 ; GCN-IR-NEXT: s_or_b32 s8, s8, s9 -; GCN-IR-NEXT: s_cmp_lg_u32 s8, 0 ; GCN-IR-NEXT: s_addc_u32 s8, s13, 0 ; GCN-IR-NEXT: s_cselect_b64 s[8:9], -1, 0 ; GCN-IR-NEXT: s_sub_i32 s12, 63, s12 @@ -221,7 +215,6 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-IR-NEXT: s_add_u32 s16, s16, 1 ; GCN-IR-NEXT: s_cselect_b64 s[18:19], -1, 0 ; GCN-IR-NEXT: s_or_b32 s18, s18, s19 -; GCN-IR-NEXT: s_cmp_lg_u32 s18, 0 ; GCN-IR-NEXT: s_addc_u32 s17, s17, 0 ; GCN-IR-NEXT: s_cselect_b64 s[18:19], -1, 0 ; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5] @@ -853,10 +846,9 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: s_addc_u32 s10, 0, s11 ; GCN-NEXT: s_add_u32 s11, s4, s5 ; GCN-NEXT: v_mov_b32_e32 v0, s11 -; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GCN-NEXT: v_mul_hi_u32 v0, s6, v0 +; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GCN-NEXT: s_or_b32 s4, s4, s5 -; GCN-NEXT: s_cmp_lg_u32 s4, 0 ; GCN-NEXT: s_addc_u32 s9, s9, s10 ; GCN-NEXT: s_mul_i32 s4, s6, s9 ; GCN-NEXT: v_readfirstlane_b32 s5, v0 @@ -887,7 +879,6 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: s_add_u32 s8, s11, s4 ; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0 ; GCN-NEXT: s_or_b32 s4, s4, s5 -; GCN-NEXT: s_cmp_lg_u32 s4, 0 ; GCN-NEXT: s_addc_u32 s4, s9, s6 ; GCN-NEXT: v_mul_hi_u32 v1, s8, 24 ; GCN-NEXT: v_mul_hi_u32 v0, s4, 24 @@ -903,46 +894,43 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: s_mov_b32 s5, s1 ; GCN-NEXT: s_mul_i32 s0, s3, s8 ; GCN-NEXT: v_readfirstlane_b32 s1, v0 -; GCN-NEXT: s_add_i32 s9, s1, s0 -; GCN-NEXT: s_sub_i32 s10, 0, s9 +; GCN-NEXT: s_add_i32 s10, s1, s0 +; GCN-NEXT: s_sub_i32 s9, 0, s10 ; GCN-NEXT: s_mul_i32 s0, s2, s8 -; GCN-NEXT: s_sub_u32 s8, 24, s0 -; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0 -; GCN-NEXT: s_or_b32 s11, s0, s1 -; GCN-NEXT: s_cmp_lg_u32 s11, 0 -; GCN-NEXT: s_subb_u32 s10, s10, s3 -; GCN-NEXT: s_sub_u32 s12, s8, s2 +; GCN-NEXT: s_sub_u32 s11, 24, s0 ; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0 +; GCN-NEXT: s_or_b32 s8, s0, s1 +; GCN-NEXT: s_subb_u32 s12, s9, s3 +; GCN-NEXT: s_sub_u32 s13, s11, s2 +; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0 +; GCN-NEXT: s_or_b32 s14, s8, s9 +; GCN-NEXT: s_subb_u32 s14, s12, 0 +; GCN-NEXT: s_cmp_ge_u32 s14, s3 +; GCN-NEXT: s_cselect_b32 s15, -1, 0 +; GCN-NEXT: s_cmp_ge_u32 s13, s2 +; GCN-NEXT: s_cselect_b32 s16, -1, 0 +; GCN-NEXT: s_cmp_eq_u32 s14, s3 +; GCN-NEXT: s_cselect_b32 s15, s16, s15 +; GCN-NEXT: s_or_b32 s8, s8, s9 +; GCN-NEXT: s_subb_u32 s12, s12, s3 +; GCN-NEXT: s_sub_u32 s16, s13, s2 +; GCN-NEXT: s_cselect_b64 s[8:9], -1, 0 +; GCN-NEXT: s_or_b32 s8, s8, s9 +; GCN-NEXT: s_subb_u32 s8, s12, 0 +; GCN-NEXT: s_cmp_lg_u32 s15, 0 +; GCN-NEXT: s_cselect_b32 s9, s16, s13 +; GCN-NEXT: s_cselect_b32 s8, s8, s14 ; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_cmp_lg_u32 s0, 0 -; GCN-NEXT: s_subb_u32 s13, s10, 0 -; GCN-NEXT: s_cmp_ge_u32 s13, s3 +; GCN-NEXT: s_subb_u32 s0, 0, s10 +; GCN-NEXT: s_cmp_ge_u32 s0, s3 ; GCN-NEXT: s_cselect_b32 s1, -1, 0 -; GCN-NEXT: s_cmp_ge_u32 s12, s2 -; GCN-NEXT: s_cselect_b32 s14, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s13, s3 -; GCN-NEXT: s_cselect_b32 s14, s14, s1 -; GCN-NEXT: s_cmp_lg_u32 s0, 0 -; GCN-NEXT: s_subb_u32 s10, s10, s3 -; GCN-NEXT: s_sub_u32 s15, s12, s2 -; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0 -; GCN-NEXT: s_or_b32 s0, s0, s1 -; GCN-NEXT: s_cmp_lg_u32 s0, 0 -; GCN-NEXT: s_subb_u32 s0, s10, 0 -; GCN-NEXT: s_cmp_lg_u32 s14, 0 -; GCN-NEXT: s_cselect_b32 s1, s15, s12 -; GCN-NEXT: s_cselect_b32 s0, s0, s13 -; GCN-NEXT: s_cmp_lg_u32 s11, 0 -; GCN-NEXT: s_subb_u32 s9, 0, s9 -; GCN-NEXT: s_cmp_ge_u32 s9, s3 -; GCN-NEXT: s_cselect_b32 s10, -1, 0 -; GCN-NEXT: s_cmp_ge_u32 s8, s2 +; GCN-NEXT: s_cmp_ge_u32 s11, s2 ; GCN-NEXT: s_cselect_b32 s2, -1, 0 -; GCN-NEXT: s_cmp_eq_u32 s9, s3 -; GCN-NEXT: s_cselect_b32 s2, s2, s10 -; GCN-NEXT: s_cmp_lg_u32 s2, 0 -; GCN-NEXT: s_cselect_b32 s0, s0, s9 -; GCN-NEXT: s_cselect_b32 s1, s1, s8 +; GCN-NEXT: s_cmp_eq_u32 s0, s3 +; GCN-NEXT: s_cselect_b32 s1, s2, s1 +; GCN-NEXT: s_cmp_lg_u32 s1, 0 +; GCN-NEXT: s_cselect_b32 s0, s8, s0 +; GCN-NEXT: s_cselect_b32 s1, s9, s11 ; GCN-NEXT: v_mov_b32_e32 v0, s1 ; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -970,7 +958,6 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_add_u32 s10, s8, 1 ; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GCN-IR-NEXT: s_or_b32 s6, s6, s7 -; GCN-IR-NEXT: s_cmp_lg_u32 s6, 0 ; GCN-IR-NEXT: s_addc_u32 s6, s9, 0 ; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GCN-IR-NEXT: s_sub_i32 s8, 63, s8 @@ -1003,7 +990,6 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_add_u32 s14, s14, 1 ; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0 ; GCN-IR-NEXT: s_or_b32 s16, s16, s17 -; GCN-IR-NEXT: s_cmp_lg_u32 s16, 0 ; GCN-IR-NEXT: s_addc_u32 s15, s15, 0 ; GCN-IR-NEXT: s_cselect_b64 s[16:17], -1, 0 ; GCN-IR-NEXT: s_mov_b64 s[8:9], s[4:5] @@ -1093,7 +1079,6 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_add_u32 s11, s8, 1 ; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GCN-IR-NEXT: s_or_b32 s6, s6, s7 -; GCN-IR-NEXT: s_cmp_lg_u32 s6, 0 ; GCN-IR-NEXT: s_addc_u32 s6, s9, 0 ; GCN-IR-NEXT: s_cselect_b64 s[6:7], -1, 0 ; GCN-IR-NEXT: s_sub_i32 s8, 63, s8 @@ -1123,7 +1108,6 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(ptr addrspace(1) %out, i64 %x) ; GCN-IR-NEXT: s_add_u32 s12, s12, 1 ; GCN-IR-NEXT: s_cselect_b64 s[14:15], -1, 0 ; GCN-IR-NEXT: s_or_b32 s14, s14, s15 -; GCN-IR-NEXT: s_cmp_lg_u32 s14, 0 ; GCN-IR-NEXT: s_addc_u32 s13, s13, 0 ; GCN-IR-NEXT: s_cselect_b64 s[14:15], -1, 0 ; GCN-IR-NEXT: s_mov_b64 s[10:11], s[4:5] |