diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/carryout-selection.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/carryout-selection.ll | 65 |
1 files changed, 18 insertions, 47 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll index 4a63452..b71885b 100644 --- a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll +++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll @@ -822,10 +822,9 @@ define amdgpu_kernel void @suaddo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1250-NEXT: v_mov_b32_e32 v2, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_add_nc_u64 s[6:7], s[4:5], s[6:7] -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_cmp_lt_u64_e64 s4, s[6:7], s[4:5] ; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX1250-NEXT: s_wait_alu 0xf1ff ; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX1250-NEXT: s_clause 0x1 ; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] @@ -1803,10 +1802,9 @@ define amdgpu_kernel void @susubo64(ptr addrspace(1) %out, ptr addrspace(1) %car ; GFX1250-NEXT: v_mov_b32_e32 v2, 0 ; GFX1250-NEXT: s_wait_kmcnt 0x0 ; GFX1250-NEXT: s_sub_nc_u64 s[6:7], s[4:5], s[6:7] -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_cmp_gt_u64_e64 s4, s[6:7], s[4:5] ; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[6:7] -; GFX1250-NEXT: s_wait_alu 0xf1ff ; GFX1250-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 ; GFX1250-NEXT: s_clause 0x1 ; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] @@ -3136,26 +3134,22 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(NEXT) | instid1(SALU_CYCLE_3) ; GFX1250-NEXT: s_fmac_f32 s0, s1, 0x4f800000 ; GFX1250-NEXT: v_s_rcp_f32 s0, s0 -; GFX1250-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) +; GFX1250-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_3) ; GFX1250-NEXT: s_mul_f32 s0, s0, 0x5f7ffffc -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_mul_f32 s1, s0, 0x2f800000 -; GFX1250-NEXT: s_wait_alu 0xfffe -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_2) +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_3) ; GFX1250-NEXT: s_trunc_f32 s1, s1 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_fmac_f32 s0, s1, 0xcf800000 ; GFX1250-NEXT: s_cvt_u32_f32 s5, s1 ; GFX1250-NEXT: s_mov_b32 s1, 0 -; GFX1250-NEXT: s_wait_alu 0xfffe +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3) ; GFX1250-NEXT: s_cvt_u32_f32 s4, s0 -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1250-NEXT: s_mul_u64 s[12:13], s[6:7], s[4:5] +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-NEXT: s_mul_hi_u32 s15, s4, s13 ; GFX1250-NEXT: s_mul_i32 s14, s4, s13 ; GFX1250-NEXT: s_mul_hi_u32 s0, s4, s12 ; GFX1250-NEXT: s_mul_i32 s17, s5, s12 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_add_nc_u64 s[14:15], s[0:1], s[14:15] ; GFX1250-NEXT: s_mul_hi_u32 s16, s5, s12 ; GFX1250-NEXT: s_mul_hi_u32 s18, s5, s13 @@ -3163,99 +3157,82 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GFX1250-NEXT: s_add_co_ci_u32 s0, s15, s16 ; GFX1250-NEXT: s_mul_i32 s12, s5, s13 ; GFX1250-NEXT: s_add_co_ci_u32 s13, s18, 0 -; GFX1250-NEXT: s_wait_alu 0xfffe +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1250-NEXT: s_add_nc_u64 s[12:13], s[0:1], s[12:13] -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-NEXT: v_add_co_u32 v0, s0, s4, s12 ; GFX1250-NEXT: s_cmp_lg_u32 s0, 0 ; GFX1250-NEXT: s_add_co_ci_u32 s5, s5, s13 ; GFX1250-NEXT: v_readfirstlane_b32 s4, v0 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_mul_u64 s[6:7], s[6:7], s[4:5] ; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-NEXT: s_mul_hi_u32 s13, s4, s7 ; GFX1250-NEXT: s_mul_i32 s12, s4, s7 ; GFX1250-NEXT: s_mul_hi_u32 s0, s4, s6 ; GFX1250-NEXT: s_mul_i32 s15, s5, s6 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_add_nc_u64 s[12:13], s[0:1], s[12:13] ; GFX1250-NEXT: s_mul_hi_u32 s14, s5, s6 ; GFX1250-NEXT: s_mul_hi_u32 s4, s5, s7 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_add_co_u32 s0, s12, s15 ; GFX1250-NEXT: s_add_co_ci_u32 s0, s13, s14 ; GFX1250-NEXT: s_mul_i32 s6, s5, s7 ; GFX1250-NEXT: s_add_co_ci_u32 s7, s4, 0 -; GFX1250-NEXT: s_wait_alu 0xfffe +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1250-NEXT: s_add_nc_u64 s[6:7], s[0:1], s[6:7] -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-NEXT: v_add_co_u32 v0, s0, v0, s6 ; GFX1250-NEXT: s_cmp_lg_u32 s0, 0 ; GFX1250-NEXT: s_add_co_ci_u32 s0, s5, s7 ; GFX1250-NEXT: v_readfirstlane_b32 s7, v0 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_mul_hi_u32 s5, s10, s0 ; GFX1250-NEXT: s_mul_i32 s4, s10, s0 ; GFX1250-NEXT: s_mul_hi_u32 s12, s11, s0 ; GFX1250-NEXT: s_mul_i32 s6, s11, s0 ; GFX1250-NEXT: s_mul_hi_u32 s0, s10, s7 ; GFX1250-NEXT: s_mul_i32 s13, s11, s7 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_add_nc_u64 s[4:5], s[0:1], s[4:5] ; GFX1250-NEXT: s_mul_hi_u32 s0, s11, s7 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_add_co_u32 s4, s4, s13 ; GFX1250-NEXT: s_add_co_ci_u32 s0, s5, s0 ; GFX1250-NEXT: s_add_co_ci_u32 s7, s12, 0 -; GFX1250-NEXT: s_wait_alu 0xfffe +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1250-NEXT: s_add_nc_u64 s[4:5], s[0:1], s[6:7] -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_and_b64 s[6:7], s[4:5], lit64(0xffffffff00000000) -; GFX1250-NEXT: s_wait_alu 0xfffe +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1250-NEXT: s_or_b32 s6, s6, s4 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_mul_u64 s[4:5], s[2:3], s[6:7] ; GFX1250-NEXT: s_add_nc_u64 s[14:15], s[6:7], 2 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: v_sub_co_u32 v0, s0, s10, s4 ; GFX1250-NEXT: s_sub_co_i32 s4, s11, s5 ; GFX1250-NEXT: s_cmp_lg_u32 s0, 0 ; GFX1250-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s15 ; GFX1250-NEXT: v_sub_co_u32 v1, s12, v0, s2 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_sub_co_ci_u32 s4, s4, s3 ; GFX1250-NEXT: s_cmp_lg_u32 s12, 0 ; GFX1250-NEXT: s_add_nc_u64 s[12:13], s[6:7], 1 ; GFX1250-NEXT: v_cmp_le_u32_e32 vcc_lo, s2, v1 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_sub_co_ci_u32 s4, s4, 0 -; GFX1250-NEXT: s_wait_alu 0xfffe +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-NEXT: s_cmp_ge_u32 s4, s3 ; GFX1250-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc_lo ; GFX1250-NEXT: s_cselect_b32 s14, -1, 0 ; GFX1250-NEXT: s_cmp_eq_u32 s4, s3 ; GFX1250-NEXT: s_cselect_b32 vcc_lo, -1, 0 ; GFX1250-NEXT: s_cmp_lg_u32 s0, 0 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: v_cndmask_b32_e32 v1, s14, v1, vcc_lo ; GFX1250-NEXT: v_cmp_le_u32_e32 vcc_lo, s2, v0 ; GFX1250-NEXT: s_sub_co_ci_u32 s0, s11, s5 -; GFX1250-NEXT: s_wait_alu 0xfffe +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-NEXT: s_cmp_ge_u32 s0, s3 -; GFX1250-NEXT: s_wait_alu 0xfffd ; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc_lo ; GFX1250-NEXT: s_cselect_b32 s4, -1, 0 ; GFX1250-NEXT: s_cmp_eq_u32 s0, s3 ; GFX1250-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1 ; GFX1250-NEXT: s_cselect_b32 s0, -1, 0 -; GFX1250-NEXT: s_wait_alu 0xfffe +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX1250-NEXT: v_cndmask_b32_e64 v0, s4, v0, s0 -; GFX1250-NEXT: s_wait_alu 0xfffd ; GFX1250-NEXT: v_cndmask_b32_e32 v2, s12, v2, vcc_lo ; GFX1250-NEXT: v_cndmask_b32_e32 v1, s13, v3, vcc_lo -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1250-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 -; GFX1250-NEXT: s_wait_alu 0xfffd +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) ; GFX1250-NEXT: v_cndmask_b32_e32 v1, s7, v1, vcc_lo ; GFX1250-NEXT: v_cndmask_b32_e32 v0, s6, v2, vcc_lo ; GFX1250-NEXT: s_cbranch_execnz .LBB16_3 @@ -3269,31 +3246,25 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1250-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX1250-NEXT: v_readfirstlane_b32 s0, v0 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_mul_i32 s1, s1, s0 -; GFX1250-NEXT: s_wait_alu 0xfffe +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1250-NEXT: s_mul_hi_u32 s1, s0, s1 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_add_co_i32 s0, s0, s1 -; GFX1250-NEXT: s_wait_alu 0xfffe +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1250-NEXT: s_mul_hi_u32 s0, s10, s0 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_mul_i32 s1, s0, s2 ; GFX1250-NEXT: s_add_co_i32 s3, s0, 1 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_sub_co_i32 s1, s10, s1 -; GFX1250-NEXT: s_wait_alu 0xfffe +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-NEXT: s_sub_co_i32 s4, s1, s2 ; GFX1250-NEXT: s_cmp_ge_u32 s1, s2 ; GFX1250-NEXT: s_cselect_b32 s0, s3, s0 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_cselect_b32 s1, s4, s1 ; GFX1250-NEXT: s_add_co_i32 s3, s0, 1 -; GFX1250-NEXT: s_wait_alu 0xfffe ; GFX1250-NEXT: s_cmp_ge_u32 s1, s2 ; GFX1250-NEXT: s_mov_b32 s1, 0 ; GFX1250-NEXT: s_cselect_b32 s0, s3, s0 -; GFX1250-NEXT: s_wait_alu 0xfffe +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX1250-NEXT: .LBB16_3: ; GFX1250-NEXT: v_mov_b32_e32 v2, 0 |