diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/frem.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/frem.ll | 1300 |
1 files changed, 650 insertions, 650 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll index 78a961e..415828f 100644 --- a/llvm/test/CodeGen/AMDGPU/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/frem.ll @@ -4858,7 +4858,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: v_cvt_f16_f32_e32 v4, v2 ; SI-NEXT: s_cbranch_vccz .LBB9_2 -; SI-NEXT: ; %bb.1: ; %frem.else +; SI-NEXT: ; %bb.1: ; %frem.else20 ; SI-NEXT: v_bfi_b32 v7, s0, 0, v2 ; SI-NEXT: v_cvt_f32_f16_e32 v4, v4 ; SI-NEXT: v_cmp_eq_f32_e32 vcc, v5, v6 @@ -4869,7 +4869,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB9_2: ; SI-NEXT: ; implicit-def: $vgpr4 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB9_3: ; %frem.compute +; SI-NEXT: .LBB9_3: ; %frem.compute19 ; SI-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v5|, s3 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v4, v5 @@ -4905,10 +4905,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; SI-NEXT: s_cmp_lt_i32 s1, 12 ; SI-NEXT: s_cbranch_scc1 .LBB9_7 -; SI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; SI-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; SI-NEXT: s_sub_i32 s1, s2, s3 ; SI-NEXT: s_add_i32 s1, s1, 11 -; SI-NEXT: .LBB9_5: ; %frem.loop_body +; SI-NEXT: .LBB9_5: ; %frem.loop_body27 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v7, v5 ; SI-NEXT: v_mul_f32_e32 v5, v7, v6 @@ -4923,7 +4923,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB9_5 ; SI-NEXT: ; %bb.6: ; %Flow55 ; SI-NEXT: v_mov_b32_e32 v5, v7 -; SI-NEXT: .LBB9_7: ; %frem.loop_exit +; SI-NEXT: .LBB9_7: ; %frem.loop_exit28 ; SI-NEXT: s_add_i32 s1, s1, -10 ; SI-NEXT: v_ldexp_f32_e64 v5, v5, s1 ; SI-NEXT: v_mul_f32_e32 v6, v5, v6 @@ -4944,7 +4944,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cvt_f32_f16_e64 v7, |v7| ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, v6, v7 ; SI-NEXT: s_cbranch_vccz .LBB9_10 -; SI-NEXT: ; %bb.9: ; %frem.else20 +; SI-NEXT: ; %bb.9: ; %frem.else ; SI-NEXT: s_brev_b32 s0, -2 ; SI-NEXT: v_bfi_b32 v8, s0, 0, v0 ; SI-NEXT: v_cvt_f32_f16_e32 v5, v5 @@ -4956,7 +4956,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB9_10: ; SI-NEXT: ; implicit-def: $vgpr5 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB9_11: ; %frem.compute19 +; SI-NEXT: .LBB9_11: ; %frem.compute ; SI-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v6|, s3 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v5, v6 @@ -4992,10 +4992,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; SI-NEXT: s_cmp_lt_i32 s1, 12 ; SI-NEXT: s_cbranch_scc1 .LBB9_15 -; SI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; SI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; SI-NEXT: s_sub_i32 s1, s2, s3 ; SI-NEXT: s_add_i32 s1, s1, 11 -; SI-NEXT: .LBB9_13: ; %frem.loop_body27 +; SI-NEXT: .LBB9_13: ; %frem.loop_body ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v8, v6 ; SI-NEXT: v_mul_f32_e32 v6, v8, v7 @@ -5010,7 +5010,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB9_13 ; SI-NEXT: ; %bb.14: ; %Flow ; SI-NEXT: v_mov_b32_e32 v6, v8 -; SI-NEXT: .LBB9_15: ; %frem.loop_exit28 +; SI-NEXT: .LBB9_15: ; %frem.loop_exit ; SI-NEXT: s_add_i32 s1, s1, -10 ; SI-NEXT: v_ldexp_f32_e64 v6, v6, s1 ; SI-NEXT: v_mul_f32_e32 v7, v6, v7 @@ -5084,7 +5084,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_and_b32_e32 v5, 0x7fffffff, v3 ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB9_2 -; CI-NEXT: ; %bb.1: ; %frem.else +; CI-NEXT: ; %bb.1: ; %frem.else20 ; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 ; CI-NEXT: v_bfi_b32 v7, s0, 0, v2 ; CI-NEXT: v_cmp_eq_f32_e32 vcc, v6, v5 @@ -5093,7 +5093,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB9_8 ; CI-NEXT: .LBB9_2: ; CI-NEXT: ; implicit-def: $vgpr4 -; CI-NEXT: .LBB9_3: ; %frem.compute +; CI-NEXT: .LBB9_3: ; %frem.compute19 ; CI-NEXT: v_frexp_exp_i32_f32_e32 v9, v6 ; CI-NEXT: v_frexp_mant_f32_e32 v4, v6 ; CI-NEXT: v_frexp_mant_f32_e32 v6, v5 @@ -5118,10 +5118,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v6 ; CI-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB9_7 -; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; CI-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; CI-NEXT: v_sub_i32_e32 v6, vcc, v9, v10 ; CI-NEXT: v_add_i32_e32 v6, vcc, 11, v6 -; CI-NEXT: .LBB9_5: ; %frem.loop_body +; CI-NEXT: .LBB9_5: ; %frem.loop_body27 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v9, v7 ; CI-NEXT: v_mul_f32_e32 v7, v9, v8 @@ -5136,7 +5136,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB9_5 ; CI-NEXT: ; %bb.6: ; %Flow55 ; CI-NEXT: v_mov_b32_e32 v7, v9 -; CI-NEXT: .LBB9_7: ; %frem.loop_exit +; CI-NEXT: .LBB9_7: ; %frem.loop_exit28 ; CI-NEXT: v_add_i32_e32 v6, vcc, -10, v6 ; CI-NEXT: v_ldexp_f32_e32 v6, v7, v6 ; CI-NEXT: v_mul_f32_e32 v7, v6, v8 @@ -5157,7 +5157,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cvt_f32_f16_e64 v6, |v6| ; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v7, v6 ; CI-NEXT: s_cbranch_vccz .LBB9_10 -; CI-NEXT: ; %bb.9: ; %frem.else20 +; CI-NEXT: ; %bb.9: ; %frem.else ; CI-NEXT: v_cvt_f32_f16_e32 v5, v5 ; CI-NEXT: s_brev_b32 s0, -2 ; CI-NEXT: v_bfi_b32 v8, s0, 0, v0 @@ -5167,7 +5167,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB9_16 ; CI-NEXT: .LBB9_10: ; CI-NEXT: ; implicit-def: $vgpr5 -; CI-NEXT: .LBB9_11: ; %frem.compute19 +; CI-NEXT: .LBB9_11: ; %frem.compute ; CI-NEXT: v_frexp_exp_i32_f32_e32 v10, v7 ; CI-NEXT: v_frexp_mant_f32_e32 v5, v7 ; CI-NEXT: v_frexp_mant_f32_e32 v7, v6 @@ -5192,10 +5192,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v7 ; CI-NEXT: v_div_fixup_f32 v9, v9, v6, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB9_15 -; CI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; CI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; CI-NEXT: v_sub_i32_e32 v7, vcc, v10, v11 ; CI-NEXT: v_add_i32_e32 v7, vcc, 11, v7 -; CI-NEXT: .LBB9_13: ; %frem.loop_body27 +; CI-NEXT: .LBB9_13: ; %frem.loop_body ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v10, v8 ; CI-NEXT: v_mul_f32_e32 v8, v10, v9 @@ -5210,7 +5210,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB9_13 ; CI-NEXT: ; %bb.14: ; %Flow ; CI-NEXT: v_mov_b32_e32 v8, v10 -; CI-NEXT: .LBB9_15: ; %frem.loop_exit28 +; CI-NEXT: .LBB9_15: ; %frem.loop_exit ; CI-NEXT: v_add_i32_e32 v7, vcc, -10, v7 ; CI-NEXT: v_ldexp_f32_e32 v7, v8, v7 ; CI-NEXT: v_mul_f32_e32 v8, v7, v9 @@ -5275,7 +5275,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cvt_f32_f16_e64 v3, |v1| ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v4, v3 ; VI-NEXT: s_cbranch_vccz .LBB9_2 -; VI-NEXT: ; %bb.1: ; %frem.else +; VI-NEXT: ; %bb.1: ; %frem.else20 ; VI-NEXT: s_movk_i32 s2, 0x7fff ; VI-NEXT: v_bfi_b32 v2, s2, 0, v0 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v4, v3 @@ -5284,7 +5284,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB9_8 ; VI-NEXT: .LBB9_2: ; VI-NEXT: ; implicit-def: $vgpr2 -; VI-NEXT: .LBB9_3: ; %frem.compute +; VI-NEXT: .LBB9_3: ; %frem.compute19 ; VI-NEXT: v_frexp_exp_i32_f32_e32 v7, v4 ; VI-NEXT: v_frexp_mant_f32_e32 v2, v4 ; VI-NEXT: v_frexp_mant_f32_e32 v4, v3 @@ -5309,10 +5309,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v4 ; VI-NEXT: v_div_fixup_f32 v6, v6, v3, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB9_7 -; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; VI-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; VI-NEXT: v_sub_u32_e32 v4, vcc, v7, v8 ; VI-NEXT: v_add_u32_e32 v4, vcc, 11, v4 -; VI-NEXT: .LBB9_5: ; %frem.loop_body +; VI-NEXT: .LBB9_5: ; %frem.loop_body27 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v7, v5 ; VI-NEXT: v_mul_f32_e32 v5, v7, v6 @@ -5327,7 +5327,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB9_5 ; VI-NEXT: ; %bb.6: ; %Flow55 ; VI-NEXT: v_mov_b32_e32 v5, v7 -; VI-NEXT: .LBB9_7: ; %frem.loop_exit +; VI-NEXT: .LBB9_7: ; %frem.loop_exit28 ; VI-NEXT: v_add_u32_e32 v4, vcc, -10, v4 ; VI-NEXT: v_ldexp_f32 v4, v5, v4 ; VI-NEXT: v_mul_f32_e32 v5, v4, v6 @@ -5347,7 +5347,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cvt_f32_f16_e64 v6, |v4| ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v7, v6 ; VI-NEXT: s_cbranch_vccz .LBB9_10 -; VI-NEXT: ; %bb.9: ; %frem.else20 +; VI-NEXT: ; %bb.9: ; %frem.else ; VI-NEXT: s_movk_i32 s2, 0x7fff ; VI-NEXT: v_bfi_b32 v5, s2, 0, v3 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v7, v6 @@ -5356,7 +5356,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB9_16 ; VI-NEXT: .LBB9_10: ; VI-NEXT: ; implicit-def: $vgpr5 -; VI-NEXT: .LBB9_11: ; %frem.compute19 +; VI-NEXT: .LBB9_11: ; %frem.compute ; VI-NEXT: v_frexp_exp_i32_f32_e32 v10, v7 ; VI-NEXT: v_frexp_mant_f32_e32 v5, v7 ; VI-NEXT: v_frexp_mant_f32_e32 v7, v6 @@ -5381,10 +5381,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v7 ; VI-NEXT: v_div_fixup_f32 v9, v9, v6, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB9_15 -; VI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; VI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; VI-NEXT: v_sub_u32_e32 v7, vcc, v10, v11 ; VI-NEXT: v_add_u32_e32 v7, vcc, 11, v7 -; VI-NEXT: .LBB9_13: ; %frem.loop_body27 +; VI-NEXT: .LBB9_13: ; %frem.loop_body ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v10, v8 ; VI-NEXT: v_mul_f32_e32 v8, v10, v9 @@ -5399,7 +5399,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB9_13 ; VI-NEXT: ; %bb.14: ; %Flow ; VI-NEXT: v_mov_b32_e32 v8, v10 -; VI-NEXT: .LBB9_15: ; %frem.loop_exit28 +; VI-NEXT: .LBB9_15: ; %frem.loop_exit ; VI-NEXT: v_add_u32_e32 v7, vcc, -10, v7 ; VI-NEXT: v_ldexp_f32 v7, v8, v7 ; VI-NEXT: v_mul_f32_e32 v8, v7, v9 @@ -5443,7 +5443,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cvt_f32_f16_e64 v3, |v0| ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v4, v3 ; GFX9-NEXT: s_cbranch_vccz .LBB9_2 -; GFX9-NEXT: ; %bb.1: ; %frem.else +; GFX9-NEXT: ; %bb.1: ; %frem.else20 ; GFX9-NEXT: s_movk_i32 s2, 0x7fff ; GFX9-NEXT: v_bfi_b32 v2, s2, 0, v1 ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v4, v3 @@ -5452,7 +5452,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB9_8 ; GFX9-NEXT: .LBB9_2: ; GFX9-NEXT: ; implicit-def: $vgpr2 -; GFX9-NEXT: .LBB9_3: ; %frem.compute +; GFX9-NEXT: .LBB9_3: ; %frem.compute19 ; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v7, v4 ; GFX9-NEXT: v_frexp_mant_f32_e32 v2, v4 ; GFX9-NEXT: v_frexp_mant_f32_e32 v4, v3 @@ -5477,10 +5477,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v4 ; GFX9-NEXT: v_div_fixup_f32 v6, v6, v3, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX9-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX9-NEXT: v_sub_u32_e32 v4, v7, v8 ; GFX9-NEXT: v_add_u32_e32 v4, 11, v4 -; GFX9-NEXT: .LBB9_5: ; %frem.loop_body +; GFX9-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v7, v5 ; GFX9-NEXT: v_mul_f32_e32 v5, v7, v6 @@ -5495,7 +5495,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB9_5 ; GFX9-NEXT: ; %bb.6: ; %Flow55 ; GFX9-NEXT: v_mov_b32_e32 v5, v7 -; GFX9-NEXT: .LBB9_7: ; %frem.loop_exit +; GFX9-NEXT: .LBB9_7: ; %frem.loop_exit28 ; GFX9-NEXT: v_add_u32_e32 v4, -10, v4 ; GFX9-NEXT: v_ldexp_f32 v4, v5, v4 ; GFX9-NEXT: v_mul_f32_e32 v5, v4, v6 @@ -5514,7 +5514,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cvt_f32_f16_sdwa v5, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v6, v5 ; GFX9-NEXT: s_cbranch_vccz .LBB9_10 -; GFX9-NEXT: ; %bb.9: ; %frem.else20 +; GFX9-NEXT: ; %bb.9: ; %frem.else ; GFX9-NEXT: s_movk_i32 s2, 0x7fff ; GFX9-NEXT: v_bfi_b32 v4, s2, 0, v3 ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v6, v5 @@ -5523,7 +5523,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB9_16 ; GFX9-NEXT: .LBB9_10: ; GFX9-NEXT: ; implicit-def: $vgpr4 -; GFX9-NEXT: .LBB9_11: ; %frem.compute19 +; GFX9-NEXT: .LBB9_11: ; %frem.compute ; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v9, v6 ; GFX9-NEXT: v_frexp_mant_f32_e32 v4, v6 ; GFX9-NEXT: v_frexp_mant_f32_e32 v6, v5 @@ -5548,10 +5548,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v6 ; GFX9-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX9-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX9-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX9-NEXT: v_sub_u32_e32 v6, v9, v10 ; GFX9-NEXT: v_add_u32_e32 v6, 11, v6 -; GFX9-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX9-NEXT: .LBB9_13: ; %frem.loop_body ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v9, v7 ; GFX9-NEXT: v_mul_f32_e32 v7, v9, v8 @@ -5566,7 +5566,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB9_13 ; GFX9-NEXT: ; %bb.14: ; %Flow ; GFX9-NEXT: v_mov_b32_e32 v7, v9 -; GFX9-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX9-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX9-NEXT: v_add_u32_e32 v6, -10, v6 ; GFX9-NEXT: v_ldexp_f32 v6, v7, v6 ; GFX9-NEXT: v_mul_f32_e32 v7, v6, v8 @@ -5612,7 +5612,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cvt_f32_f16_e64 v3, |v0| ; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v4, v3 ; GFX10-NEXT: s_cbranch_vccz .LBB9_2 -; GFX10-NEXT: ; %bb.1: ; %frem.else +; GFX10-NEXT: ; %bb.1: ; %frem.else20 ; GFX10-NEXT: v_bfi_b32 v2, 0x7fff, 0, v1 ; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v4, v3 ; GFX10-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc_lo @@ -5620,7 +5620,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB9_8 ; GFX10-NEXT: .LBB9_2: ; GFX10-NEXT: ; implicit-def: $vgpr2 -; GFX10-NEXT: .LBB9_3: ; %frem.compute +; GFX10-NEXT: .LBB9_3: ; %frem.compute19 ; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v4 ; GFX10-NEXT: v_frexp_mant_f32_e32 v6, v3 ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v5, v4 @@ -5647,10 +5647,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6 ; GFX10-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX10-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX10-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 11 -; GFX10-NEXT: .LBB9_5: ; %frem.loop_body +; GFX10-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v7, v4 ; GFX10-NEXT: s_add_i32 s2, s2, -11 @@ -5666,7 +5666,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.6: ; %Flow55 ; GFX10-NEXT: v_mov_b32_e32 v6, s2 ; GFX10-NEXT: v_mov_b32_e32 v4, v7 -; GFX10-NEXT: .LBB9_7: ; %frem.loop_exit +; GFX10-NEXT: .LBB9_7: ; %frem.loop_exit28 ; GFX10-NEXT: v_add_nc_u32_e32 v6, -10, v6 ; GFX10-NEXT: v_ldexp_f32 v4, v4, v6 ; GFX10-NEXT: v_mul_f32_e32 v5, v4, v5 @@ -5684,7 +5684,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cvt_f32_f16_e64 v6, |v3| ; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v4 ; GFX10-NEXT: s_cbranch_vccz .LBB9_10 -; GFX10-NEXT: ; %bb.9: ; %frem.else20 +; GFX10-NEXT: ; %bb.9: ; %frem.else ; GFX10-NEXT: v_bfi_b32 v5, 0x7fff, 0, v3 ; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v4 ; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v5, vcc_lo @@ -5692,7 +5692,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB9_16 ; GFX10-NEXT: .LBB9_10: ; GFX10-NEXT: ; implicit-def: $vgpr5 -; GFX10-NEXT: .LBB9_11: ; %frem.compute19 +; GFX10-NEXT: .LBB9_11: ; %frem.compute ; GFX10-NEXT: v_frexp_mant_f32_e32 v5, v6 ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v7, v6 ; GFX10-NEXT: v_ldexp_f32 v6, v5, 11 @@ -5719,10 +5719,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v8 ; GFX10-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX10-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX10-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 11 -; GFX10-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX10-NEXT: .LBB9_13: ; %frem.loop_body ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v9, v6 ; GFX10-NEXT: s_add_i32 s2, s2, -11 @@ -5738,7 +5738,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.14: ; %Flow ; GFX10-NEXT: v_mov_b32_e32 v8, s2 ; GFX10-NEXT: v_mov_b32_e32 v6, v9 -; GFX10-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX10-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX10-NEXT: v_add_nc_u32_e32 v8, -10, v8 ; GFX10-NEXT: v_ldexp_f32 v6, v6, v8 ; GFX10-NEXT: v_mul_f32_e32 v7, v6, v7 @@ -5782,7 +5782,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v4, v3 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB9_2 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else +; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else20 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v4, v3 @@ -5793,7 +5793,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_branch .LBB9_8 ; GFX11-TRUE16-NEXT: .LBB9_2: ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2 -; GFX11-TRUE16-NEXT: .LBB9_3: ; %frem.compute +; GFX11-TRUE16-NEXT: .LBB9_3: ; %frem.compute19 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, v4 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v6, v3 ; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v5, v4 @@ -5829,11 +5829,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX11-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX11-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body +; GFX11-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v7, v4 @@ -5853,7 +5853,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: ; %bb.6: ; %Flow55 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, s2 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v4, v7 -; GFX11-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit +; GFX11-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit28 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, -10, v6 ; GFX11-TRUE16-NEXT: v_ldexp_f32 v4, v4, v6 @@ -5880,7 +5880,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB9_10 -; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v3.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5 @@ -5891,7 +5891,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_branch .LBB9_16 ; GFX11-TRUE16-NEXT: .LBB9_10: ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 -; GFX11-TRUE16-NEXT: .LBB9_11: ; %frem.compute19 +; GFX11-TRUE16-NEXT: .LBB9_11: ; %frem.compute ; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v8, v6 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v6, v6 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -5927,11 +5927,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_div_fixup_f32 v8, v8, v6, 1.0 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX11-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX11-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX11-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body ; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v10, v7 @@ -5951,7 +5951,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: ; %bb.14: ; %Flow ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v9, s2 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v7, v10 -; GFX11-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX11-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, -10, v9 ; GFX11-TRUE16-NEXT: v_ldexp_f32 v7, v7, v9 @@ -6002,7 +6002,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v4, v3 ; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB9_2 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %frem.else +; GFX11-FAKE16-NEXT: ; %bb.1: ; %frem.else20 ; GFX11-FAKE16-NEXT: v_bfi_b32 v2, 0x7fff, 0, v0 ; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v4, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -6011,7 +6011,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_branch .LBB9_8 ; GFX11-FAKE16-NEXT: .LBB9_2: ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr2 -; GFX11-FAKE16-NEXT: .LBB9_3: ; %frem.compute +; GFX11-FAKE16-NEXT: .LBB9_3: ; %frem.compute19 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, v4 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v6, v3 ; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v5, v4 @@ -6047,11 +6047,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX11-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX11-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body +; GFX11-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v7, v4 @@ -6071,7 +6071,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: ; %bb.6: ; %Flow55 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v6, s2 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v4, v7 -; GFX11-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit +; GFX11-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit28 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, -10, v6 ; GFX11-FAKE16-NEXT: v_ldexp_f32 v4, v4, v6 @@ -6097,7 +6097,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v7, v5 ; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB9_10 -; GFX11-FAKE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX11-FAKE16-NEXT: ; %bb.9: ; %frem.else ; GFX11-FAKE16-NEXT: v_bfi_b32 v6, 0x7fff, 0, v3 ; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v7, v5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -6106,7 +6106,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_branch .LBB9_16 ; GFX11-FAKE16-NEXT: .LBB9_10: ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr6 -; GFX11-FAKE16-NEXT: .LBB9_11: ; %frem.compute19 +; GFX11-FAKE16-NEXT: .LBB9_11: ; %frem.compute ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v6, v7 ; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v8, v7 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4) @@ -6142,11 +6142,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_div_fixup_f32 v8, v8, v6, 1.0 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX11-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX11-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX11-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body ; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v10, v7 @@ -6166,7 +6166,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: ; %bb.14: ; %Flow ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v9, s2 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v7, v10 -; GFX11-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX11-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, -10, v9 ; GFX11-FAKE16-NEXT: v_ldexp_f32 v7, v7, v9 @@ -6220,7 +6220,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s6, s5 ; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB9_2 -; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else +; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else20 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v0.l, s4 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0 ; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s6, s5 @@ -6232,7 +6232,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_branch .LBB9_8 ; GFX1150-TRUE16-NEXT: .LBB9_2: ; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr0 -; GFX1150-TRUE16-NEXT: .LBB9_3: ; %frem.compute +; GFX1150-TRUE16-NEXT: .LBB9_3: ; %frem.compute19 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s5 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v0, s6 ; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s6 @@ -6267,11 +6267,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX1150-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1150-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX1150-TRUE16-NEXT: s_sub_i32 s5, s6, s5 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_add_i32 s5, s5, 11 -; GFX1150-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body +; GFX1150-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, v2 @@ -6293,7 +6293,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: ; %bb.6: ; %Flow55 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, s5 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1150-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit +; GFX1150-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit28 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1150-TRUE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -6323,7 +6323,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s8, s7 ; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB9_10 -; GFX1150-TRUE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1150-TRUE16-NEXT: ; %bb.9: ; %frem.else ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, s6 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0 ; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s8, s7 @@ -6335,7 +6335,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_branch .LBB9_16 ; GFX1150-TRUE16-NEXT: .LBB9_10: ; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr1 -; GFX1150-TRUE16-NEXT: .LBB9_11: ; %frem.compute19 +; GFX1150-TRUE16-NEXT: .LBB9_11: ; %frem.compute ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s7 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s8 ; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s8 @@ -6370,11 +6370,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX1150-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1150-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1150-TRUE16-NEXT: s_sub_i32 s7, s8, s7 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_add_i32 s7, s7, 11 -; GFX1150-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX1150-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body ; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v6, v3 @@ -6396,7 +6396,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: ; %bb.14: ; %Flow ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, s7 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1150-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX1150-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; GFX1150-TRUE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -6459,7 +6459,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s6, s5 ; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB9_2 -; GFX1150-FAKE16-NEXT: ; %bb.1: ; %frem.else +; GFX1150-FAKE16-NEXT: ; %bb.1: ; %frem.else20 ; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s6, s5 ; GFX1150-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, 0, s4 ; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -6469,7 +6469,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_branch .LBB9_8 ; GFX1150-FAKE16-NEXT: .LBB9_2: ; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr0 -; GFX1150-FAKE16-NEXT: .LBB9_3: ; %frem.compute +; GFX1150-FAKE16-NEXT: .LBB9_3: ; %frem.compute19 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s5 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v0, s6 ; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s6 @@ -6504,11 +6504,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX1150-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1150-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX1150-FAKE16-NEXT: s_sub_i32 s5, s6, s5 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: s_add_i32 s5, s5, 11 -; GFX1150-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body +; GFX1150-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v5, v2 @@ -6530,7 +6530,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: ; %bb.6: ; %Flow55 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v4, s5 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1150-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit +; GFX1150-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit28 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1150-FAKE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -6559,7 +6559,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s8, s7 ; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB9_10 -; GFX1150-FAKE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1150-FAKE16-NEXT: ; %bb.9: ; %frem.else ; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s8, s7 ; GFX1150-FAKE16-NEXT: v_bfi_b32 v1, 0x7fff, 0, s6 ; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -6569,7 +6569,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_branch .LBB9_16 ; GFX1150-FAKE16-NEXT: .LBB9_10: ; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr1 -; GFX1150-FAKE16-NEXT: .LBB9_11: ; %frem.compute19 +; GFX1150-FAKE16-NEXT: .LBB9_11: ; %frem.compute ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s7 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s8 ; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s8 @@ -6604,11 +6604,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX1150-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1150-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1150-FAKE16-NEXT: s_sub_i32 s7, s8, s7 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: s_add_i32 s7, s7, 11 -; GFX1150-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX1150-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body ; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v6, v3 @@ -6630,7 +6630,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: ; %bb.14: ; %Flow ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v5, s7 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1150-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX1150-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; GFX1150-FAKE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -6690,7 +6690,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s6, s5 ; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB9_2 -; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else +; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else20 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v0.l, s4 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0 ; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s6, s5 @@ -6702,7 +6702,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_branch .LBB9_8 ; GFX1200-TRUE16-NEXT: .LBB9_2: ; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr0 -; GFX1200-TRUE16-NEXT: .LBB9_3: ; %frem.compute +; GFX1200-TRUE16-NEXT: .LBB9_3: ; %frem.compute19 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s5 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v0, s6 ; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s6 @@ -6737,11 +6737,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX1200-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1200-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX1200-TRUE16-NEXT: s_sub_co_i32 s5, s6, s5 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_add_co_i32 s5, s5, 11 -; GFX1200-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body +; GFX1200-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v5, v2 @@ -6765,7 +6765,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: ; %bb.6: ; %Flow55 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v4, s5 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1200-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit +; GFX1200-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit28 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1200-TRUE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -6799,7 +6799,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s8, s7 ; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB9_10 -; GFX1200-TRUE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1200-TRUE16-NEXT: ; %bb.9: ; %frem.else ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, s6 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0 ; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s8, s7 @@ -6811,7 +6811,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_branch .LBB9_16 ; GFX1200-TRUE16-NEXT: .LBB9_10: ; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr1 -; GFX1200-TRUE16-NEXT: .LBB9_11: ; %frem.compute19 +; GFX1200-TRUE16-NEXT: .LBB9_11: ; %frem.compute ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s7 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s8 ; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s8 @@ -6847,11 +6847,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX1200-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1200-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1200-TRUE16-NEXT: s_sub_co_i32 s7, s8, s7 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_add_co_i32 s7, s7, 11 -; GFX1200-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX1200-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body ; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v6, v3 @@ -6875,7 +6875,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: ; %bb.14: ; %Flow ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v5, s7 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1200-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX1200-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; GFX1200-TRUE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -6940,7 +6940,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s6, s5 ; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB9_2 -; GFX1200-FAKE16-NEXT: ; %bb.1: ; %frem.else +; GFX1200-FAKE16-NEXT: ; %bb.1: ; %frem.else20 ; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s6, s5 ; GFX1200-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, 0, s4 ; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -6950,7 +6950,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_branch .LBB9_8 ; GFX1200-FAKE16-NEXT: .LBB9_2: ; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr0 -; GFX1200-FAKE16-NEXT: .LBB9_3: ; %frem.compute +; GFX1200-FAKE16-NEXT: .LBB9_3: ; %frem.compute19 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s5 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v0, s6 ; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s6 @@ -6986,11 +6986,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB9_7 -; GFX1200-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1200-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader ; GFX1200-FAKE16-NEXT: s_sub_co_i32 s5, s6, s5 ; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe ; GFX1200-FAKE16-NEXT: s_add_co_i32 s5, s5, 11 -; GFX1200-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body +; GFX1200-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body27 ; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v5, v2 @@ -7014,7 +7014,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: ; %bb.6: ; %Flow55 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v4, s5 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1200-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit +; GFX1200-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit28 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1200-FAKE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -7047,7 +7047,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s8, s7 ; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB9_10 -; GFX1200-FAKE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1200-FAKE16-NEXT: ; %bb.9: ; %frem.else ; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s8, s7 ; GFX1200-FAKE16-NEXT: v_bfi_b32 v1, 0x7fff, 0, s6 ; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -7058,7 +7058,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_branch .LBB9_16 ; GFX1200-FAKE16-NEXT: .LBB9_10: ; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr1 -; GFX1200-FAKE16-NEXT: .LBB9_11: ; %frem.compute19 +; GFX1200-FAKE16-NEXT: .LBB9_11: ; %frem.compute ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s7 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s8 ; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s8 @@ -7094,11 +7094,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB9_15 -; GFX1200-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1200-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1200-FAKE16-NEXT: s_sub_co_i32 s7, s8, s7 ; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe ; GFX1200-FAKE16-NEXT: s_add_co_i32 s7, s7, 11 -; GFX1200-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body27 +; GFX1200-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body ; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v6, v3 @@ -7122,7 +7122,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: ; %bb.14: ; %Flow ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v5, s7 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1200-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit28 +; GFX1200-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; GFX1200-FAKE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -7208,7 +7208,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: v_cvt_f16_f32_e32 v8, v6 ; SI-NEXT: s_cbranch_vccz .LBB10_2 -; SI-NEXT: ; %bb.1: ; %frem.else +; SI-NEXT: ; %bb.1: ; %frem.else86 ; SI-NEXT: v_bfi_b32 v11, s0, 0, v6 ; SI-NEXT: v_cvt_f32_f16_e32 v8, v8 ; SI-NEXT: v_cmp_eq_f32_e32 vcc, v9, v10 @@ -7219,7 +7219,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB10_2: ; SI-NEXT: ; implicit-def: $vgpr8 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB10_3: ; %frem.compute +; SI-NEXT: .LBB10_3: ; %frem.compute85 ; SI-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v9|, s3 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v8, v9 @@ -7255,10 +7255,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v10, v10, v8, 1.0 ; SI-NEXT: s_cmp_lt_i32 s1, 12 ; SI-NEXT: s_cbranch_scc1 .LBB10_7 -; SI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; SI-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; SI-NEXT: s_sub_i32 s1, s2, s3 ; SI-NEXT: s_add_i32 s1, s1, 11 -; SI-NEXT: .LBB10_5: ; %frem.loop_body +; SI-NEXT: .LBB10_5: ; %frem.loop_body93 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v11, v9 ; SI-NEXT: v_mul_f32_e32 v9, v11, v10 @@ -7273,7 +7273,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB10_5 ; SI-NEXT: ; %bb.6: ; %Flow133 ; SI-NEXT: v_mov_b32_e32 v9, v11 -; SI-NEXT: .LBB10_7: ; %frem.loop_exit +; SI-NEXT: .LBB10_7: ; %frem.loop_exit94 ; SI-NEXT: s_add_i32 s1, s1, -10 ; SI-NEXT: v_ldexp_f32_e64 v9, v9, s1 ; SI-NEXT: v_mul_f32_e32 v10, v9, v10 @@ -7294,7 +7294,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cvt_f32_f16_e64 v11, |v11| ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, v10, v11 ; SI-NEXT: s_cbranch_vccz .LBB10_10 -; SI-NEXT: ; %bb.9: ; %frem.else20 +; SI-NEXT: ; %bb.9: ; %frem.else53 ; SI-NEXT: s_brev_b32 s0, -2 ; SI-NEXT: v_bfi_b32 v12, s0, 0, v4 ; SI-NEXT: v_cvt_f32_f16_e32 v9, v9 @@ -7306,7 +7306,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB10_10: ; SI-NEXT: ; implicit-def: $vgpr9 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB10_11: ; %frem.compute19 +; SI-NEXT: .LBB10_11: ; %frem.compute52 ; SI-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v10|, s3 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v9, v10 @@ -7342,10 +7342,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0 ; SI-NEXT: s_cmp_lt_i32 s1, 12 ; SI-NEXT: s_cbranch_scc1 .LBB10_15 -; SI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; SI-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; SI-NEXT: s_sub_i32 s1, s2, s3 ; SI-NEXT: s_add_i32 s1, s1, 11 -; SI-NEXT: .LBB10_13: ; %frem.loop_body27 +; SI-NEXT: .LBB10_13: ; %frem.loop_body60 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v12, v10 ; SI-NEXT: v_mul_f32_e32 v10, v12, v11 @@ -7360,7 +7360,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB10_13 ; SI-NEXT: ; %bb.14: ; %Flow129 ; SI-NEXT: v_mov_b32_e32 v10, v12 -; SI-NEXT: .LBB10_15: ; %frem.loop_exit28 +; SI-NEXT: .LBB10_15: ; %frem.loop_exit61 ; SI-NEXT: s_add_i32 s1, s1, -10 ; SI-NEXT: v_ldexp_f32_e64 v10, v10, s1 ; SI-NEXT: v_mul_f32_e32 v11, v10, v11 @@ -7381,7 +7381,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cvt_f32_f16_e64 v12, |v12| ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, v11, v12 ; SI-NEXT: s_cbranch_vccz .LBB10_18 -; SI-NEXT: ; %bb.17: ; %frem.else53 +; SI-NEXT: ; %bb.17: ; %frem.else20 ; SI-NEXT: s_brev_b32 s0, -2 ; SI-NEXT: v_bfi_b32 v13, s0, 0, v2 ; SI-NEXT: v_cvt_f32_f16_e32 v10, v10 @@ -7393,7 +7393,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB10_18: ; SI-NEXT: ; implicit-def: $vgpr10 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB10_19: ; %frem.compute52 +; SI-NEXT: .LBB10_19: ; %frem.compute19 ; SI-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v11|, s3 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v10, v11 @@ -7429,10 +7429,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v12, v12, v10, 1.0 ; SI-NEXT: s_cmp_lt_i32 s1, 12 ; SI-NEXT: s_cbranch_scc1 .LBB10_23 -; SI-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; SI-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; SI-NEXT: s_sub_i32 s1, s2, s3 ; SI-NEXT: s_add_i32 s1, s1, 11 -; SI-NEXT: .LBB10_21: ; %frem.loop_body60 +; SI-NEXT: .LBB10_21: ; %frem.loop_body27 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v13, v11 ; SI-NEXT: v_mul_f32_e32 v11, v13, v12 @@ -7447,7 +7447,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB10_21 ; SI-NEXT: ; %bb.22: ; %Flow125 ; SI-NEXT: v_mov_b32_e32 v11, v13 -; SI-NEXT: .LBB10_23: ; %frem.loop_exit61 +; SI-NEXT: .LBB10_23: ; %frem.loop_exit28 ; SI-NEXT: s_add_i32 s1, s1, -10 ; SI-NEXT: v_ldexp_f32_e64 v11, v11, s1 ; SI-NEXT: v_mul_f32_e32 v12, v11, v12 @@ -7468,7 +7468,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cvt_f32_f16_e64 v13, |v13| ; SI-NEXT: v_cmp_ngt_f32_e32 vcc, v12, v13 ; SI-NEXT: s_cbranch_vccz .LBB10_26 -; SI-NEXT: ; %bb.25: ; %frem.else86 +; SI-NEXT: ; %bb.25: ; %frem.else ; SI-NEXT: s_brev_b32 s0, -2 ; SI-NEXT: v_bfi_b32 v14, s0, 0, v0 ; SI-NEXT: v_cvt_f32_f16_e32 v11, v11 @@ -7480,7 +7480,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB10_26: ; SI-NEXT: ; implicit-def: $vgpr11 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB10_27: ; %frem.compute85 +; SI-NEXT: .LBB10_27: ; %frem.compute ; SI-NEXT: s_mov_b32 s3, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v12|, s3 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v11, v12 @@ -7516,10 +7516,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v13, v13, v11, 1.0 ; SI-NEXT: s_cmp_lt_i32 s1, 12 ; SI-NEXT: s_cbranch_scc1 .LBB10_31 -; SI-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; SI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; SI-NEXT: s_sub_i32 s1, s2, s3 ; SI-NEXT: s_add_i32 s1, s1, 11 -; SI-NEXT: .LBB10_29: ; %frem.loop_body93 +; SI-NEXT: .LBB10_29: ; %frem.loop_body ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v14, v12 ; SI-NEXT: v_mul_f32_e32 v12, v14, v13 @@ -7534,7 +7534,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB10_29 ; SI-NEXT: ; %bb.30: ; %Flow ; SI-NEXT: v_mov_b32_e32 v12, v14 -; SI-NEXT: .LBB10_31: ; %frem.loop_exit94 +; SI-NEXT: .LBB10_31: ; %frem.loop_exit ; SI-NEXT: s_add_i32 s1, s1, -10 ; SI-NEXT: v_ldexp_f32_e64 v12, v12, s1 ; SI-NEXT: v_mul_f32_e32 v13, v12, v13 @@ -7638,7 +7638,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_and_b32_e32 v9, 0x7fffffff, v7 ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB10_2 -; CI-NEXT: ; %bb.1: ; %frem.else +; CI-NEXT: ; %bb.1: ; %frem.else86 ; CI-NEXT: v_cvt_f32_f16_e32 v8, v8 ; CI-NEXT: v_bfi_b32 v11, s0, 0, v6 ; CI-NEXT: v_cmp_eq_f32_e32 vcc, v10, v9 @@ -7647,7 +7647,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB10_8 ; CI-NEXT: .LBB10_2: ; CI-NEXT: ; implicit-def: $vgpr8 -; CI-NEXT: .LBB10_3: ; %frem.compute +; CI-NEXT: .LBB10_3: ; %frem.compute85 ; CI-NEXT: v_frexp_exp_i32_f32_e32 v13, v10 ; CI-NEXT: v_frexp_mant_f32_e32 v8, v10 ; CI-NEXT: v_frexp_mant_f32_e32 v10, v9 @@ -7672,10 +7672,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v10 ; CI-NEXT: v_div_fixup_f32 v12, v12, v9, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB10_7 -; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; CI-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; CI-NEXT: v_sub_i32_e32 v10, vcc, v13, v14 ; CI-NEXT: v_add_i32_e32 v10, vcc, 11, v10 -; CI-NEXT: .LBB10_5: ; %frem.loop_body +; CI-NEXT: .LBB10_5: ; %frem.loop_body93 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v13, v11 ; CI-NEXT: v_mul_f32_e32 v11, v13, v12 @@ -7690,7 +7690,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB10_5 ; CI-NEXT: ; %bb.6: ; %Flow133 ; CI-NEXT: v_mov_b32_e32 v11, v13 -; CI-NEXT: .LBB10_7: ; %frem.loop_exit +; CI-NEXT: .LBB10_7: ; %frem.loop_exit94 ; CI-NEXT: v_add_i32_e32 v10, vcc, -10, v10 ; CI-NEXT: v_ldexp_f32_e32 v10, v11, v10 ; CI-NEXT: v_mul_f32_e32 v11, v10, v12 @@ -7711,7 +7711,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cvt_f32_f16_e64 v10, |v10| ; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v11, v10 ; CI-NEXT: s_cbranch_vccz .LBB10_10 -; CI-NEXT: ; %bb.9: ; %frem.else20 +; CI-NEXT: ; %bb.9: ; %frem.else53 ; CI-NEXT: v_cvt_f32_f16_e32 v9, v9 ; CI-NEXT: s_brev_b32 s0, -2 ; CI-NEXT: v_bfi_b32 v12, s0, 0, v4 @@ -7721,7 +7721,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB10_16 ; CI-NEXT: .LBB10_10: ; CI-NEXT: ; implicit-def: $vgpr9 -; CI-NEXT: .LBB10_11: ; %frem.compute19 +; CI-NEXT: .LBB10_11: ; %frem.compute52 ; CI-NEXT: v_frexp_exp_i32_f32_e32 v14, v11 ; CI-NEXT: v_frexp_mant_f32_e32 v9, v11 ; CI-NEXT: v_frexp_mant_f32_e32 v11, v10 @@ -7746,10 +7746,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v11 ; CI-NEXT: v_div_fixup_f32 v13, v13, v10, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB10_15 -; CI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; CI-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; CI-NEXT: v_sub_i32_e32 v11, vcc, v14, v15 ; CI-NEXT: v_add_i32_e32 v11, vcc, 11, v11 -; CI-NEXT: .LBB10_13: ; %frem.loop_body27 +; CI-NEXT: .LBB10_13: ; %frem.loop_body60 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v14, v12 ; CI-NEXT: v_mul_f32_e32 v12, v14, v13 @@ -7764,7 +7764,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB10_13 ; CI-NEXT: ; %bb.14: ; %Flow129 ; CI-NEXT: v_mov_b32_e32 v12, v14 -; CI-NEXT: .LBB10_15: ; %frem.loop_exit28 +; CI-NEXT: .LBB10_15: ; %frem.loop_exit61 ; CI-NEXT: v_add_i32_e32 v11, vcc, -10, v11 ; CI-NEXT: v_ldexp_f32_e32 v11, v12, v11 ; CI-NEXT: v_mul_f32_e32 v12, v11, v13 @@ -7785,7 +7785,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cvt_f32_f16_e64 v11, |v11| ; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v12, v11 ; CI-NEXT: s_cbranch_vccz .LBB10_18 -; CI-NEXT: ; %bb.17: ; %frem.else53 +; CI-NEXT: ; %bb.17: ; %frem.else20 ; CI-NEXT: v_cvt_f32_f16_e32 v10, v10 ; CI-NEXT: s_brev_b32 s0, -2 ; CI-NEXT: v_bfi_b32 v13, s0, 0, v2 @@ -7795,7 +7795,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB10_24 ; CI-NEXT: .LBB10_18: ; CI-NEXT: ; implicit-def: $vgpr10 -; CI-NEXT: .LBB10_19: ; %frem.compute52 +; CI-NEXT: .LBB10_19: ; %frem.compute19 ; CI-NEXT: v_frexp_exp_i32_f32_e32 v15, v12 ; CI-NEXT: v_frexp_mant_f32_e32 v10, v12 ; CI-NEXT: v_frexp_mant_f32_e32 v12, v11 @@ -7820,10 +7820,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v12 ; CI-NEXT: v_div_fixup_f32 v14, v14, v11, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB10_23 -; CI-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; CI-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; CI-NEXT: v_sub_i32_e32 v12, vcc, v15, v16 ; CI-NEXT: v_add_i32_e32 v12, vcc, 11, v12 -; CI-NEXT: .LBB10_21: ; %frem.loop_body60 +; CI-NEXT: .LBB10_21: ; %frem.loop_body27 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v15, v13 ; CI-NEXT: v_mul_f32_e32 v13, v15, v14 @@ -7838,7 +7838,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB10_21 ; CI-NEXT: ; %bb.22: ; %Flow125 ; CI-NEXT: v_mov_b32_e32 v13, v15 -; CI-NEXT: .LBB10_23: ; %frem.loop_exit61 +; CI-NEXT: .LBB10_23: ; %frem.loop_exit28 ; CI-NEXT: v_add_i32_e32 v12, vcc, -10, v12 ; CI-NEXT: v_ldexp_f32_e32 v12, v13, v12 ; CI-NEXT: v_mul_f32_e32 v13, v12, v14 @@ -7859,7 +7859,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cvt_f32_f16_e64 v12, |v12| ; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v13, v12 ; CI-NEXT: s_cbranch_vccz .LBB10_26 -; CI-NEXT: ; %bb.25: ; %frem.else86 +; CI-NEXT: ; %bb.25: ; %frem.else ; CI-NEXT: v_cvt_f32_f16_e32 v11, v11 ; CI-NEXT: s_brev_b32 s0, -2 ; CI-NEXT: v_bfi_b32 v14, s0, 0, v0 @@ -7869,7 +7869,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB10_32 ; CI-NEXT: .LBB10_26: ; CI-NEXT: ; implicit-def: $vgpr11 -; CI-NEXT: .LBB10_27: ; %frem.compute85 +; CI-NEXT: .LBB10_27: ; %frem.compute ; CI-NEXT: v_frexp_exp_i32_f32_e32 v16, v13 ; CI-NEXT: v_frexp_mant_f32_e32 v11, v13 ; CI-NEXT: v_frexp_mant_f32_e32 v13, v12 @@ -7894,10 +7894,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v13 ; CI-NEXT: v_div_fixup_f32 v15, v15, v12, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB10_31 -; CI-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; CI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; CI-NEXT: v_sub_i32_e32 v13, vcc, v16, v17 ; CI-NEXT: v_add_i32_e32 v13, vcc, 11, v13 -; CI-NEXT: .LBB10_29: ; %frem.loop_body93 +; CI-NEXT: .LBB10_29: ; %frem.loop_body ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v16, v14 ; CI-NEXT: v_mul_f32_e32 v14, v16, v15 @@ -7912,7 +7912,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB10_29 ; CI-NEXT: ; %bb.30: ; %Flow ; CI-NEXT: v_mov_b32_e32 v14, v16 -; CI-NEXT: .LBB10_31: ; %frem.loop_exit94 +; CI-NEXT: .LBB10_31: ; %frem.loop_exit ; CI-NEXT: v_add_i32_e32 v13, vcc, -10, v13 ; CI-NEXT: v_ldexp_f32_e32 v13, v14, v13 ; CI-NEXT: v_mul_f32_e32 v14, v13, v15 @@ -8001,7 +8001,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cvt_f32_f16_e64 v5, |v2| ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v6, v5 ; VI-NEXT: s_cbranch_vccz .LBB10_2 -; VI-NEXT: ; %bb.1: ; %frem.else +; VI-NEXT: ; %bb.1: ; %frem.else86 ; VI-NEXT: s_movk_i32 s2, 0x7fff ; VI-NEXT: v_bfi_b32 v4, s2, 0, v0 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v6, v5 @@ -8010,7 +8010,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB10_8 ; VI-NEXT: .LBB10_2: ; VI-NEXT: ; implicit-def: $vgpr4 -; VI-NEXT: .LBB10_3: ; %frem.compute +; VI-NEXT: .LBB10_3: ; %frem.compute85 ; VI-NEXT: v_frexp_exp_i32_f32_e32 v9, v6 ; VI-NEXT: v_frexp_mant_f32_e32 v4, v6 ; VI-NEXT: v_frexp_mant_f32_e32 v6, v5 @@ -8035,10 +8035,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v6 ; VI-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB10_7 -; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; VI-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; VI-NEXT: v_sub_u32_e32 v6, vcc, v9, v10 ; VI-NEXT: v_add_u32_e32 v6, vcc, 11, v6 -; VI-NEXT: .LBB10_5: ; %frem.loop_body +; VI-NEXT: .LBB10_5: ; %frem.loop_body93 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v9, v7 ; VI-NEXT: v_mul_f32_e32 v7, v9, v8 @@ -8053,7 +8053,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB10_5 ; VI-NEXT: ; %bb.6: ; %Flow133 ; VI-NEXT: v_mov_b32_e32 v7, v9 -; VI-NEXT: .LBB10_7: ; %frem.loop_exit +; VI-NEXT: .LBB10_7: ; %frem.loop_exit94 ; VI-NEXT: v_add_u32_e32 v6, vcc, -10, v6 ; VI-NEXT: v_ldexp_f32 v6, v7, v6 ; VI-NEXT: v_mul_f32_e32 v7, v6, v8 @@ -8073,7 +8073,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cvt_f32_f16_e64 v8, |v6| ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v9, v8 ; VI-NEXT: s_cbranch_vccz .LBB10_10 -; VI-NEXT: ; %bb.9: ; %frem.else20 +; VI-NEXT: ; %bb.9: ; %frem.else53 ; VI-NEXT: s_movk_i32 s2, 0x7fff ; VI-NEXT: v_bfi_b32 v7, s2, 0, v5 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v9, v8 @@ -8082,7 +8082,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB10_16 ; VI-NEXT: .LBB10_10: ; VI-NEXT: ; implicit-def: $vgpr7 -; VI-NEXT: .LBB10_11: ; %frem.compute19 +; VI-NEXT: .LBB10_11: ; %frem.compute52 ; VI-NEXT: v_frexp_exp_i32_f32_e32 v12, v9 ; VI-NEXT: v_frexp_mant_f32_e32 v7, v9 ; VI-NEXT: v_frexp_mant_f32_e32 v9, v8 @@ -8107,10 +8107,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v9 ; VI-NEXT: v_div_fixup_f32 v11, v11, v8, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB10_15 -; VI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; VI-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; VI-NEXT: v_sub_u32_e32 v9, vcc, v12, v13 ; VI-NEXT: v_add_u32_e32 v9, vcc, 11, v9 -; VI-NEXT: .LBB10_13: ; %frem.loop_body27 +; VI-NEXT: .LBB10_13: ; %frem.loop_body60 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v12, v10 ; VI-NEXT: v_mul_f32_e32 v10, v12, v11 @@ -8125,7 +8125,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB10_13 ; VI-NEXT: ; %bb.14: ; %Flow129 ; VI-NEXT: v_mov_b32_e32 v10, v12 -; VI-NEXT: .LBB10_15: ; %frem.loop_exit28 +; VI-NEXT: .LBB10_15: ; %frem.loop_exit61 ; VI-NEXT: v_add_u32_e32 v9, vcc, -10, v9 ; VI-NEXT: v_ldexp_f32 v9, v10, v9 ; VI-NEXT: v_mul_f32_e32 v10, v9, v11 @@ -8143,7 +8143,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cvt_f32_f16_e64 v9, |v3| ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v10, v9 ; VI-NEXT: s_cbranch_vccz .LBB10_18 -; VI-NEXT: ; %bb.17: ; %frem.else53 +; VI-NEXT: ; %bb.17: ; %frem.else20 ; VI-NEXT: s_movk_i32 s2, 0x7fff ; VI-NEXT: v_bfi_b32 v8, s2, 0, v1 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v10, v9 @@ -8152,7 +8152,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB10_24 ; VI-NEXT: .LBB10_18: ; VI-NEXT: ; implicit-def: $vgpr8 -; VI-NEXT: .LBB10_19: ; %frem.compute52 +; VI-NEXT: .LBB10_19: ; %frem.compute19 ; VI-NEXT: v_frexp_exp_i32_f32_e32 v13, v10 ; VI-NEXT: v_frexp_mant_f32_e32 v8, v10 ; VI-NEXT: v_frexp_mant_f32_e32 v10, v9 @@ -8177,10 +8177,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v10 ; VI-NEXT: v_div_fixup_f32 v12, v12, v9, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB10_23 -; VI-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; VI-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; VI-NEXT: v_sub_u32_e32 v10, vcc, v13, v14 ; VI-NEXT: v_add_u32_e32 v10, vcc, 11, v10 -; VI-NEXT: .LBB10_21: ; %frem.loop_body60 +; VI-NEXT: .LBB10_21: ; %frem.loop_body27 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v13, v11 ; VI-NEXT: v_mul_f32_e32 v11, v13, v12 @@ -8195,7 +8195,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB10_21 ; VI-NEXT: ; %bb.22: ; %Flow125 ; VI-NEXT: v_mov_b32_e32 v11, v13 -; VI-NEXT: .LBB10_23: ; %frem.loop_exit61 +; VI-NEXT: .LBB10_23: ; %frem.loop_exit28 ; VI-NEXT: v_add_u32_e32 v10, vcc, -10, v10 ; VI-NEXT: v_ldexp_f32 v10, v11, v10 ; VI-NEXT: v_mul_f32_e32 v11, v10, v12 @@ -8215,7 +8215,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cvt_f32_f16_e64 v12, |v10| ; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v13, v12 ; VI-NEXT: s_cbranch_vccz .LBB10_26 -; VI-NEXT: ; %bb.25: ; %frem.else86 +; VI-NEXT: ; %bb.25: ; %frem.else ; VI-NEXT: s_movk_i32 s2, 0x7fff ; VI-NEXT: v_bfi_b32 v11, s2, 0, v9 ; VI-NEXT: v_cmp_eq_f32_e32 vcc, v13, v12 @@ -8224,7 +8224,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB10_32 ; VI-NEXT: .LBB10_26: ; VI-NEXT: ; implicit-def: $vgpr11 -; VI-NEXT: .LBB10_27: ; %frem.compute85 +; VI-NEXT: .LBB10_27: ; %frem.compute ; VI-NEXT: v_frexp_exp_i32_f32_e32 v16, v13 ; VI-NEXT: v_frexp_mant_f32_e32 v11, v13 ; VI-NEXT: v_frexp_mant_f32_e32 v13, v12 @@ -8249,10 +8249,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v13 ; VI-NEXT: v_div_fixup_f32 v15, v15, v12, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB10_31 -; VI-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; VI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; VI-NEXT: v_sub_u32_e32 v13, vcc, v16, v17 ; VI-NEXT: v_add_u32_e32 v13, vcc, 11, v13 -; VI-NEXT: .LBB10_29: ; %frem.loop_body93 +; VI-NEXT: .LBB10_29: ; %frem.loop_body ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v16, v14 ; VI-NEXT: v_mul_f32_e32 v14, v16, v15 @@ -8267,7 +8267,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB10_29 ; VI-NEXT: ; %bb.30: ; %Flow ; VI-NEXT: v_mov_b32_e32 v14, v16 -; VI-NEXT: .LBB10_31: ; %frem.loop_exit94 +; VI-NEXT: .LBB10_31: ; %frem.loop_exit ; VI-NEXT: v_add_u32_e32 v13, vcc, -10, v13 ; VI-NEXT: v_ldexp_f32 v13, v14, v13 ; VI-NEXT: v_mul_f32_e32 v14, v13, v15 @@ -8320,7 +8320,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cvt_f32_f16_e64 v5, |v0| ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v6, v5 ; GFX9-NEXT: s_cbranch_vccz .LBB10_2 -; GFX9-NEXT: ; %bb.1: ; %frem.else +; GFX9-NEXT: ; %bb.1: ; %frem.else86 ; GFX9-NEXT: s_movk_i32 s2, 0x7fff ; GFX9-NEXT: v_bfi_b32 v4, s2, 0, v2 ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v6, v5 @@ -8329,7 +8329,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB10_8 ; GFX9-NEXT: .LBB10_2: ; GFX9-NEXT: ; implicit-def: $vgpr4 -; GFX9-NEXT: .LBB10_3: ; %frem.compute +; GFX9-NEXT: .LBB10_3: ; %frem.compute85 ; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v9, v6 ; GFX9-NEXT: v_frexp_mant_f32_e32 v4, v6 ; GFX9-NEXT: v_frexp_mant_f32_e32 v6, v5 @@ -8354,10 +8354,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v6 ; GFX9-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX9-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX9-NEXT: v_sub_u32_e32 v6, v9, v10 ; GFX9-NEXT: v_add_u32_e32 v6, 11, v6 -; GFX9-NEXT: .LBB10_5: ; %frem.loop_body +; GFX9-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v9, v7 ; GFX9-NEXT: v_mul_f32_e32 v7, v9, v8 @@ -8372,7 +8372,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB10_5 ; GFX9-NEXT: ; %bb.6: ; %Flow133 ; GFX9-NEXT: v_mov_b32_e32 v7, v9 -; GFX9-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX9-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX9-NEXT: v_add_u32_e32 v6, -10, v6 ; GFX9-NEXT: v_ldexp_f32 v6, v7, v6 ; GFX9-NEXT: v_mul_f32_e32 v7, v6, v8 @@ -8391,7 +8391,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cvt_f32_f16_sdwa v7, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v8, v7 ; GFX9-NEXT: s_cbranch_vccz .LBB10_10 -; GFX9-NEXT: ; %bb.9: ; %frem.else20 +; GFX9-NEXT: ; %bb.9: ; %frem.else53 ; GFX9-NEXT: s_movk_i32 s2, 0x7fff ; GFX9-NEXT: v_bfi_b32 v6, s2, 0, v5 ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v8, v7 @@ -8400,7 +8400,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB10_16 ; GFX9-NEXT: .LBB10_10: ; GFX9-NEXT: ; implicit-def: $vgpr6 -; GFX9-NEXT: .LBB10_11: ; %frem.compute19 +; GFX9-NEXT: .LBB10_11: ; %frem.compute52 ; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v11, v8 ; GFX9-NEXT: v_frexp_mant_f32_e32 v6, v8 ; GFX9-NEXT: v_frexp_mant_f32_e32 v8, v7 @@ -8425,10 +8425,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v8 ; GFX9-NEXT: v_div_fixup_f32 v10, v10, v7, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX9-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX9-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; GFX9-NEXT: v_sub_u32_e32 v8, v11, v12 ; GFX9-NEXT: v_add_u32_e32 v8, 11, v8 -; GFX9-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX9-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v11, v9 ; GFX9-NEXT: v_mul_f32_e32 v9, v11, v10 @@ -8443,7 +8443,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB10_13 ; GFX9-NEXT: ; %bb.14: ; %Flow129 ; GFX9-NEXT: v_mov_b32_e32 v9, v11 -; GFX9-NEXT: .LBB10_15: ; %frem.loop_exit28 +; GFX9-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX9-NEXT: v_add_u32_e32 v8, -10, v8 ; GFX9-NEXT: v_ldexp_f32 v8, v9, v8 ; GFX9-NEXT: v_mul_f32_e32 v9, v8, v10 @@ -8461,7 +8461,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cvt_f32_f16_e64 v8, |v1| ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v9, v8 ; GFX9-NEXT: s_cbranch_vccz .LBB10_18 -; GFX9-NEXT: ; %bb.17: ; %frem.else53 +; GFX9-NEXT: ; %bb.17: ; %frem.else20 ; GFX9-NEXT: s_movk_i32 s2, 0x7fff ; GFX9-NEXT: v_bfi_b32 v7, s2, 0, v3 ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v9, v8 @@ -8470,7 +8470,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB10_24 ; GFX9-NEXT: .LBB10_18: ; GFX9-NEXT: ; implicit-def: $vgpr7 -; GFX9-NEXT: .LBB10_19: ; %frem.compute52 +; GFX9-NEXT: .LBB10_19: ; %frem.compute19 ; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v12, v9 ; GFX9-NEXT: v_frexp_mant_f32_e32 v7, v9 ; GFX9-NEXT: v_frexp_mant_f32_e32 v9, v8 @@ -8495,10 +8495,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v9 ; GFX9-NEXT: v_div_fixup_f32 v11, v11, v8, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX9-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; GFX9-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; GFX9-NEXT: v_sub_u32_e32 v9, v12, v13 ; GFX9-NEXT: v_add_u32_e32 v9, 11, v9 -; GFX9-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX9-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v12, v10 ; GFX9-NEXT: v_mul_f32_e32 v10, v12, v11 @@ -8513,7 +8513,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB10_21 ; GFX9-NEXT: ; %bb.22: ; %Flow125 ; GFX9-NEXT: v_mov_b32_e32 v10, v12 -; GFX9-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX9-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX9-NEXT: v_add_u32_e32 v9, -10, v9 ; GFX9-NEXT: v_ldexp_f32 v9, v10, v9 ; GFX9-NEXT: v_mul_f32_e32 v10, v9, v11 @@ -8532,7 +8532,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cvt_f32_f16_sdwa v10, |v1| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v11, v10 ; GFX9-NEXT: s_cbranch_vccz .LBB10_26 -; GFX9-NEXT: ; %bb.25: ; %frem.else86 +; GFX9-NEXT: ; %bb.25: ; %frem.else ; GFX9-NEXT: s_movk_i32 s2, 0x7fff ; GFX9-NEXT: v_bfi_b32 v9, s2, 0, v8 ; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v11, v10 @@ -8541,7 +8541,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB10_32 ; GFX9-NEXT: .LBB10_26: ; GFX9-NEXT: ; implicit-def: $vgpr9 -; GFX9-NEXT: .LBB10_27: ; %frem.compute85 +; GFX9-NEXT: .LBB10_27: ; %frem.compute ; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v14, v11 ; GFX9-NEXT: v_frexp_mant_f32_e32 v9, v11 ; GFX9-NEXT: v_frexp_mant_f32_e32 v11, v10 @@ -8566,10 +8566,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v11 ; GFX9-NEXT: v_div_fixup_f32 v13, v13, v10, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX9-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX9-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX9-NEXT: v_sub_u32_e32 v11, v14, v15 ; GFX9-NEXT: v_add_u32_e32 v11, 11, v11 -; GFX9-NEXT: .LBB10_29: ; %frem.loop_body93 +; GFX9-NEXT: .LBB10_29: ; %frem.loop_body ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v14, v12 ; GFX9-NEXT: v_mul_f32_e32 v12, v14, v13 @@ -8584,7 +8584,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB10_29 ; GFX9-NEXT: ; %bb.30: ; %Flow ; GFX9-NEXT: v_mov_b32_e32 v12, v14 -; GFX9-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX9-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX9-NEXT: v_add_u32_e32 v11, -10, v11 ; GFX9-NEXT: v_ldexp_f32 v11, v12, v11 ; GFX9-NEXT: v_mul_f32_e32 v12, v11, v13 @@ -8640,7 +8640,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cvt_f32_f16_e64 v5, |v0| ; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5 ; GFX10-NEXT: s_cbranch_vccz .LBB10_2 -; GFX10-NEXT: ; %bb.1: ; %frem.else +; GFX10-NEXT: ; %bb.1: ; %frem.else86 ; GFX10-NEXT: v_bfi_b32 v4, 0x7fff, 0, v2 ; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5 ; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v4, vcc_lo @@ -8648,7 +8648,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB10_8 ; GFX10-NEXT: .LBB10_2: ; GFX10-NEXT: ; implicit-def: $vgpr4 -; GFX10-NEXT: .LBB10_3: ; %frem.compute +; GFX10-NEXT: .LBB10_3: ; %frem.compute85 ; GFX10-NEXT: v_frexp_mant_f32_e32 v4, v6 ; GFX10-NEXT: v_frexp_mant_f32_e32 v8, v5 ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v7, v6 @@ -8675,10 +8675,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v8 ; GFX10-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX10-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX10-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 11 -; GFX10-NEXT: .LBB10_5: ; %frem.loop_body +; GFX10-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v9, v6 ; GFX10-NEXT: s_add_i32 s2, s2, -11 @@ -8694,7 +8694,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.6: ; %Flow133 ; GFX10-NEXT: v_mov_b32_e32 v8, s2 ; GFX10-NEXT: v_mov_b32_e32 v6, v9 -; GFX10-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX10-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX10-NEXT: v_add_nc_u32_e32 v8, -10, v8 ; GFX10-NEXT: v_ldexp_f32 v6, v6, v8 ; GFX10-NEXT: v_mul_f32_e32 v7, v6, v7 @@ -8712,7 +8712,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cvt_f32_f16_e64 v8, |v5| ; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v8, v7 ; GFX10-NEXT: s_cbranch_vccz .LBB10_10 -; GFX10-NEXT: ; %bb.9: ; %frem.else20 +; GFX10-NEXT: ; %bb.9: ; %frem.else53 ; GFX10-NEXT: v_bfi_b32 v6, 0x7fff, 0, v5 ; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v8, v7 ; GFX10-NEXT: v_cndmask_b32_e32 v6, v5, v6, vcc_lo @@ -8720,7 +8720,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB10_16 ; GFX10-NEXT: .LBB10_10: ; GFX10-NEXT: ; implicit-def: $vgpr6 -; GFX10-NEXT: .LBB10_11: ; %frem.compute19 +; GFX10-NEXT: .LBB10_11: ; %frem.compute52 ; GFX10-NEXT: v_frexp_mant_f32_e32 v6, v8 ; GFX10-NEXT: v_frexp_mant_f32_e32 v10, v7 ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v9, v8 @@ -8747,10 +8747,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v10 ; GFX10-NEXT: v_div_fixup_f32 v9, v9, v7, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX10-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX10-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 11 -; GFX10-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX10-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v11, v8 ; GFX10-NEXT: s_add_i32 s2, s2, -11 @@ -8766,7 +8766,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.14: ; %Flow129 ; GFX10-NEXT: v_mov_b32_e32 v10, s2 ; GFX10-NEXT: v_mov_b32_e32 v8, v11 -; GFX10-NEXT: .LBB10_15: ; %frem.loop_exit28 +; GFX10-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX10-NEXT: v_add_nc_u32_e32 v10, -10, v10 ; GFX10-NEXT: v_ldexp_f32 v8, v8, v10 ; GFX10-NEXT: v_mul_f32_e32 v9, v8, v9 @@ -8783,7 +8783,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cvt_f32_f16_e64 v8, |v1| ; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v9, v8 ; GFX10-NEXT: s_cbranch_vccz .LBB10_18 -; GFX10-NEXT: ; %bb.17: ; %frem.else53 +; GFX10-NEXT: ; %bb.17: ; %frem.else20 ; GFX10-NEXT: v_bfi_b32 v7, 0x7fff, 0, v3 ; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v9, v8 ; GFX10-NEXT: v_cndmask_b32_e32 v7, v3, v7, vcc_lo @@ -8791,7 +8791,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB10_24 ; GFX10-NEXT: .LBB10_18: ; GFX10-NEXT: ; implicit-def: $vgpr7 -; GFX10-NEXT: .LBB10_19: ; %frem.compute52 +; GFX10-NEXT: .LBB10_19: ; %frem.compute19 ; GFX10-NEXT: v_frexp_mant_f32_e32 v7, v9 ; GFX10-NEXT: v_frexp_mant_f32_e32 v11, v8 ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v10, v9 @@ -8818,10 +8818,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v11 ; GFX10-NEXT: v_div_fixup_f32 v10, v10, v8, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX10-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; GFX10-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 11 -; GFX10-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX10-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v12, v9 ; GFX10-NEXT: s_add_i32 s2, s2, -11 @@ -8837,7 +8837,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.22: ; %Flow125 ; GFX10-NEXT: v_mov_b32_e32 v11, s2 ; GFX10-NEXT: v_mov_b32_e32 v9, v12 -; GFX10-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX10-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX10-NEXT: v_add_nc_u32_e32 v11, -10, v11 ; GFX10-NEXT: v_ldexp_f32 v9, v9, v11 ; GFX10-NEXT: v_mul_f32_e32 v10, v9, v10 @@ -8855,7 +8855,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cvt_f32_f16_e64 v11, |v8| ; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v11, v10 ; GFX10-NEXT: s_cbranch_vccz .LBB10_26 -; GFX10-NEXT: ; %bb.25: ; %frem.else86 +; GFX10-NEXT: ; %bb.25: ; %frem.else ; GFX10-NEXT: v_bfi_b32 v9, 0x7fff, 0, v8 ; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v11, v10 ; GFX10-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc_lo @@ -8863,7 +8863,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB10_32 ; GFX10-NEXT: .LBB10_26: ; GFX10-NEXT: ; implicit-def: $vgpr9 -; GFX10-NEXT: .LBB10_27: ; %frem.compute85 +; GFX10-NEXT: .LBB10_27: ; %frem.compute ; GFX10-NEXT: v_frexp_mant_f32_e32 v9, v11 ; GFX10-NEXT: v_frexp_mant_f32_e32 v13, v10 ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v12, v11 @@ -8890,10 +8890,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v13 ; GFX10-NEXT: v_div_fixup_f32 v12, v12, v10, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX10-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX10-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 11 -; GFX10-NEXT: .LBB10_29: ; %frem.loop_body93 +; GFX10-NEXT: .LBB10_29: ; %frem.loop_body ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v14, v11 ; GFX10-NEXT: s_add_i32 s2, s2, -11 @@ -8909,7 +8909,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.30: ; %Flow ; GFX10-NEXT: v_mov_b32_e32 v13, s2 ; GFX10-NEXT: v_mov_b32_e32 v11, v14 -; GFX10-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX10-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX10-NEXT: v_add_nc_u32_e32 v13, -10, v13 ; GFX10-NEXT: v_ldexp_f32 v11, v11, v13 ; GFX10-NEXT: v_mul_f32_e32 v12, v11, v12 @@ -8963,7 +8963,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_2 -; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else +; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else86 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5 @@ -8974,7 +8974,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_branch .LBB10_8 ; GFX11-TRUE16-NEXT: .LBB10_2: ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 -; GFX11-TRUE16-NEXT: .LBB10_3: ; %frem.compute +; GFX11-TRUE16-NEXT: .LBB10_3: ; %frem.compute85 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v4, v6 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v8, v5 ; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v7, v6 @@ -9010,11 +9010,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX11-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX11-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body +; GFX11-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v9, v6 @@ -9034,7 +9034,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: ; %bb.6: ; %Flow133 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s2 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v9 -; GFX11-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX11-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, -10, v8 ; GFX11-TRUE16-NEXT: v_ldexp_f32 v6, v6, v8 @@ -9061,7 +9061,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v9, v8 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_10 -; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else53 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v9, v8 @@ -9072,7 +9072,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_branch .LBB10_16 ; GFX11-TRUE16-NEXT: .LBB10_10: ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7 -; GFX11-TRUE16-NEXT: .LBB10_11: ; %frem.compute19 +; GFX11-TRUE16-NEXT: .LBB10_11: ; %frem.compute52 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v7, v9 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v11, v8 ; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v10, v9 @@ -9108,11 +9108,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_div_fixup_f32 v10, v10, v8, 1.0 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX11-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX11-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX11-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v12, v9 @@ -9132,7 +9132,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: ; %bb.14: ; %Flow129 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v11, s2 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v9, v12 -; GFX11-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit28 +; GFX11-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, -10, v11 ; GFX11-TRUE16-NEXT: v_ldexp_f32 v9, v9, v11 @@ -9156,7 +9156,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v10, v9 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_18 -; GFX11-TRUE16-NEXT: ; %bb.17: ; %frem.else53 +; GFX11-TRUE16-NEXT: ; %bb.17: ; %frem.else20 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v10, v9 @@ -9167,7 +9167,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_branch .LBB10_24 ; GFX11-TRUE16-NEXT: .LBB10_18: ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8 -; GFX11-TRUE16-NEXT: .LBB10_19: ; %frem.compute52 +; GFX11-TRUE16-NEXT: .LBB10_19: ; %frem.compute19 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v8, v10 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v12, v9 ; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v11, v10 @@ -9203,11 +9203,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX11-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; GFX11-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX11-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v10 @@ -9227,7 +9227,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: ; %bb.22: ; %Flow125 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v12, s2 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v10, v13 -; GFX11-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX11-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, -10, v12 ; GFX11-TRUE16-NEXT: v_ldexp_f32 v10, v10, v12 @@ -9254,7 +9254,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v13, v12 ; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_26 -; GFX11-TRUE16-NEXT: ; %bb.25: ; %frem.else86 +; GFX11-TRUE16-NEXT: ; %bb.25: ; %frem.else ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v9.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, 0 ; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v13, v12 @@ -9265,7 +9265,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_branch .LBB10_32 ; GFX11-TRUE16-NEXT: .LBB10_26: ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11 -; GFX11-TRUE16-NEXT: .LBB10_27: ; %frem.compute85 +; GFX11-TRUE16-NEXT: .LBB10_27: ; %frem.compute ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v11, v13 ; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v15, v12 ; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v14, v13 @@ -9301,11 +9301,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_div_fixup_f32 v14, v14, v12, 1.0 ; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX11-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX11-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body93 +; GFX11-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body ; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v16, v13 @@ -9325,7 +9325,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: ; %bb.30: ; %Flow ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v15, s2 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v16 -; GFX11-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX11-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, -10, v15 ; GFX11-TRUE16-NEXT: v_ldexp_f32 v13, v13, v15 @@ -9388,7 +9388,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5 ; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB10_2 -; GFX11-FAKE16-NEXT: ; %bb.1: ; %frem.else +; GFX11-FAKE16-NEXT: ; %bb.1: ; %frem.else86 ; GFX11-FAKE16-NEXT: v_bfi_b32 v4, 0x7fff, 0, v0 ; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -9397,7 +9397,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_branch .LBB10_8 ; GFX11-FAKE16-NEXT: .LBB10_2: ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr4 -; GFX11-FAKE16-NEXT: .LBB10_3: ; %frem.compute +; GFX11-FAKE16-NEXT: .LBB10_3: ; %frem.compute85 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v4, v6 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v8, v5 ; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v7, v6 @@ -9433,11 +9433,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX11-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX11-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body +; GFX11-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v9, v6 @@ -9457,7 +9457,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: ; %bb.6: ; %Flow133 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v8, s2 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v6, v9 -; GFX11-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX11-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, -10, v8 ; GFX11-FAKE16-NEXT: v_ldexp_f32 v6, v6, v8 @@ -9483,7 +9483,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v9, v8 ; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB10_10 -; GFX11-FAKE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX11-FAKE16-NEXT: ; %bb.9: ; %frem.else53 ; GFX11-FAKE16-NEXT: v_bfi_b32 v7, 0x7fff, 0, v5 ; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v9, v8 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -9492,7 +9492,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_branch .LBB10_16 ; GFX11-FAKE16-NEXT: .LBB10_10: ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr7 -; GFX11-FAKE16-NEXT: .LBB10_11: ; %frem.compute19 +; GFX11-FAKE16-NEXT: .LBB10_11: ; %frem.compute52 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v7, v9 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v11, v8 ; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v10, v9 @@ -9528,11 +9528,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_div_fixup_f32 v10, v10, v8, 1.0 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX11-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX11-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX11-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v12, v9 @@ -9552,7 +9552,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: ; %bb.14: ; %Flow129 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v11, s2 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v9, v12 -; GFX11-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit28 +; GFX11-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, -10, v11 ; GFX11-FAKE16-NEXT: v_ldexp_f32 v9, v9, v11 @@ -9575,7 +9575,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v10, v9 ; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB10_18 -; GFX11-FAKE16-NEXT: ; %bb.17: ; %frem.else53 +; GFX11-FAKE16-NEXT: ; %bb.17: ; %frem.else20 ; GFX11-FAKE16-NEXT: v_bfi_b32 v8, 0x7fff, 0, v1 ; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v10, v9 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -9584,7 +9584,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_branch .LBB10_24 ; GFX11-FAKE16-NEXT: .LBB10_18: ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr8 -; GFX11-FAKE16-NEXT: .LBB10_19: ; %frem.compute52 +; GFX11-FAKE16-NEXT: .LBB10_19: ; %frem.compute19 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v8, v10 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v12, v9 ; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v11, v10 @@ -9620,11 +9620,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX11-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; GFX11-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX11-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v13, v10 @@ -9644,7 +9644,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: ; %bb.22: ; %Flow125 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v12, s2 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v10, v13 -; GFX11-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX11-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, -10, v12 ; GFX11-FAKE16-NEXT: v_ldexp_f32 v10, v10, v12 @@ -9670,7 +9670,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v13, v12 ; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB10_26 -; GFX11-FAKE16-NEXT: ; %bb.25: ; %frem.else86 +; GFX11-FAKE16-NEXT: ; %bb.25: ; %frem.else ; GFX11-FAKE16-NEXT: v_bfi_b32 v11, 0x7fff, 0, v9 ; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v13, v12 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -9679,7 +9679,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_branch .LBB10_32 ; GFX11-FAKE16-NEXT: .LBB10_26: ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr11 -; GFX11-FAKE16-NEXT: .LBB10_27: ; %frem.compute85 +; GFX11-FAKE16-NEXT: .LBB10_27: ; %frem.compute ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v11, v13 ; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v15, v12 ; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v14, v13 @@ -9715,11 +9715,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_div_fixup_f32 v14, v14, v12, 1.0 ; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX11-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX11-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11 -; GFX11-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body93 +; GFX11-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body ; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v16, v13 @@ -9739,7 +9739,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-FAKE16-NEXT: ; %bb.30: ; %Flow ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v15, s2 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v13, v16 -; GFX11-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX11-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, -10, v15 ; GFX11-FAKE16-NEXT: v_ldexp_f32 v13, v13, v15 @@ -9804,7 +9804,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s8, s6 ; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_2 -; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else +; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else86 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v0.l, s5 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0 ; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s8, s6 @@ -9816,7 +9816,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_branch .LBB10_8 ; GFX1150-TRUE16-NEXT: .LBB10_2: ; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr0 -; GFX1150-TRUE16-NEXT: .LBB10_3: ; %frem.compute +; GFX1150-TRUE16-NEXT: .LBB10_3: ; %frem.compute85 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s6 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v0, s8 ; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s8 @@ -9851,11 +9851,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX1150-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1150-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX1150-TRUE16-NEXT: s_sub_i32 s6, s8, s6 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_add_i32 s6, s6, 11 -; GFX1150-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body +; GFX1150-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, v2 @@ -9877,7 +9877,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: ; %bb.6: ; %Flow133 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, s6 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1150-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX1150-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1150-TRUE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -9907,7 +9907,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_10 -; GFX1150-TRUE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1150-TRUE16-NEXT: ; %bb.9: ; %frem.else53 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, s8 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0 ; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s10, s9 @@ -9919,7 +9919,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_branch .LBB10_16 ; GFX1150-TRUE16-NEXT: .LBB10_10: ; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr1 -; GFX1150-TRUE16-NEXT: .LBB10_11: ; %frem.compute19 +; GFX1150-TRUE16-NEXT: .LBB10_11: ; %frem.compute52 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s9 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s10 ; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s10 @@ -9954,11 +9954,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX1150-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1150-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; GFX1150-TRUE16-NEXT: s_sub_i32 s9, s10, s9 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_add_i32 s9, s9, 11 -; GFX1150-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX1150-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v6, v3 @@ -9980,7 +9980,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: ; %bb.14: ; %Flow129 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, s9 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1150-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit28 +; GFX1150-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; GFX1150-TRUE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -10008,7 +10008,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_18 -; GFX1150-TRUE16-NEXT: ; %bb.17: ; %frem.else53 +; GFX1150-TRUE16-NEXT: ; %bb.17: ; %frem.else20 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, s7 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0 ; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s10, s9 @@ -10020,7 +10020,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_branch .LBB10_24 ; GFX1150-TRUE16-NEXT: .LBB10_18: ; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr2 -; GFX1150-TRUE16-NEXT: .LBB10_19: ; %frem.compute52 +; GFX1150-TRUE16-NEXT: .LBB10_19: ; %frem.compute19 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v3, s9 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s10 ; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v5, s10 @@ -10055,11 +10055,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6 ; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX1150-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; GFX1150-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; GFX1150-TRUE16-NEXT: s_sub_i32 s9, s10, s9 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_add_i32 s9, s9, 11 -; GFX1150-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX1150-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v7, v4 @@ -10081,7 +10081,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: ; %bb.22: ; %Flow125 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v6, s9 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, v7 -; GFX1150-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX1150-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v6, -10, v6 ; GFX1150-TRUE16-NEXT: v_ldexp_f32 v4, v4, v6 @@ -10111,7 +10111,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s12, s11 ; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_26 -; GFX1150-TRUE16-NEXT: ; %bb.25: ; %frem.else86 +; GFX1150-TRUE16-NEXT: ; %bb.25: ; %frem.else ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, s10 ; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0 ; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s12, s11 @@ -10123,7 +10123,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_branch .LBB10_32 ; GFX1150-TRUE16-NEXT: .LBB10_26: ; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr3 -; GFX1150-TRUE16-NEXT: .LBB10_27: ; %frem.compute85 +; GFX1150-TRUE16-NEXT: .LBB10_27: ; %frem.compute ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v4, s11 ; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v3, s12 ; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v6, s12 @@ -10158,11 +10158,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v7 ; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX1150-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX1150-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX1150-TRUE16-NEXT: s_sub_i32 s11, s12, s11 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_add_i32 s11, s11, 11 -; GFX1150-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body93 +; GFX1150-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body ; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v8, v5 @@ -10184,7 +10184,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: ; %bb.30: ; %Flow ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v7, s11 ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, v8 -; GFX1150-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX1150-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v7, -10, v7 ; GFX1150-TRUE16-NEXT: v_ldexp_f32 v5, v5, v7 @@ -10265,7 +10265,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s8, s6 ; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB10_2 -; GFX1150-FAKE16-NEXT: ; %bb.1: ; %frem.else +; GFX1150-FAKE16-NEXT: ; %bb.1: ; %frem.else86 ; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s8, s6 ; GFX1150-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, 0, s5 ; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -10275,7 +10275,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_branch .LBB10_8 ; GFX1150-FAKE16-NEXT: .LBB10_2: ; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr0 -; GFX1150-FAKE16-NEXT: .LBB10_3: ; %frem.compute +; GFX1150-FAKE16-NEXT: .LBB10_3: ; %frem.compute85 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s6 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v0, s8 ; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s8 @@ -10310,11 +10310,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX1150-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1150-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX1150-FAKE16-NEXT: s_sub_i32 s6, s8, s6 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: s_add_i32 s6, s6, 11 -; GFX1150-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body +; GFX1150-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v5, v2 @@ -10336,7 +10336,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: ; %bb.6: ; %Flow133 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v4, s6 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1150-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX1150-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1150-FAKE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -10365,7 +10365,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB10_10 -; GFX1150-FAKE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1150-FAKE16-NEXT: ; %bb.9: ; %frem.else53 ; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s10, s9 ; GFX1150-FAKE16-NEXT: v_bfi_b32 v1, 0x7fff, 0, s8 ; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -10375,7 +10375,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_branch .LBB10_16 ; GFX1150-FAKE16-NEXT: .LBB10_10: ; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr1 -; GFX1150-FAKE16-NEXT: .LBB10_11: ; %frem.compute19 +; GFX1150-FAKE16-NEXT: .LBB10_11: ; %frem.compute52 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s9 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s10 ; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s10 @@ -10410,11 +10410,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX1150-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1150-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; GFX1150-FAKE16-NEXT: s_sub_i32 s9, s10, s9 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: s_add_i32 s9, s9, 11 -; GFX1150-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX1150-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v6, v3 @@ -10436,7 +10436,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: ; %bb.14: ; %Flow129 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v5, s9 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1150-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit28 +; GFX1150-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; GFX1150-FAKE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -10463,7 +10463,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB10_18 -; GFX1150-FAKE16-NEXT: ; %bb.17: ; %frem.else53 +; GFX1150-FAKE16-NEXT: ; %bb.17: ; %frem.else20 ; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s10, s9 ; GFX1150-FAKE16-NEXT: v_bfi_b32 v2, 0x7fff, 0, s7 ; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -10473,7 +10473,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_branch .LBB10_24 ; GFX1150-FAKE16-NEXT: .LBB10_18: ; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr2 -; GFX1150-FAKE16-NEXT: .LBB10_19: ; %frem.compute52 +; GFX1150-FAKE16-NEXT: .LBB10_19: ; %frem.compute19 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v3, s9 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s10 ; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v5, s10 @@ -10508,11 +10508,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6 ; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX1150-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; GFX1150-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; GFX1150-FAKE16-NEXT: s_sub_i32 s9, s10, s9 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: s_add_i32 s9, s9, 11 -; GFX1150-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX1150-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v7, v4 @@ -10534,7 +10534,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: ; %bb.22: ; %Flow125 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v6, s9 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v4, v7 -; GFX1150-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX1150-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v6, -10, v6 ; GFX1150-FAKE16-NEXT: v_ldexp_f32 v4, v4, v6 @@ -10563,7 +10563,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s12, s11 ; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB10_26 -; GFX1150-FAKE16-NEXT: ; %bb.25: ; %frem.else86 +; GFX1150-FAKE16-NEXT: ; %bb.25: ; %frem.else ; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s12, s11 ; GFX1150-FAKE16-NEXT: v_bfi_b32 v3, 0x7fff, 0, s10 ; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -10573,7 +10573,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: s_branch .LBB10_32 ; GFX1150-FAKE16-NEXT: .LBB10_26: ; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr3 -; GFX1150-FAKE16-NEXT: .LBB10_27: ; %frem.compute85 +; GFX1150-FAKE16-NEXT: .LBB10_27: ; %frem.compute ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v4, s11 ; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v3, s12 ; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v6, s12 @@ -10608,11 +10608,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v7 ; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX1150-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX1150-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX1150-FAKE16-NEXT: s_sub_i32 s11, s12, s11 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: s_add_i32 s11, s11, 11 -; GFX1150-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body93 +; GFX1150-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body ; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v8, v5 @@ -10634,7 +10634,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-FAKE16-NEXT: ; %bb.30: ; %Flow ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v7, s11 ; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v5, v8 -; GFX1150-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX1150-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v7, -10, v7 ; GFX1150-FAKE16-NEXT: v_ldexp_f32 v5, v5, v7 @@ -10712,7 +10712,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s8, s6 ; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_2 -; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else +; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else86 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v0.l, s5 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0 ; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s8, s6 @@ -10724,7 +10724,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_branch .LBB10_8 ; GFX1200-TRUE16-NEXT: .LBB10_2: ; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr0 -; GFX1200-TRUE16-NEXT: .LBB10_3: ; %frem.compute +; GFX1200-TRUE16-NEXT: .LBB10_3: ; %frem.compute85 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s6 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v0, s8 ; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s8 @@ -10759,11 +10759,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX1200-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1200-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX1200-TRUE16-NEXT: s_sub_co_i32 s6, s8, s6 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_add_co_i32 s6, s6, 11 -; GFX1200-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body +; GFX1200-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v5, v2 @@ -10787,7 +10787,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: ; %bb.6: ; %Flow133 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v4, s6 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1200-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX1200-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1200-TRUE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -10821,7 +10821,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_10 -; GFX1200-TRUE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1200-TRUE16-NEXT: ; %bb.9: ; %frem.else53 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, s8 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0 ; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s10, s9 @@ -10833,7 +10833,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_branch .LBB10_16 ; GFX1200-TRUE16-NEXT: .LBB10_10: ; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr1 -; GFX1200-TRUE16-NEXT: .LBB10_11: ; %frem.compute19 +; GFX1200-TRUE16-NEXT: .LBB10_11: ; %frem.compute52 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s9 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s10 ; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s10 @@ -10869,11 +10869,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX1200-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1200-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; GFX1200-TRUE16-NEXT: s_sub_co_i32 s9, s10, s9 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_add_co_i32 s9, s9, 11 -; GFX1200-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX1200-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v6, v3 @@ -10897,7 +10897,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: ; %bb.14: ; %Flow129 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v5, s9 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1200-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit28 +; GFX1200-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; GFX1200-TRUE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -10928,7 +10928,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_18 -; GFX1200-TRUE16-NEXT: ; %bb.17: ; %frem.else53 +; GFX1200-TRUE16-NEXT: ; %bb.17: ; %frem.else20 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, s7 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0 ; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s10, s9 @@ -10941,7 +10941,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_branch .LBB10_24 ; GFX1200-TRUE16-NEXT: .LBB10_18: ; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr2 -; GFX1200-TRUE16-NEXT: .LBB10_19: ; %frem.compute52 +; GFX1200-TRUE16-NEXT: .LBB10_19: ; %frem.compute19 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v3, s9 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s10 ; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v5, s10 @@ -10977,11 +10977,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6 ; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX1200-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; GFX1200-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; GFX1200-TRUE16-NEXT: s_sub_co_i32 s9, s10, s9 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_add_co_i32 s9, s9, 11 -; GFX1200-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX1200-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v7, v4 @@ -11005,7 +11005,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: ; %bb.22: ; %Flow125 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v6, s9 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v4, v7 -; GFX1200-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX1200-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v6, -10, v6 ; GFX1200-TRUE16-NEXT: v_ldexp_f32 v4, v4, v6 @@ -11039,7 +11039,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s12, s11 ; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_26 -; GFX1200-TRUE16-NEXT: ; %bb.25: ; %frem.else86 +; GFX1200-TRUE16-NEXT: ; %bb.25: ; %frem.else ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v3.l, s10 ; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0 ; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s12, s11 @@ -11051,7 +11051,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_branch .LBB10_32 ; GFX1200-TRUE16-NEXT: .LBB10_26: ; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr3 -; GFX1200-TRUE16-NEXT: .LBB10_27: ; %frem.compute85 +; GFX1200-TRUE16-NEXT: .LBB10_27: ; %frem.compute ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v4, s11 ; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v3, s12 ; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v6, s12 @@ -11087,11 +11087,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v7 ; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX1200-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX1200-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX1200-TRUE16-NEXT: s_sub_co_i32 s11, s12, s11 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_add_co_i32 s11, s11, 11 -; GFX1200-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body93 +; GFX1200-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body ; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v8, v5 @@ -11115,7 +11115,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: ; %bb.30: ; %Flow ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v7, s11 ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v5, v8 -; GFX1200-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX1200-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v7, -10, v7 ; GFX1200-TRUE16-NEXT: v_ldexp_f32 v5, v5, v7 @@ -11203,7 +11203,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s8, s6 ; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB10_2 -; GFX1200-FAKE16-NEXT: ; %bb.1: ; %frem.else +; GFX1200-FAKE16-NEXT: ; %bb.1: ; %frem.else86 ; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s8, s6 ; GFX1200-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, 0, s5 ; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -11213,7 +11213,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_branch .LBB10_8 ; GFX1200-FAKE16-NEXT: .LBB10_2: ; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr0 -; GFX1200-FAKE16-NEXT: .LBB10_3: ; %frem.compute +; GFX1200-FAKE16-NEXT: .LBB10_3: ; %frem.compute85 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s6 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v0, s8 ; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s8 @@ -11249,11 +11249,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4 ; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB10_7 -; GFX1200-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1200-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader ; GFX1200-FAKE16-NEXT: s_sub_co_i32 s6, s8, s6 ; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe ; GFX1200-FAKE16-NEXT: s_add_co_i32 s6, s6, 11 -; GFX1200-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body +; GFX1200-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body93 ; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v5, v2 @@ -11277,7 +11277,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: ; %bb.6: ; %Flow133 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v4, s6 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v2, v5 -; GFX1200-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit +; GFX1200-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit94 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v4, -10, v4 ; GFX1200-FAKE16-NEXT: v_ldexp_f32 v2, v2, v4 @@ -11310,7 +11310,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB10_10 -; GFX1200-FAKE16-NEXT: ; %bb.9: ; %frem.else20 +; GFX1200-FAKE16-NEXT: ; %bb.9: ; %frem.else53 ; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s10, s9 ; GFX1200-FAKE16-NEXT: v_bfi_b32 v1, 0x7fff, 0, s8 ; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -11321,7 +11321,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_branch .LBB10_16 ; GFX1200-FAKE16-NEXT: .LBB10_10: ; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr1 -; GFX1200-FAKE16-NEXT: .LBB10_11: ; %frem.compute19 +; GFX1200-FAKE16-NEXT: .LBB10_11: ; %frem.compute52 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s9 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s10 ; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s10 @@ -11357,11 +11357,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5 ; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB10_15 -; GFX1200-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader +; GFX1200-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader ; GFX1200-FAKE16-NEXT: s_sub_co_i32 s9, s10, s9 ; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe ; GFX1200-FAKE16-NEXT: s_add_co_i32 s9, s9, 11 -; GFX1200-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body27 +; GFX1200-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body60 ; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v6, v3 @@ -11385,7 +11385,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: ; %bb.14: ; %Flow129 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v5, s9 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v3, v6 -; GFX1200-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit28 +; GFX1200-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit61 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v5, -10, v5 ; GFX1200-FAKE16-NEXT: v_ldexp_f32 v3, v3, v5 @@ -11415,7 +11415,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s10, s9 ; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB10_18 -; GFX1200-FAKE16-NEXT: ; %bb.17: ; %frem.else53 +; GFX1200-FAKE16-NEXT: ; %bb.17: ; %frem.else20 ; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s10, s9 ; GFX1200-FAKE16-NEXT: v_bfi_b32 v2, 0x7fff, 0, s7 ; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -11426,7 +11426,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_branch .LBB10_24 ; GFX1200-FAKE16-NEXT: .LBB10_18: ; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr2 -; GFX1200-FAKE16-NEXT: .LBB10_19: ; %frem.compute52 +; GFX1200-FAKE16-NEXT: .LBB10_19: ; %frem.compute19 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v3, s9 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s10 ; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v5, s10 @@ -11462,11 +11462,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6 ; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB10_23 -; GFX1200-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader +; GFX1200-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader ; GFX1200-FAKE16-NEXT: s_sub_co_i32 s9, s10, s9 ; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe ; GFX1200-FAKE16-NEXT: s_add_co_i32 s9, s9, 11 -; GFX1200-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body60 +; GFX1200-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body27 ; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v7, v4 @@ -11490,7 +11490,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: ; %bb.22: ; %Flow125 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v6, s9 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v4, v7 -; GFX1200-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit61 +; GFX1200-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit28 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v6, -10, v6 ; GFX1200-FAKE16-NEXT: v_ldexp_f32 v4, v4, v6 @@ -11523,7 +11523,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2) ; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s12, s11 ; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB10_26 -; GFX1200-FAKE16-NEXT: ; %bb.25: ; %frem.else86 +; GFX1200-FAKE16-NEXT: ; %bb.25: ; %frem.else ; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s12, s11 ; GFX1200-FAKE16-NEXT: v_bfi_b32 v3, 0x7fff, 0, s10 ; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -11534,7 +11534,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: s_branch .LBB10_32 ; GFX1200-FAKE16-NEXT: .LBB10_26: ; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr3 -; GFX1200-FAKE16-NEXT: .LBB10_27: ; %frem.compute85 +; GFX1200-FAKE16-NEXT: .LBB10_27: ; %frem.compute ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v4, s11 ; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v3, s12 ; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v6, s12 @@ -11570,11 +11570,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v7 ; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB10_31 -; GFX1200-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader +; GFX1200-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX1200-FAKE16-NEXT: s_sub_co_i32 s11, s12, s11 ; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe ; GFX1200-FAKE16-NEXT: s_add_co_i32 s11, s11, 11 -; GFX1200-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body93 +; GFX1200-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body ; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v8, v5 @@ -11598,7 +11598,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-FAKE16-NEXT: ; %bb.30: ; %Flow ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v7, s11 ; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v5, v8 -; GFX1200-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit94 +; GFX1200-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit ; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v7, -10, v7 ; GFX1200-FAKE16-NEXT: v_ldexp_f32 v5, v5, v7 @@ -11686,7 +11686,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v2| ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: s_cbranch_vccz .LBB11_2 -; SI-NEXT: ; %bb.1: ; %frem.else +; SI-NEXT: ; %bb.1: ; %frem.else16 ; SI-NEXT: s_brev_b32 s2, -2 ; SI-NEXT: v_bfi_b32 v4, s2, 0, v0 ; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v2| @@ -11697,7 +11697,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB11_2: ; SI-NEXT: ; implicit-def: $vgpr4 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB11_3: ; %frem.compute +; SI-NEXT: .LBB11_3: ; %frem.compute15 ; SI-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v0|, s6 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v4, v0 @@ -11733,10 +11733,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; SI-NEXT: s_cmp_lt_i32 s3, 13 ; SI-NEXT: s_cbranch_scc1 .LBB11_7 -; SI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; SI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; SI-NEXT: s_sub_i32 s3, s4, s5 ; SI-NEXT: s_add_i32 s3, s3, 12 -; SI-NEXT: .LBB11_5: ; %frem.loop_body +; SI-NEXT: .LBB11_5: ; %frem.loop_body23 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v7, v5 ; SI-NEXT: v_mul_f32_e32 v5, v7, v6 @@ -11751,7 +11751,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB11_5 ; SI-NEXT: ; %bb.6: ; %Flow51 ; SI-NEXT: v_mov_b32_e32 v5, v7 -; SI-NEXT: .LBB11_7: ; %frem.loop_exit +; SI-NEXT: .LBB11_7: ; %frem.loop_exit24 ; SI-NEXT: s_add_i32 s3, s3, -11 ; SI-NEXT: v_ldexp_f32_e64 v5, v5, s3 ; SI-NEXT: v_mul_f32_e32 v6, v5, v6 @@ -11767,7 +11767,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v3| ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: s_cbranch_vccz .LBB11_10 -; SI-NEXT: ; %bb.9: ; %frem.else16 +; SI-NEXT: ; %bb.9: ; %frem.else ; SI-NEXT: s_brev_b32 s2, -2 ; SI-NEXT: v_bfi_b32 v5, s2, 0, v1 ; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v3| @@ -11778,7 +11778,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB11_10: ; SI-NEXT: ; implicit-def: $vgpr5 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB11_11: ; %frem.compute15 +; SI-NEXT: .LBB11_11: ; %frem.compute ; SI-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s6 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v5, v1 @@ -11814,10 +11814,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; SI-NEXT: s_cmp_lt_i32 s3, 13 ; SI-NEXT: s_cbranch_scc1 .LBB11_15 -; SI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; SI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; SI-NEXT: s_sub_i32 s3, s4, s5 ; SI-NEXT: s_add_i32 s3, s3, 12 -; SI-NEXT: .LBB11_13: ; %frem.loop_body23 +; SI-NEXT: .LBB11_13: ; %frem.loop_body ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v8, v6 ; SI-NEXT: v_mul_f32_e32 v6, v8, v7 @@ -11832,7 +11832,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB11_13 ; SI-NEXT: ; %bb.14: ; %Flow ; SI-NEXT: v_mov_b32_e32 v6, v8 -; SI-NEXT: .LBB11_15: ; %frem.loop_exit24 +; SI-NEXT: .LBB11_15: ; %frem.loop_exit ; SI-NEXT: s_add_i32 s3, s3, -11 ; SI-NEXT: v_ldexp_f32_e64 v6, v6, s3 ; SI-NEXT: v_mul_f32_e32 v7, v6, v7 @@ -11877,7 +11877,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v2| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB11_2 -; CI-NEXT: ; %bb.1: ; %frem.else +; CI-NEXT: ; %bb.1: ; %frem.else16 ; CI-NEXT: s_brev_b32 s2, -2 ; CI-NEXT: v_bfi_b32 v4, s2, 0, v0 ; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v2| @@ -11886,7 +11886,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB11_8 ; CI-NEXT: .LBB11_2: ; CI-NEXT: ; implicit-def: $vgpr4 -; CI-NEXT: .LBB11_3: ; %frem.compute +; CI-NEXT: .LBB11_3: ; %frem.compute15 ; CI-NEXT: v_frexp_mant_f32_e64 v5, |v2| ; CI-NEXT: v_ldexp_f32_e64 v5, v5, 1 ; CI-NEXT: v_div_scale_f32 v11, s[2:3], v5, v5, 1.0 @@ -11911,10 +11911,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v6 ; CI-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB11_7 -; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; CI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; CI-NEXT: v_sub_i32_e32 v6, vcc, v9, v10 ; CI-NEXT: v_add_i32_e32 v6, vcc, 12, v6 -; CI-NEXT: .LBB11_5: ; %frem.loop_body +; CI-NEXT: .LBB11_5: ; %frem.loop_body23 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v9, v7 ; CI-NEXT: v_mul_f32_e32 v7, v9, v8 @@ -11929,7 +11929,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB11_5 ; CI-NEXT: ; %bb.6: ; %Flow51 ; CI-NEXT: v_mov_b32_e32 v7, v9 -; CI-NEXT: .LBB11_7: ; %frem.loop_exit +; CI-NEXT: .LBB11_7: ; %frem.loop_exit24 ; CI-NEXT: v_add_i32_e32 v6, vcc, -11, v6 ; CI-NEXT: v_ldexp_f32_e32 v6, v7, v6 ; CI-NEXT: v_mul_f32_e32 v7, v6, v8 @@ -11945,7 +11945,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v3| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB11_10 -; CI-NEXT: ; %bb.9: ; %frem.else16 +; CI-NEXT: ; %bb.9: ; %frem.else ; CI-NEXT: s_brev_b32 s2, -2 ; CI-NEXT: v_bfi_b32 v5, s2, 0, v1 ; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v3| @@ -11954,7 +11954,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB11_16 ; CI-NEXT: .LBB11_10: ; CI-NEXT: ; implicit-def: $vgpr5 -; CI-NEXT: .LBB11_11: ; %frem.compute15 +; CI-NEXT: .LBB11_11: ; %frem.compute ; CI-NEXT: v_frexp_mant_f32_e64 v6, |v3| ; CI-NEXT: v_ldexp_f32_e64 v6, v6, 1 ; CI-NEXT: v_div_scale_f32 v12, s[2:3], v6, v6, 1.0 @@ -11979,10 +11979,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v7 ; CI-NEXT: v_div_fixup_f32 v9, v9, v6, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB11_15 -; CI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; CI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; CI-NEXT: v_sub_i32_e32 v7, vcc, v10, v11 ; CI-NEXT: v_add_i32_e32 v7, vcc, 12, v7 -; CI-NEXT: .LBB11_13: ; %frem.loop_body23 +; CI-NEXT: .LBB11_13: ; %frem.loop_body ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v10, v8 ; CI-NEXT: v_mul_f32_e32 v8, v10, v9 @@ -11997,7 +11997,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB11_13 ; CI-NEXT: ; %bb.14: ; %Flow ; CI-NEXT: v_mov_b32_e32 v8, v10 -; CI-NEXT: .LBB11_15: ; %frem.loop_exit24 +; CI-NEXT: .LBB11_15: ; %frem.loop_exit ; CI-NEXT: v_add_i32_e32 v7, vcc, -11, v7 ; CI-NEXT: v_ldexp_f32_e32 v7, v8, v7 ; CI-NEXT: v_mul_f32_e32 v8, v7, v9 @@ -12042,7 +12042,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v2| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB11_2 -; VI-NEXT: ; %bb.1: ; %frem.else +; VI-NEXT: ; %bb.1: ; %frem.else16 ; VI-NEXT: s_brev_b32 s2, -2 ; VI-NEXT: v_bfi_b32 v4, s2, 0, v0 ; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v2| @@ -12051,7 +12051,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB11_8 ; VI-NEXT: .LBB11_2: ; VI-NEXT: ; implicit-def: $vgpr4 -; VI-NEXT: .LBB11_3: ; %frem.compute +; VI-NEXT: .LBB11_3: ; %frem.compute15 ; VI-NEXT: v_frexp_mant_f32_e64 v5, |v2| ; VI-NEXT: v_ldexp_f32 v5, v5, 1 ; VI-NEXT: v_div_scale_f32 v11, s[2:3], v5, v5, 1.0 @@ -12076,10 +12076,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v6 ; VI-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB11_7 -; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; VI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; VI-NEXT: v_sub_u32_e32 v6, vcc, v9, v10 ; VI-NEXT: v_add_u32_e32 v6, vcc, 12, v6 -; VI-NEXT: .LBB11_5: ; %frem.loop_body +; VI-NEXT: .LBB11_5: ; %frem.loop_body23 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v9, v7 ; VI-NEXT: v_mul_f32_e32 v7, v9, v8 @@ -12094,7 +12094,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB11_5 ; VI-NEXT: ; %bb.6: ; %Flow51 ; VI-NEXT: v_mov_b32_e32 v7, v9 -; VI-NEXT: .LBB11_7: ; %frem.loop_exit +; VI-NEXT: .LBB11_7: ; %frem.loop_exit24 ; VI-NEXT: v_add_u32_e32 v6, vcc, -11, v6 ; VI-NEXT: v_ldexp_f32 v6, v7, v6 ; VI-NEXT: v_mul_f32_e32 v7, v6, v8 @@ -12110,7 +12110,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v3| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB11_10 -; VI-NEXT: ; %bb.9: ; %frem.else16 +; VI-NEXT: ; %bb.9: ; %frem.else ; VI-NEXT: s_brev_b32 s2, -2 ; VI-NEXT: v_bfi_b32 v5, s2, 0, v1 ; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v3| @@ -12119,7 +12119,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB11_16 ; VI-NEXT: .LBB11_10: ; VI-NEXT: ; implicit-def: $vgpr5 -; VI-NEXT: .LBB11_11: ; %frem.compute15 +; VI-NEXT: .LBB11_11: ; %frem.compute ; VI-NEXT: v_frexp_mant_f32_e64 v6, |v3| ; VI-NEXT: v_ldexp_f32 v6, v6, 1 ; VI-NEXT: v_div_scale_f32 v12, s[2:3], v6, v6, 1.0 @@ -12144,10 +12144,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v7 ; VI-NEXT: v_div_fixup_f32 v9, v9, v6, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB11_15 -; VI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; VI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; VI-NEXT: v_sub_u32_e32 v7, vcc, v10, v11 ; VI-NEXT: v_add_u32_e32 v7, vcc, 12, v7 -; VI-NEXT: .LBB11_13: ; %frem.loop_body23 +; VI-NEXT: .LBB11_13: ; %frem.loop_body ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v10, v8 ; VI-NEXT: v_mul_f32_e32 v8, v10, v9 @@ -12162,7 +12162,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB11_13 ; VI-NEXT: ; %bb.14: ; %Flow ; VI-NEXT: v_mov_b32_e32 v8, v10 -; VI-NEXT: .LBB11_15: ; %frem.loop_exit24 +; VI-NEXT: .LBB11_15: ; %frem.loop_exit ; VI-NEXT: v_add_u32_e32 v7, vcc, -11, v7 ; VI-NEXT: v_ldexp_f32 v7, v8, v7 ; VI-NEXT: v_mul_f32_e32 v8, v7, v9 @@ -12202,7 +12202,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v2| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB11_2 -; GFX9-NEXT: ; %bb.1: ; %frem.else +; GFX9-NEXT: ; %bb.1: ; %frem.else16 ; GFX9-NEXT: s_brev_b32 s2, -2 ; GFX9-NEXT: v_bfi_b32 v4, s2, 0, v0 ; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v2| @@ -12211,7 +12211,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB11_8 ; GFX9-NEXT: .LBB11_2: ; GFX9-NEXT: ; implicit-def: $vgpr4 -; GFX9-NEXT: .LBB11_3: ; %frem.compute +; GFX9-NEXT: .LBB11_3: ; %frem.compute15 ; GFX9-NEXT: v_frexp_mant_f32_e64 v5, |v2| ; GFX9-NEXT: v_ldexp_f32 v5, v5, 1 ; GFX9-NEXT: v_div_scale_f32 v11, s[2:3], v5, v5, 1.0 @@ -12236,10 +12236,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v6 ; GFX9-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB11_7 -; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX9-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX9-NEXT: v_sub_u32_e32 v6, v9, v10 ; GFX9-NEXT: v_add_u32_e32 v6, 12, v6 -; GFX9-NEXT: .LBB11_5: ; %frem.loop_body +; GFX9-NEXT: .LBB11_5: ; %frem.loop_body23 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v9, v7 ; GFX9-NEXT: v_mul_f32_e32 v7, v9, v8 @@ -12254,7 +12254,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB11_5 ; GFX9-NEXT: ; %bb.6: ; %Flow51 ; GFX9-NEXT: v_mov_b32_e32 v7, v9 -; GFX9-NEXT: .LBB11_7: ; %frem.loop_exit +; GFX9-NEXT: .LBB11_7: ; %frem.loop_exit24 ; GFX9-NEXT: v_add_u32_e32 v6, -11, v6 ; GFX9-NEXT: v_ldexp_f32 v6, v7, v6 ; GFX9-NEXT: v_mul_f32_e32 v7, v6, v8 @@ -12270,7 +12270,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v3| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB11_10 -; GFX9-NEXT: ; %bb.9: ; %frem.else16 +; GFX9-NEXT: ; %bb.9: ; %frem.else ; GFX9-NEXT: s_brev_b32 s2, -2 ; GFX9-NEXT: v_bfi_b32 v5, s2, 0, v1 ; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v3| @@ -12279,7 +12279,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB11_16 ; GFX9-NEXT: .LBB11_10: ; GFX9-NEXT: ; implicit-def: $vgpr5 -; GFX9-NEXT: .LBB11_11: ; %frem.compute15 +; GFX9-NEXT: .LBB11_11: ; %frem.compute ; GFX9-NEXT: v_frexp_mant_f32_e64 v6, |v3| ; GFX9-NEXT: v_ldexp_f32 v6, v6, 1 ; GFX9-NEXT: v_div_scale_f32 v12, s[2:3], v6, v6, 1.0 @@ -12304,10 +12304,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v7 ; GFX9-NEXT: v_div_fixup_f32 v9, v9, v6, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB11_15 -; GFX9-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX9-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX9-NEXT: v_sub_u32_e32 v7, v10, v11 ; GFX9-NEXT: v_add_u32_e32 v7, 12, v7 -; GFX9-NEXT: .LBB11_13: ; %frem.loop_body23 +; GFX9-NEXT: .LBB11_13: ; %frem.loop_body ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v10, v8 ; GFX9-NEXT: v_mul_f32_e32 v8, v10, v9 @@ -12322,7 +12322,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB11_13 ; GFX9-NEXT: ; %bb.14: ; %Flow ; GFX9-NEXT: v_mov_b32_e32 v8, v10 -; GFX9-NEXT: .LBB11_15: ; %frem.loop_exit24 +; GFX9-NEXT: .LBB11_15: ; %frem.loop_exit ; GFX9-NEXT: v_add_u32_e32 v7, -11, v7 ; GFX9-NEXT: v_ldexp_f32 v7, v8, v7 ; GFX9-NEXT: v_mul_f32_e32 v8, v7, v9 @@ -12363,7 +12363,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v0|, |v2| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB11_2 -; GFX10-NEXT: ; %bb.1: ; %frem.else +; GFX10-NEXT: ; %bb.1: ; %frem.else16 ; GFX10-NEXT: v_bfi_b32 v4, 0x7fffffff, 0, v0 ; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v0|, |v2| ; GFX10-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc_lo @@ -12371,7 +12371,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB11_8 ; GFX10-NEXT: .LBB11_2: ; GFX10-NEXT: ; implicit-def: $vgpr4 -; GFX10-NEXT: .LBB11_3: ; %frem.compute +; GFX10-NEXT: .LBB11_3: ; %frem.compute15 ; GFX10-NEXT: v_frexp_mant_f32_e64 v5, |v2| ; GFX10-NEXT: v_frexp_mant_f32_e64 v4, |v0| ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v7, v0 @@ -12398,10 +12398,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v8 ; GFX10-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB11_7 -; GFX10-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX10-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 12 -; GFX10-NEXT: .LBB11_5: ; %frem.loop_body +; GFX10-NEXT: .LBB11_5: ; %frem.loop_body23 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v9, v6 ; GFX10-NEXT: s_add_i32 s2, s2, -12 @@ -12417,7 +12417,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.6: ; %Flow51 ; GFX10-NEXT: v_mov_b32_e32 v8, s2 ; GFX10-NEXT: v_mov_b32_e32 v6, v9 -; GFX10-NEXT: .LBB11_7: ; %frem.loop_exit +; GFX10-NEXT: .LBB11_7: ; %frem.loop_exit24 ; GFX10-NEXT: v_add_nc_u32_e32 v8, -11, v8 ; GFX10-NEXT: v_ldexp_f32 v6, v6, v8 ; GFX10-NEXT: v_mul_f32_e32 v7, v6, v7 @@ -12432,7 +12432,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v1|, |v3| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB11_10 -; GFX10-NEXT: ; %bb.9: ; %frem.else16 +; GFX10-NEXT: ; %bb.9: ; %frem.else ; GFX10-NEXT: v_bfi_b32 v5, 0x7fffffff, 0, v1 ; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v1|, |v3| ; GFX10-NEXT: v_cndmask_b32_e32 v5, v1, v5, vcc_lo @@ -12440,7 +12440,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB11_16 ; GFX10-NEXT: .LBB11_10: ; GFX10-NEXT: ; implicit-def: $vgpr5 -; GFX10-NEXT: .LBB11_11: ; %frem.compute15 +; GFX10-NEXT: .LBB11_11: ; %frem.compute ; GFX10-NEXT: v_frexp_mant_f32_e64 v6, |v3| ; GFX10-NEXT: v_frexp_mant_f32_e64 v5, |v1| ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v8, v1 @@ -12467,10 +12467,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v9 ; GFX10-NEXT: v_div_fixup_f32 v8, v8, v6, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB11_15 -; GFX10-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX10-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 12 -; GFX10-NEXT: .LBB11_13: ; %frem.loop_body23 +; GFX10-NEXT: .LBB11_13: ; %frem.loop_body ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v10, v7 ; GFX10-NEXT: s_add_i32 s2, s2, -12 @@ -12486,7 +12486,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.14: ; %Flow ; GFX10-NEXT: v_mov_b32_e32 v9, s2 ; GFX10-NEXT: v_mov_b32_e32 v7, v10 -; GFX10-NEXT: .LBB11_15: ; %frem.loop_exit24 +; GFX10-NEXT: .LBB11_15: ; %frem.loop_exit ; GFX10-NEXT: v_add_nc_u32_e32 v9, -11, v9 ; GFX10-NEXT: v_ldexp_f32 v7, v7, v9 ; GFX10-NEXT: v_mul_f32_e32 v8, v7, v8 @@ -12524,7 +12524,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v0|, |v2| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB11_2 -; GFX11-NEXT: ; %bb.1: ; %frem.else +; GFX11-NEXT: ; %bb.1: ; %frem.else16 ; GFX11-NEXT: v_bfi_b32 v4, 0x7fffffff, 0, v0 ; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v0|, |v2| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -12533,7 +12533,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB11_8 ; GFX11-NEXT: .LBB11_2: ; GFX11-NEXT: ; implicit-def: $vgpr4 -; GFX11-NEXT: .LBB11_3: ; %frem.compute +; GFX11-NEXT: .LBB11_3: ; %frem.compute15 ; GFX11-NEXT: v_frexp_mant_f32_e64 v5, |v2| ; GFX11-NEXT: v_frexp_mant_f32_e64 v4, |v0| ; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v7, v0 @@ -12569,11 +12569,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB11_7 -; GFX11-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX11-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 12 -; GFX11-NEXT: .LBB11_5: ; %frem.loop_body +; GFX11-NEXT: .LBB11_5: ; %frem.loop_body23 ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v9, v6 @@ -12593,7 +12593,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.6: ; %Flow51 ; GFX11-NEXT: v_mov_b32_e32 v8, s2 ; GFX11-NEXT: v_mov_b32_e32 v6, v9 -; GFX11-NEXT: .LBB11_7: ; %frem.loop_exit +; GFX11-NEXT: .LBB11_7: ; %frem.loop_exit24 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_nc_u32_e32 v8, -11, v8 ; GFX11-NEXT: v_ldexp_f32 v6, v6, v8 @@ -12613,7 +12613,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v1|, |v3| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB11_10 -; GFX11-NEXT: ; %bb.9: ; %frem.else16 +; GFX11-NEXT: ; %bb.9: ; %frem.else ; GFX11-NEXT: v_bfi_b32 v5, 0x7fffffff, 0, v1 ; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v1|, |v3| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -12622,7 +12622,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB11_16 ; GFX11-NEXT: .LBB11_10: ; GFX11-NEXT: ; implicit-def: $vgpr5 -; GFX11-NEXT: .LBB11_11: ; %frem.compute15 +; GFX11-NEXT: .LBB11_11: ; %frem.compute ; GFX11-NEXT: v_frexp_mant_f32_e64 v6, |v3| ; GFX11-NEXT: v_frexp_mant_f32_e64 v5, |v1| ; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v8, v1 @@ -12658,11 +12658,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f32 v8, v8, v6, 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB11_15 -; GFX11-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX11-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 12 -; GFX11-NEXT: .LBB11_13: ; %frem.loop_body23 +; GFX11-NEXT: .LBB11_13: ; %frem.loop_body ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v10, v7 @@ -12682,7 +12682,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.14: ; %Flow ; GFX11-NEXT: v_mov_b32_e32 v9, s2 ; GFX11-NEXT: v_mov_b32_e32 v7, v10 -; GFX11-NEXT: .LBB11_15: ; %frem.loop_exit24 +; GFX11-NEXT: .LBB11_15: ; %frem.loop_exit ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_nc_u32_e32 v9, -11, v9 ; GFX11-NEXT: v_ldexp_f32 v7, v7, v9 @@ -12730,7 +12730,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_cmp_ngt_f32 s3, s8 ; GFX1150-NEXT: s_cbranch_scc0 .LBB11_2 -; GFX1150-NEXT: ; %bb.1: ; %frem.else +; GFX1150-NEXT: ; %bb.1: ; %frem.else16 ; GFX1150-NEXT: s_cmp_eq_f32 s3, s8 ; GFX1150-NEXT: v_bfi_b32 v0, 0x7fffffff, 0, s6 ; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -12740,7 +12740,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_branch .LBB11_8 ; GFX1150-NEXT: .LBB11_2: ; GFX1150-NEXT: ; implicit-def: $vgpr0 -; GFX1150-NEXT: .LBB11_3: ; %frem.compute +; GFX1150-NEXT: .LBB11_3: ; %frem.compute15 ; GFX1150-NEXT: v_frexp_mant_f32_e64 v1, |s4| ; GFX1150-NEXT: v_frexp_mant_f32_e64 v0, |s6| ; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v3, s6 @@ -12775,11 +12775,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4 ; GFX1150-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB11_7 -; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX1150-NEXT: s_sub_i32 s7, s7, s8 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s7, s7, 12 -; GFX1150-NEXT: .LBB11_5: ; %frem.loop_body +; GFX1150-NEXT: .LBB11_5: ; %frem.loop_body23 ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_mov_b32_e32 v5, v2 @@ -12801,7 +12801,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.6: ; %Flow51 ; GFX1150-NEXT: v_mov_b32_e32 v4, s7 ; GFX1150-NEXT: v_mov_b32_e32 v2, v5 -; GFX1150-NEXT: .LBB11_7: ; %frem.loop_exit +; GFX1150-NEXT: .LBB11_7: ; %frem.loop_exit24 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-NEXT: v_add_nc_u32_e32 v4, -11, v4 ; GFX1150-NEXT: v_ldexp_f32 v2, v2, v4 @@ -12824,7 +12824,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_cmp_ngt_f32 s6, s8 ; GFX1150-NEXT: s_cbranch_scc0 .LBB11_10 -; GFX1150-NEXT: ; %bb.9: ; %frem.else16 +; GFX1150-NEXT: ; %bb.9: ; %frem.else ; GFX1150-NEXT: s_cmp_eq_f32 s6, s8 ; GFX1150-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, s5 ; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -12834,7 +12834,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_branch .LBB11_16 ; GFX1150-NEXT: .LBB11_10: ; GFX1150-NEXT: ; implicit-def: $vgpr1 -; GFX1150-NEXT: .LBB11_11: ; %frem.compute15 +; GFX1150-NEXT: .LBB11_11: ; %frem.compute ; GFX1150-NEXT: v_frexp_mant_f32_e64 v2, |s2| ; GFX1150-NEXT: v_frexp_mant_f32_e64 v1, |s5| ; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v4, s5 @@ -12869,11 +12869,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v5 ; GFX1150-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB11_15 -; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1150-NEXT: s_sub_i32 s7, s7, s8 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s7, s7, 12 -; GFX1150-NEXT: .LBB11_13: ; %frem.loop_body23 +; GFX1150-NEXT: .LBB11_13: ; %frem.loop_body ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_mov_b32_e32 v6, v3 @@ -12895,7 +12895,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.14: ; %Flow ; GFX1150-NEXT: v_mov_b32_e32 v5, s7 ; GFX1150-NEXT: v_mov_b32_e32 v3, v6 -; GFX1150-NEXT: .LBB11_15: ; %frem.loop_exit24 +; GFX1150-NEXT: .LBB11_15: ; %frem.loop_exit ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-NEXT: v_add_nc_u32_e32 v5, -11, v5 ; GFX1150-NEXT: v_ldexp_f32 v3, v3, v5 @@ -12950,7 +12950,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1200-NEXT: s_cmp_ngt_f32 s3, s8 ; GFX1200-NEXT: s_cbranch_scc0 .LBB11_2 -; GFX1200-NEXT: ; %bb.1: ; %frem.else +; GFX1200-NEXT: ; %bb.1: ; %frem.else16 ; GFX1200-NEXT: s_cmp_eq_f32 s3, s8 ; GFX1200-NEXT: v_bfi_b32 v0, 0x7fffffff, 0, s6 ; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -12960,7 +12960,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB11_8 ; GFX1200-NEXT: .LBB11_2: ; GFX1200-NEXT: ; implicit-def: $vgpr0 -; GFX1200-NEXT: .LBB11_3: ; %frem.compute +; GFX1200-NEXT: .LBB11_3: ; %frem.compute15 ; GFX1200-NEXT: v_frexp_mant_f32_e64 v1, |s4| ; GFX1200-NEXT: v_frexp_mant_f32_e64 v0, |s6| ; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v3, s6 @@ -12996,11 +12996,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4 ; GFX1200-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB11_7 -; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX1200-NEXT: s_sub_co_i32 s7, s7, s8 ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_add_co_i32 s7, s7, 12 -; GFX1200-NEXT: .LBB11_5: ; %frem.loop_body +; GFX1200-NEXT: .LBB11_5: ; %frem.loop_body23 ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-NEXT: v_mov_b32_e32 v5, v2 @@ -13024,7 +13024,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: ; %bb.6: ; %Flow51 ; GFX1200-NEXT: v_mov_b32_e32 v4, s7 ; GFX1200-NEXT: v_mov_b32_e32 v2, v5 -; GFX1200-NEXT: .LBB11_7: ; %frem.loop_exit +; GFX1200-NEXT: .LBB11_7: ; %frem.loop_exit24 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_add_nc_u32_e32 v4, -11, v4 ; GFX1200-NEXT: v_ldexp_f32 v2, v2, v4 @@ -13048,7 +13048,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_cmp_ngt_f32 s6, s8 ; GFX1200-NEXT: s_cbranch_scc0 .LBB11_10 -; GFX1200-NEXT: ; %bb.9: ; %frem.else16 +; GFX1200-NEXT: ; %bb.9: ; %frem.else ; GFX1200-NEXT: s_cmp_eq_f32 s6, s8 ; GFX1200-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, s5 ; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -13059,7 +13059,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB11_16 ; GFX1200-NEXT: .LBB11_10: ; GFX1200-NEXT: ; implicit-def: $vgpr1 -; GFX1200-NEXT: .LBB11_11: ; %frem.compute15 +; GFX1200-NEXT: .LBB11_11: ; %frem.compute ; GFX1200-NEXT: v_frexp_mant_f32_e64 v2, |s2| ; GFX1200-NEXT: v_frexp_mant_f32_e64 v1, |s5| ; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v4, s5 @@ -13095,11 +13095,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v5 ; GFX1200-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB11_15 -; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1200-NEXT: s_sub_co_i32 s7, s7, s8 ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_add_co_i32 s7, s7, 12 -; GFX1200-NEXT: .LBB11_13: ; %frem.loop_body23 +; GFX1200-NEXT: .LBB11_13: ; %frem.loop_body ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-NEXT: v_mov_b32_e32 v6, v3 @@ -13123,7 +13123,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: ; %bb.14: ; %Flow ; GFX1200-NEXT: v_mov_b32_e32 v5, s7 ; GFX1200-NEXT: v_mov_b32_e32 v3, v6 -; GFX1200-NEXT: .LBB11_15: ; %frem.loop_exit24 +; GFX1200-NEXT: .LBB11_15: ; %frem.loop_exit ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_add_nc_u32_e32 v5, -11, v5 ; GFX1200-NEXT: v_ldexp_f32 v3, v3, v5 @@ -13187,7 +13187,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v4| ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: s_cbranch_vccz .LBB12_2 -; SI-NEXT: ; %bb.1: ; %frem.else +; SI-NEXT: ; %bb.1: ; %frem.else78 ; SI-NEXT: s_brev_b32 s2, -2 ; SI-NEXT: v_bfi_b32 v8, s2, 0, v0 ; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v4| @@ -13198,7 +13198,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB12_2: ; SI-NEXT: ; implicit-def: $vgpr8 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB12_3: ; %frem.compute +; SI-NEXT: .LBB12_3: ; %frem.compute77 ; SI-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v0|, s6 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v8, v0 @@ -13234,10 +13234,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v10, v10, v8, 1.0 ; SI-NEXT: s_cmp_lt_i32 s3, 13 ; SI-NEXT: s_cbranch_scc1 .LBB12_7 -; SI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; SI-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; SI-NEXT: s_sub_i32 s3, s4, s5 ; SI-NEXT: s_add_i32 s3, s3, 12 -; SI-NEXT: .LBB12_5: ; %frem.loop_body +; SI-NEXT: .LBB12_5: ; %frem.loop_body85 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v11, v9 ; SI-NEXT: v_mul_f32_e32 v9, v11, v10 @@ -13252,7 +13252,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB12_5 ; SI-NEXT: ; %bb.6: ; %Flow125 ; SI-NEXT: v_mov_b32_e32 v9, v11 -; SI-NEXT: .LBB12_7: ; %frem.loop_exit +; SI-NEXT: .LBB12_7: ; %frem.loop_exit86 ; SI-NEXT: s_add_i32 s3, s3, -11 ; SI-NEXT: v_ldexp_f32_e64 v9, v9, s3 ; SI-NEXT: v_mul_f32_e32 v10, v9, v10 @@ -13268,7 +13268,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v5| ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: s_cbranch_vccz .LBB12_10 -; SI-NEXT: ; %bb.9: ; %frem.else16 +; SI-NEXT: ; %bb.9: ; %frem.else47 ; SI-NEXT: s_brev_b32 s2, -2 ; SI-NEXT: v_bfi_b32 v9, s2, 0, v1 ; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v5| @@ -13279,7 +13279,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB12_10: ; SI-NEXT: ; implicit-def: $vgpr9 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB12_11: ; %frem.compute15 +; SI-NEXT: .LBB12_11: ; %frem.compute46 ; SI-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s6 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v9, v1 @@ -13315,10 +13315,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0 ; SI-NEXT: s_cmp_lt_i32 s3, 13 ; SI-NEXT: s_cbranch_scc1 .LBB12_15 -; SI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; SI-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; SI-NEXT: s_sub_i32 s3, s4, s5 ; SI-NEXT: s_add_i32 s3, s3, 12 -; SI-NEXT: .LBB12_13: ; %frem.loop_body23 +; SI-NEXT: .LBB12_13: ; %frem.loop_body54 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v12, v10 ; SI-NEXT: v_mul_f32_e32 v10, v12, v11 @@ -13333,7 +13333,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB12_13 ; SI-NEXT: ; %bb.14: ; %Flow121 ; SI-NEXT: v_mov_b32_e32 v10, v12 -; SI-NEXT: .LBB12_15: ; %frem.loop_exit24 +; SI-NEXT: .LBB12_15: ; %frem.loop_exit55 ; SI-NEXT: s_add_i32 s3, s3, -11 ; SI-NEXT: v_ldexp_f32_e64 v10, v10, s3 ; SI-NEXT: v_mul_f32_e32 v11, v10, v11 @@ -13349,7 +13349,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v2|, |v6| ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: s_cbranch_vccz .LBB12_18 -; SI-NEXT: ; %bb.17: ; %frem.else47 +; SI-NEXT: ; %bb.17: ; %frem.else16 ; SI-NEXT: s_brev_b32 s2, -2 ; SI-NEXT: v_bfi_b32 v10, s2, 0, v2 ; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v2|, |v6| @@ -13360,7 +13360,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB12_18: ; SI-NEXT: ; implicit-def: $vgpr10 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB12_19: ; %frem.compute46 +; SI-NEXT: .LBB12_19: ; %frem.compute15 ; SI-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v2|, s6 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v10, v2 @@ -13396,10 +13396,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v12, v12, v10, 1.0 ; SI-NEXT: s_cmp_lt_i32 s3, 13 ; SI-NEXT: s_cbranch_scc1 .LBB12_23 -; SI-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; SI-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; SI-NEXT: s_sub_i32 s3, s4, s5 ; SI-NEXT: s_add_i32 s3, s3, 12 -; SI-NEXT: .LBB12_21: ; %frem.loop_body54 +; SI-NEXT: .LBB12_21: ; %frem.loop_body23 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v13, v11 ; SI-NEXT: v_mul_f32_e32 v11, v13, v12 @@ -13414,7 +13414,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB12_21 ; SI-NEXT: ; %bb.22: ; %Flow117 ; SI-NEXT: v_mov_b32_e32 v11, v13 -; SI-NEXT: .LBB12_23: ; %frem.loop_exit55 +; SI-NEXT: .LBB12_23: ; %frem.loop_exit24 ; SI-NEXT: s_add_i32 s3, s3, -11 ; SI-NEXT: v_ldexp_f32_e64 v11, v11, s3 ; SI-NEXT: v_mul_f32_e32 v12, v11, v12 @@ -13430,7 +13430,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v3|, |v7| ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: s_cbranch_vccz .LBB12_26 -; SI-NEXT: ; %bb.25: ; %frem.else78 +; SI-NEXT: ; %bb.25: ; %frem.else ; SI-NEXT: s_brev_b32 s2, -2 ; SI-NEXT: v_bfi_b32 v11, s2, 0, v3 ; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v3|, |v7| @@ -13441,7 +13441,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB12_26: ; SI-NEXT: ; implicit-def: $vgpr11 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB12_27: ; %frem.compute77 +; SI-NEXT: .LBB12_27: ; %frem.compute ; SI-NEXT: s_mov_b32 s6, 0x7f800000 ; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s6 ; SI-NEXT: v_frexp_exp_i32_f32_e32 v11, v3 @@ -13477,10 +13477,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f32 v13, v13, v11, 1.0 ; SI-NEXT: s_cmp_lt_i32 s3, 13 ; SI-NEXT: s_cbranch_scc1 .LBB12_31 -; SI-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; SI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; SI-NEXT: s_sub_i32 s3, s4, s5 ; SI-NEXT: s_add_i32 s3, s3, 12 -; SI-NEXT: .LBB12_29: ; %frem.loop_body85 +; SI-NEXT: .LBB12_29: ; %frem.loop_body ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v14, v12 ; SI-NEXT: v_mul_f32_e32 v12, v14, v13 @@ -13495,7 +13495,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: s_cbranch_scc1 .LBB12_29 ; SI-NEXT: ; %bb.30: ; %Flow ; SI-NEXT: v_mov_b32_e32 v12, v14 -; SI-NEXT: .LBB12_31: ; %frem.loop_exit86 +; SI-NEXT: .LBB12_31: ; %frem.loop_exit ; SI-NEXT: s_add_i32 s3, s3, -11 ; SI-NEXT: v_ldexp_f32_e64 v12, v12, s3 ; SI-NEXT: v_mul_f32_e32 v13, v12, v13 @@ -13548,7 +13548,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v4| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB12_2 -; CI-NEXT: ; %bb.1: ; %frem.else +; CI-NEXT: ; %bb.1: ; %frem.else78 ; CI-NEXT: s_brev_b32 s2, -2 ; CI-NEXT: v_bfi_b32 v8, s2, 0, v0 ; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v4| @@ -13557,7 +13557,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB12_8 ; CI-NEXT: .LBB12_2: ; CI-NEXT: ; implicit-def: $vgpr8 -; CI-NEXT: .LBB12_3: ; %frem.compute +; CI-NEXT: .LBB12_3: ; %frem.compute77 ; CI-NEXT: v_frexp_mant_f32_e64 v9, |v4| ; CI-NEXT: v_ldexp_f32_e64 v9, v9, 1 ; CI-NEXT: v_div_scale_f32 v15, s[2:3], v9, v9, 1.0 @@ -13582,10 +13582,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v10 ; CI-NEXT: v_div_fixup_f32 v12, v12, v9, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB12_7 -; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; CI-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; CI-NEXT: v_sub_i32_e32 v10, vcc, v13, v14 ; CI-NEXT: v_add_i32_e32 v10, vcc, 12, v10 -; CI-NEXT: .LBB12_5: ; %frem.loop_body +; CI-NEXT: .LBB12_5: ; %frem.loop_body85 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v13, v11 ; CI-NEXT: v_mul_f32_e32 v11, v13, v12 @@ -13600,7 +13600,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB12_5 ; CI-NEXT: ; %bb.6: ; %Flow125 ; CI-NEXT: v_mov_b32_e32 v11, v13 -; CI-NEXT: .LBB12_7: ; %frem.loop_exit +; CI-NEXT: .LBB12_7: ; %frem.loop_exit86 ; CI-NEXT: v_add_i32_e32 v10, vcc, -11, v10 ; CI-NEXT: v_ldexp_f32_e32 v10, v11, v10 ; CI-NEXT: v_mul_f32_e32 v11, v10, v12 @@ -13616,7 +13616,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v5| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB12_10 -; CI-NEXT: ; %bb.9: ; %frem.else16 +; CI-NEXT: ; %bb.9: ; %frem.else47 ; CI-NEXT: s_brev_b32 s2, -2 ; CI-NEXT: v_bfi_b32 v9, s2, 0, v1 ; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v5| @@ -13625,7 +13625,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB12_16 ; CI-NEXT: .LBB12_10: ; CI-NEXT: ; implicit-def: $vgpr9 -; CI-NEXT: .LBB12_11: ; %frem.compute15 +; CI-NEXT: .LBB12_11: ; %frem.compute46 ; CI-NEXT: v_frexp_mant_f32_e64 v10, |v5| ; CI-NEXT: v_ldexp_f32_e64 v10, v10, 1 ; CI-NEXT: v_div_scale_f32 v16, s[2:3], v10, v10, 1.0 @@ -13650,10 +13650,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v11 ; CI-NEXT: v_div_fixup_f32 v13, v13, v10, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB12_15 -; CI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; CI-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; CI-NEXT: v_sub_i32_e32 v11, vcc, v14, v15 ; CI-NEXT: v_add_i32_e32 v11, vcc, 12, v11 -; CI-NEXT: .LBB12_13: ; %frem.loop_body23 +; CI-NEXT: .LBB12_13: ; %frem.loop_body54 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v14, v12 ; CI-NEXT: v_mul_f32_e32 v12, v14, v13 @@ -13668,7 +13668,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB12_13 ; CI-NEXT: ; %bb.14: ; %Flow121 ; CI-NEXT: v_mov_b32_e32 v12, v14 -; CI-NEXT: .LBB12_15: ; %frem.loop_exit24 +; CI-NEXT: .LBB12_15: ; %frem.loop_exit55 ; CI-NEXT: v_add_i32_e32 v11, vcc, -11, v11 ; CI-NEXT: v_ldexp_f32_e32 v11, v12, v11 ; CI-NEXT: v_mul_f32_e32 v12, v11, v13 @@ -13684,7 +13684,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v2|, |v6| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB12_18 -; CI-NEXT: ; %bb.17: ; %frem.else47 +; CI-NEXT: ; %bb.17: ; %frem.else16 ; CI-NEXT: s_brev_b32 s2, -2 ; CI-NEXT: v_bfi_b32 v10, s2, 0, v2 ; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v2|, |v6| @@ -13693,7 +13693,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB12_24 ; CI-NEXT: .LBB12_18: ; CI-NEXT: ; implicit-def: $vgpr10 -; CI-NEXT: .LBB12_19: ; %frem.compute46 +; CI-NEXT: .LBB12_19: ; %frem.compute15 ; CI-NEXT: v_frexp_mant_f32_e64 v11, |v6| ; CI-NEXT: v_ldexp_f32_e64 v11, v11, 1 ; CI-NEXT: v_div_scale_f32 v17, s[2:3], v11, v11, 1.0 @@ -13718,10 +13718,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v12 ; CI-NEXT: v_div_fixup_f32 v14, v14, v11, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB12_23 -; CI-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; CI-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; CI-NEXT: v_sub_i32_e32 v12, vcc, v15, v16 ; CI-NEXT: v_add_i32_e32 v12, vcc, 12, v12 -; CI-NEXT: .LBB12_21: ; %frem.loop_body54 +; CI-NEXT: .LBB12_21: ; %frem.loop_body23 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v15, v13 ; CI-NEXT: v_mul_f32_e32 v13, v15, v14 @@ -13736,7 +13736,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB12_21 ; CI-NEXT: ; %bb.22: ; %Flow117 ; CI-NEXT: v_mov_b32_e32 v13, v15 -; CI-NEXT: .LBB12_23: ; %frem.loop_exit55 +; CI-NEXT: .LBB12_23: ; %frem.loop_exit24 ; CI-NEXT: v_add_i32_e32 v12, vcc, -11, v12 ; CI-NEXT: v_ldexp_f32_e32 v12, v13, v12 ; CI-NEXT: v_mul_f32_e32 v13, v12, v14 @@ -13752,7 +13752,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v3|, |v7| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB12_26 -; CI-NEXT: ; %bb.25: ; %frem.else78 +; CI-NEXT: ; %bb.25: ; %frem.else ; CI-NEXT: s_brev_b32 s2, -2 ; CI-NEXT: v_bfi_b32 v11, s2, 0, v3 ; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v3|, |v7| @@ -13761,7 +13761,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB12_32 ; CI-NEXT: .LBB12_26: ; CI-NEXT: ; implicit-def: $vgpr11 -; CI-NEXT: .LBB12_27: ; %frem.compute77 +; CI-NEXT: .LBB12_27: ; %frem.compute ; CI-NEXT: v_frexp_mant_f32_e64 v12, |v7| ; CI-NEXT: v_ldexp_f32_e64 v12, v12, 1 ; CI-NEXT: v_div_scale_f32 v18, s[2:3], v12, v12, 1.0 @@ -13786,10 +13786,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v13 ; CI-NEXT: v_div_fixup_f32 v15, v15, v12, 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB12_31 -; CI-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; CI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; CI-NEXT: v_sub_i32_e32 v13, vcc, v16, v17 ; CI-NEXT: v_add_i32_e32 v13, vcc, 12, v13 -; CI-NEXT: .LBB12_29: ; %frem.loop_body85 +; CI-NEXT: .LBB12_29: ; %frem.loop_body ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v16, v14 ; CI-NEXT: v_mul_f32_e32 v14, v16, v15 @@ -13804,7 +13804,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_cbranch_vccnz .LBB12_29 ; CI-NEXT: ; %bb.30: ; %Flow ; CI-NEXT: v_mov_b32_e32 v14, v16 -; CI-NEXT: .LBB12_31: ; %frem.loop_exit86 +; CI-NEXT: .LBB12_31: ; %frem.loop_exit ; CI-NEXT: v_add_i32_e32 v13, vcc, -11, v13 ; CI-NEXT: v_ldexp_f32_e32 v13, v14, v13 ; CI-NEXT: v_mul_f32_e32 v14, v13, v15 @@ -13857,7 +13857,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v4| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB12_2 -; VI-NEXT: ; %bb.1: ; %frem.else +; VI-NEXT: ; %bb.1: ; %frem.else78 ; VI-NEXT: s_brev_b32 s2, -2 ; VI-NEXT: v_bfi_b32 v8, s2, 0, v0 ; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v4| @@ -13866,7 +13866,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB12_8 ; VI-NEXT: .LBB12_2: ; VI-NEXT: ; implicit-def: $vgpr8 -; VI-NEXT: .LBB12_3: ; %frem.compute +; VI-NEXT: .LBB12_3: ; %frem.compute77 ; VI-NEXT: v_frexp_mant_f32_e64 v9, |v4| ; VI-NEXT: v_ldexp_f32 v9, v9, 1 ; VI-NEXT: v_div_scale_f32 v15, s[2:3], v9, v9, 1.0 @@ -13891,10 +13891,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v10 ; VI-NEXT: v_div_fixup_f32 v12, v12, v9, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB12_7 -; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; VI-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; VI-NEXT: v_sub_u32_e32 v10, vcc, v13, v14 ; VI-NEXT: v_add_u32_e32 v10, vcc, 12, v10 -; VI-NEXT: .LBB12_5: ; %frem.loop_body +; VI-NEXT: .LBB12_5: ; %frem.loop_body85 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v13, v11 ; VI-NEXT: v_mul_f32_e32 v11, v13, v12 @@ -13909,7 +13909,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB12_5 ; VI-NEXT: ; %bb.6: ; %Flow125 ; VI-NEXT: v_mov_b32_e32 v11, v13 -; VI-NEXT: .LBB12_7: ; %frem.loop_exit +; VI-NEXT: .LBB12_7: ; %frem.loop_exit86 ; VI-NEXT: v_add_u32_e32 v10, vcc, -11, v10 ; VI-NEXT: v_ldexp_f32 v10, v11, v10 ; VI-NEXT: v_mul_f32_e32 v11, v10, v12 @@ -13925,7 +13925,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v5| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB12_10 -; VI-NEXT: ; %bb.9: ; %frem.else16 +; VI-NEXT: ; %bb.9: ; %frem.else47 ; VI-NEXT: s_brev_b32 s2, -2 ; VI-NEXT: v_bfi_b32 v9, s2, 0, v1 ; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v5| @@ -13934,7 +13934,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB12_16 ; VI-NEXT: .LBB12_10: ; VI-NEXT: ; implicit-def: $vgpr9 -; VI-NEXT: .LBB12_11: ; %frem.compute15 +; VI-NEXT: .LBB12_11: ; %frem.compute46 ; VI-NEXT: v_frexp_mant_f32_e64 v10, |v5| ; VI-NEXT: v_ldexp_f32 v10, v10, 1 ; VI-NEXT: v_div_scale_f32 v16, s[2:3], v10, v10, 1.0 @@ -13959,10 +13959,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v11 ; VI-NEXT: v_div_fixup_f32 v13, v13, v10, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB12_15 -; VI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; VI-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; VI-NEXT: v_sub_u32_e32 v11, vcc, v14, v15 ; VI-NEXT: v_add_u32_e32 v11, vcc, 12, v11 -; VI-NEXT: .LBB12_13: ; %frem.loop_body23 +; VI-NEXT: .LBB12_13: ; %frem.loop_body54 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v14, v12 ; VI-NEXT: v_mul_f32_e32 v12, v14, v13 @@ -13977,7 +13977,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB12_13 ; VI-NEXT: ; %bb.14: ; %Flow121 ; VI-NEXT: v_mov_b32_e32 v12, v14 -; VI-NEXT: .LBB12_15: ; %frem.loop_exit24 +; VI-NEXT: .LBB12_15: ; %frem.loop_exit55 ; VI-NEXT: v_add_u32_e32 v11, vcc, -11, v11 ; VI-NEXT: v_ldexp_f32 v11, v12, v11 ; VI-NEXT: v_mul_f32_e32 v12, v11, v13 @@ -13993,7 +13993,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v2|, |v6| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB12_18 -; VI-NEXT: ; %bb.17: ; %frem.else47 +; VI-NEXT: ; %bb.17: ; %frem.else16 ; VI-NEXT: s_brev_b32 s2, -2 ; VI-NEXT: v_bfi_b32 v10, s2, 0, v2 ; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v2|, |v6| @@ -14002,7 +14002,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB12_24 ; VI-NEXT: .LBB12_18: ; VI-NEXT: ; implicit-def: $vgpr10 -; VI-NEXT: .LBB12_19: ; %frem.compute46 +; VI-NEXT: .LBB12_19: ; %frem.compute15 ; VI-NEXT: v_frexp_mant_f32_e64 v11, |v6| ; VI-NEXT: v_ldexp_f32 v11, v11, 1 ; VI-NEXT: v_div_scale_f32 v17, s[2:3], v11, v11, 1.0 @@ -14027,10 +14027,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v12 ; VI-NEXT: v_div_fixup_f32 v14, v14, v11, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB12_23 -; VI-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; VI-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; VI-NEXT: v_sub_u32_e32 v12, vcc, v15, v16 ; VI-NEXT: v_add_u32_e32 v12, vcc, 12, v12 -; VI-NEXT: .LBB12_21: ; %frem.loop_body54 +; VI-NEXT: .LBB12_21: ; %frem.loop_body23 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v15, v13 ; VI-NEXT: v_mul_f32_e32 v13, v15, v14 @@ -14045,7 +14045,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB12_21 ; VI-NEXT: ; %bb.22: ; %Flow117 ; VI-NEXT: v_mov_b32_e32 v13, v15 -; VI-NEXT: .LBB12_23: ; %frem.loop_exit55 +; VI-NEXT: .LBB12_23: ; %frem.loop_exit24 ; VI-NEXT: v_add_u32_e32 v12, vcc, -11, v12 ; VI-NEXT: v_ldexp_f32 v12, v13, v12 ; VI-NEXT: v_mul_f32_e32 v13, v12, v14 @@ -14061,7 +14061,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v3|, |v7| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB12_26 -; VI-NEXT: ; %bb.25: ; %frem.else78 +; VI-NEXT: ; %bb.25: ; %frem.else ; VI-NEXT: s_brev_b32 s2, -2 ; VI-NEXT: v_bfi_b32 v11, s2, 0, v3 ; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v3|, |v7| @@ -14070,7 +14070,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB12_32 ; VI-NEXT: .LBB12_26: ; VI-NEXT: ; implicit-def: $vgpr11 -; VI-NEXT: .LBB12_27: ; %frem.compute77 +; VI-NEXT: .LBB12_27: ; %frem.compute ; VI-NEXT: v_frexp_mant_f32_e64 v12, |v7| ; VI-NEXT: v_ldexp_f32 v12, v12, 1 ; VI-NEXT: v_div_scale_f32 v18, s[2:3], v12, v12, 1.0 @@ -14095,10 +14095,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v13 ; VI-NEXT: v_div_fixup_f32 v15, v15, v12, 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB12_31 -; VI-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; VI-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; VI-NEXT: v_sub_u32_e32 v13, vcc, v16, v17 ; VI-NEXT: v_add_u32_e32 v13, vcc, 12, v13 -; VI-NEXT: .LBB12_29: ; %frem.loop_body85 +; VI-NEXT: .LBB12_29: ; %frem.loop_body ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v16, v14 ; VI-NEXT: v_mul_f32_e32 v14, v16, v15 @@ -14113,7 +14113,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_cbranch_vccnz .LBB12_29 ; VI-NEXT: ; %bb.30: ; %Flow ; VI-NEXT: v_mov_b32_e32 v14, v16 -; VI-NEXT: .LBB12_31: ; %frem.loop_exit86 +; VI-NEXT: .LBB12_31: ; %frem.loop_exit ; VI-NEXT: v_add_u32_e32 v13, vcc, -11, v13 ; VI-NEXT: v_ldexp_f32 v13, v14, v13 ; VI-NEXT: v_mul_f32_e32 v14, v13, v15 @@ -14161,7 +14161,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v4| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB12_2 -; GFX9-NEXT: ; %bb.1: ; %frem.else +; GFX9-NEXT: ; %bb.1: ; %frem.else78 ; GFX9-NEXT: s_brev_b32 s2, -2 ; GFX9-NEXT: v_bfi_b32 v8, s2, 0, v0 ; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v4| @@ -14170,7 +14170,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB12_8 ; GFX9-NEXT: .LBB12_2: ; GFX9-NEXT: ; implicit-def: $vgpr8 -; GFX9-NEXT: .LBB12_3: ; %frem.compute +; GFX9-NEXT: .LBB12_3: ; %frem.compute77 ; GFX9-NEXT: v_frexp_mant_f32_e64 v9, |v4| ; GFX9-NEXT: v_ldexp_f32 v9, v9, 1 ; GFX9-NEXT: v_div_scale_f32 v15, s[2:3], v9, v9, 1.0 @@ -14195,10 +14195,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v10 ; GFX9-NEXT: v_div_fixup_f32 v12, v12, v9, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB12_7 -; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX9-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; GFX9-NEXT: v_sub_u32_e32 v10, v13, v14 ; GFX9-NEXT: v_add_u32_e32 v10, 12, v10 -; GFX9-NEXT: .LBB12_5: ; %frem.loop_body +; GFX9-NEXT: .LBB12_5: ; %frem.loop_body85 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v13, v11 ; GFX9-NEXT: v_mul_f32_e32 v11, v13, v12 @@ -14213,7 +14213,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB12_5 ; GFX9-NEXT: ; %bb.6: ; %Flow125 ; GFX9-NEXT: v_mov_b32_e32 v11, v13 -; GFX9-NEXT: .LBB12_7: ; %frem.loop_exit +; GFX9-NEXT: .LBB12_7: ; %frem.loop_exit86 ; GFX9-NEXT: v_add_u32_e32 v10, -11, v10 ; GFX9-NEXT: v_ldexp_f32 v10, v11, v10 ; GFX9-NEXT: v_mul_f32_e32 v11, v10, v12 @@ -14229,7 +14229,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v5| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB12_10 -; GFX9-NEXT: ; %bb.9: ; %frem.else16 +; GFX9-NEXT: ; %bb.9: ; %frem.else47 ; GFX9-NEXT: s_brev_b32 s2, -2 ; GFX9-NEXT: v_bfi_b32 v9, s2, 0, v1 ; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v5| @@ -14238,7 +14238,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB12_16 ; GFX9-NEXT: .LBB12_10: ; GFX9-NEXT: ; implicit-def: $vgpr9 -; GFX9-NEXT: .LBB12_11: ; %frem.compute15 +; GFX9-NEXT: .LBB12_11: ; %frem.compute46 ; GFX9-NEXT: v_frexp_mant_f32_e64 v10, |v5| ; GFX9-NEXT: v_ldexp_f32 v10, v10, 1 ; GFX9-NEXT: v_div_scale_f32 v16, s[2:3], v10, v10, 1.0 @@ -14263,10 +14263,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v11 ; GFX9-NEXT: v_div_fixup_f32 v13, v13, v10, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB12_15 -; GFX9-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX9-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; GFX9-NEXT: v_sub_u32_e32 v11, v14, v15 ; GFX9-NEXT: v_add_u32_e32 v11, 12, v11 -; GFX9-NEXT: .LBB12_13: ; %frem.loop_body23 +; GFX9-NEXT: .LBB12_13: ; %frem.loop_body54 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v14, v12 ; GFX9-NEXT: v_mul_f32_e32 v12, v14, v13 @@ -14281,7 +14281,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB12_13 ; GFX9-NEXT: ; %bb.14: ; %Flow121 ; GFX9-NEXT: v_mov_b32_e32 v12, v14 -; GFX9-NEXT: .LBB12_15: ; %frem.loop_exit24 +; GFX9-NEXT: .LBB12_15: ; %frem.loop_exit55 ; GFX9-NEXT: v_add_u32_e32 v11, -11, v11 ; GFX9-NEXT: v_ldexp_f32 v11, v12, v11 ; GFX9-NEXT: v_mul_f32_e32 v12, v11, v13 @@ -14297,7 +14297,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v2|, |v6| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB12_18 -; GFX9-NEXT: ; %bb.17: ; %frem.else47 +; GFX9-NEXT: ; %bb.17: ; %frem.else16 ; GFX9-NEXT: s_brev_b32 s2, -2 ; GFX9-NEXT: v_bfi_b32 v10, s2, 0, v2 ; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v2|, |v6| @@ -14306,7 +14306,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB12_24 ; GFX9-NEXT: .LBB12_18: ; GFX9-NEXT: ; implicit-def: $vgpr10 -; GFX9-NEXT: .LBB12_19: ; %frem.compute46 +; GFX9-NEXT: .LBB12_19: ; %frem.compute15 ; GFX9-NEXT: v_frexp_mant_f32_e64 v11, |v6| ; GFX9-NEXT: v_ldexp_f32 v11, v11, 1 ; GFX9-NEXT: v_div_scale_f32 v17, s[2:3], v11, v11, 1.0 @@ -14331,10 +14331,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v12 ; GFX9-NEXT: v_div_fixup_f32 v14, v14, v11, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB12_23 -; GFX9-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; GFX9-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; GFX9-NEXT: v_sub_u32_e32 v12, v15, v16 ; GFX9-NEXT: v_add_u32_e32 v12, 12, v12 -; GFX9-NEXT: .LBB12_21: ; %frem.loop_body54 +; GFX9-NEXT: .LBB12_21: ; %frem.loop_body23 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v15, v13 ; GFX9-NEXT: v_mul_f32_e32 v13, v15, v14 @@ -14349,7 +14349,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB12_21 ; GFX9-NEXT: ; %bb.22: ; %Flow117 ; GFX9-NEXT: v_mov_b32_e32 v13, v15 -; GFX9-NEXT: .LBB12_23: ; %frem.loop_exit55 +; GFX9-NEXT: .LBB12_23: ; %frem.loop_exit24 ; GFX9-NEXT: v_add_u32_e32 v12, -11, v12 ; GFX9-NEXT: v_ldexp_f32 v12, v13, v12 ; GFX9-NEXT: v_mul_f32_e32 v13, v12, v14 @@ -14365,7 +14365,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v3|, |v7| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB12_26 -; GFX9-NEXT: ; %bb.25: ; %frem.else78 +; GFX9-NEXT: ; %bb.25: ; %frem.else ; GFX9-NEXT: s_brev_b32 s2, -2 ; GFX9-NEXT: v_bfi_b32 v11, s2, 0, v3 ; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v3|, |v7| @@ -14374,7 +14374,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB12_32 ; GFX9-NEXT: .LBB12_26: ; GFX9-NEXT: ; implicit-def: $vgpr11 -; GFX9-NEXT: .LBB12_27: ; %frem.compute77 +; GFX9-NEXT: .LBB12_27: ; %frem.compute ; GFX9-NEXT: v_frexp_mant_f32_e64 v12, |v7| ; GFX9-NEXT: v_ldexp_f32 v12, v12, 1 ; GFX9-NEXT: v_div_scale_f32 v18, s[2:3], v12, v12, 1.0 @@ -14399,10 +14399,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v13 ; GFX9-NEXT: v_div_fixup_f32 v15, v15, v12, 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB12_31 -; GFX9-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; GFX9-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX9-NEXT: v_sub_u32_e32 v13, v16, v17 ; GFX9-NEXT: v_add_u32_e32 v13, 12, v13 -; GFX9-NEXT: .LBB12_29: ; %frem.loop_body85 +; GFX9-NEXT: .LBB12_29: ; %frem.loop_body ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v16, v14 ; GFX9-NEXT: v_mul_f32_e32 v14, v16, v15 @@ -14417,7 +14417,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_cbranch_vccnz .LBB12_29 ; GFX9-NEXT: ; %bb.30: ; %Flow ; GFX9-NEXT: v_mov_b32_e32 v14, v16 -; GFX9-NEXT: .LBB12_31: ; %frem.loop_exit86 +; GFX9-NEXT: .LBB12_31: ; %frem.loop_exit ; GFX9-NEXT: v_add_u32_e32 v13, -11, v13 ; GFX9-NEXT: v_ldexp_f32 v13, v14, v13 ; GFX9-NEXT: v_mul_f32_e32 v14, v13, v15 @@ -14466,7 +14466,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v0|, |v4| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB12_2 -; GFX10-NEXT: ; %bb.1: ; %frem.else +; GFX10-NEXT: ; %bb.1: ; %frem.else78 ; GFX10-NEXT: v_bfi_b32 v8, 0x7fffffff, 0, v0 ; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v0|, |v4| ; GFX10-NEXT: v_cndmask_b32_e32 v8, v0, v8, vcc_lo @@ -14474,7 +14474,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB12_8 ; GFX10-NEXT: .LBB12_2: ; GFX10-NEXT: ; implicit-def: $vgpr8 -; GFX10-NEXT: .LBB12_3: ; %frem.compute +; GFX10-NEXT: .LBB12_3: ; %frem.compute77 ; GFX10-NEXT: v_frexp_mant_f32_e64 v9, |v4| ; GFX10-NEXT: v_frexp_mant_f32_e64 v8, |v0| ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v11, v0 @@ -14501,10 +14501,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v12 ; GFX10-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB12_7 -; GFX10-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX10-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 12 -; GFX10-NEXT: .LBB12_5: ; %frem.loop_body +; GFX10-NEXT: .LBB12_5: ; %frem.loop_body85 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v13, v10 ; GFX10-NEXT: s_add_i32 s2, s2, -12 @@ -14520,7 +14520,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.6: ; %Flow125 ; GFX10-NEXT: v_mov_b32_e32 v12, s2 ; GFX10-NEXT: v_mov_b32_e32 v10, v13 -; GFX10-NEXT: .LBB12_7: ; %frem.loop_exit +; GFX10-NEXT: .LBB12_7: ; %frem.loop_exit86 ; GFX10-NEXT: v_add_nc_u32_e32 v12, -11, v12 ; GFX10-NEXT: v_ldexp_f32 v10, v10, v12 ; GFX10-NEXT: v_mul_f32_e32 v11, v10, v11 @@ -14535,7 +14535,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v1|, |v5| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB12_10 -; GFX10-NEXT: ; %bb.9: ; %frem.else16 +; GFX10-NEXT: ; %bb.9: ; %frem.else47 ; GFX10-NEXT: v_bfi_b32 v9, 0x7fffffff, 0, v1 ; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v1|, |v5| ; GFX10-NEXT: v_cndmask_b32_e32 v9, v1, v9, vcc_lo @@ -14543,7 +14543,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB12_16 ; GFX10-NEXT: .LBB12_10: ; GFX10-NEXT: ; implicit-def: $vgpr9 -; GFX10-NEXT: .LBB12_11: ; %frem.compute15 +; GFX10-NEXT: .LBB12_11: ; %frem.compute46 ; GFX10-NEXT: v_frexp_mant_f32_e64 v10, |v5| ; GFX10-NEXT: v_frexp_mant_f32_e64 v9, |v1| ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v12, v1 @@ -14570,10 +14570,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v13 ; GFX10-NEXT: v_div_fixup_f32 v12, v12, v10, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB12_15 -; GFX10-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX10-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 12 -; GFX10-NEXT: .LBB12_13: ; %frem.loop_body23 +; GFX10-NEXT: .LBB12_13: ; %frem.loop_body54 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v14, v11 ; GFX10-NEXT: s_add_i32 s2, s2, -12 @@ -14589,7 +14589,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.14: ; %Flow121 ; GFX10-NEXT: v_mov_b32_e32 v13, s2 ; GFX10-NEXT: v_mov_b32_e32 v11, v14 -; GFX10-NEXT: .LBB12_15: ; %frem.loop_exit24 +; GFX10-NEXT: .LBB12_15: ; %frem.loop_exit55 ; GFX10-NEXT: v_add_nc_u32_e32 v13, -11, v13 ; GFX10-NEXT: v_ldexp_f32 v11, v11, v13 ; GFX10-NEXT: v_mul_f32_e32 v12, v11, v12 @@ -14604,7 +14604,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v2|, |v6| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB12_18 -; GFX10-NEXT: ; %bb.17: ; %frem.else47 +; GFX10-NEXT: ; %bb.17: ; %frem.else16 ; GFX10-NEXT: v_bfi_b32 v10, 0x7fffffff, 0, v2 ; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v2|, |v6| ; GFX10-NEXT: v_cndmask_b32_e32 v10, v2, v10, vcc_lo @@ -14612,7 +14612,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB12_24 ; GFX10-NEXT: .LBB12_18: ; GFX10-NEXT: ; implicit-def: $vgpr10 -; GFX10-NEXT: .LBB12_19: ; %frem.compute46 +; GFX10-NEXT: .LBB12_19: ; %frem.compute15 ; GFX10-NEXT: v_frexp_mant_f32_e64 v11, |v6| ; GFX10-NEXT: v_frexp_mant_f32_e64 v10, |v2| ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v13, v2 @@ -14639,10 +14639,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v14 ; GFX10-NEXT: v_div_fixup_f32 v13, v13, v11, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB12_23 -; GFX10-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; GFX10-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 12 -; GFX10-NEXT: .LBB12_21: ; %frem.loop_body54 +; GFX10-NEXT: .LBB12_21: ; %frem.loop_body23 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v15, v12 ; GFX10-NEXT: s_add_i32 s2, s2, -12 @@ -14658,7 +14658,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.22: ; %Flow117 ; GFX10-NEXT: v_mov_b32_e32 v14, s2 ; GFX10-NEXT: v_mov_b32_e32 v12, v15 -; GFX10-NEXT: .LBB12_23: ; %frem.loop_exit55 +; GFX10-NEXT: .LBB12_23: ; %frem.loop_exit24 ; GFX10-NEXT: v_add_nc_u32_e32 v14, -11, v14 ; GFX10-NEXT: v_ldexp_f32 v12, v12, v14 ; GFX10-NEXT: v_mul_f32_e32 v13, v12, v13 @@ -14673,7 +14673,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v3|, |v7| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB12_26 -; GFX10-NEXT: ; %bb.25: ; %frem.else78 +; GFX10-NEXT: ; %bb.25: ; %frem.else ; GFX10-NEXT: v_bfi_b32 v11, 0x7fffffff, 0, v3 ; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v3|, |v7| ; GFX10-NEXT: v_cndmask_b32_e32 v11, v3, v11, vcc_lo @@ -14681,7 +14681,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB12_32 ; GFX10-NEXT: .LBB12_26: ; GFX10-NEXT: ; implicit-def: $vgpr11 -; GFX10-NEXT: .LBB12_27: ; %frem.compute77 +; GFX10-NEXT: .LBB12_27: ; %frem.compute ; GFX10-NEXT: v_frexp_mant_f32_e64 v12, |v7| ; GFX10-NEXT: v_frexp_mant_f32_e64 v11, |v3| ; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v14, v3 @@ -14708,10 +14708,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v15 ; GFX10-NEXT: v_div_fixup_f32 v14, v14, v12, 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB12_31 -; GFX10-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; GFX10-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 12 -; GFX10-NEXT: .LBB12_29: ; %frem.loop_body85 +; GFX10-NEXT: .LBB12_29: ; %frem.loop_body ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v16, v13 ; GFX10-NEXT: s_add_i32 s2, s2, -12 @@ -14727,7 +14727,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: ; %bb.30: ; %Flow ; GFX10-NEXT: v_mov_b32_e32 v15, s2 ; GFX10-NEXT: v_mov_b32_e32 v13, v16 -; GFX10-NEXT: .LBB12_31: ; %frem.loop_exit86 +; GFX10-NEXT: .LBB12_31: ; %frem.loop_exit ; GFX10-NEXT: v_add_nc_u32_e32 v15, -11, v15 ; GFX10-NEXT: v_ldexp_f32 v13, v13, v15 ; GFX10-NEXT: v_mul_f32_e32 v14, v13, v14 @@ -14773,7 +14773,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v0|, |v4| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB12_2 -; GFX11-NEXT: ; %bb.1: ; %frem.else +; GFX11-NEXT: ; %bb.1: ; %frem.else78 ; GFX11-NEXT: v_bfi_b32 v8, 0x7fffffff, 0, v0 ; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v0|, |v4| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -14782,7 +14782,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB12_8 ; GFX11-NEXT: .LBB12_2: ; GFX11-NEXT: ; implicit-def: $vgpr8 -; GFX11-NEXT: .LBB12_3: ; %frem.compute +; GFX11-NEXT: .LBB12_3: ; %frem.compute77 ; GFX11-NEXT: v_frexp_mant_f32_e64 v9, |v4| ; GFX11-NEXT: v_frexp_mant_f32_e64 v8, |v0| ; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v11, v0 @@ -14818,11 +14818,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB12_7 -; GFX11-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX11-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 12 -; GFX11-NEXT: .LBB12_5: ; %frem.loop_body +; GFX11-NEXT: .LBB12_5: ; %frem.loop_body85 ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v13, v10 @@ -14842,7 +14842,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.6: ; %Flow125 ; GFX11-NEXT: v_mov_b32_e32 v12, s2 ; GFX11-NEXT: v_mov_b32_e32 v10, v13 -; GFX11-NEXT: .LBB12_7: ; %frem.loop_exit +; GFX11-NEXT: .LBB12_7: ; %frem.loop_exit86 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_nc_u32_e32 v12, -11, v12 ; GFX11-NEXT: v_ldexp_f32 v10, v10, v12 @@ -14862,7 +14862,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v1|, |v5| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB12_10 -; GFX11-NEXT: ; %bb.9: ; %frem.else16 +; GFX11-NEXT: ; %bb.9: ; %frem.else47 ; GFX11-NEXT: v_bfi_b32 v9, 0x7fffffff, 0, v1 ; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v1|, |v5| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -14871,7 +14871,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB12_16 ; GFX11-NEXT: .LBB12_10: ; GFX11-NEXT: ; implicit-def: $vgpr9 -; GFX11-NEXT: .LBB12_11: ; %frem.compute15 +; GFX11-NEXT: .LBB12_11: ; %frem.compute46 ; GFX11-NEXT: v_frexp_mant_f32_e64 v10, |v5| ; GFX11-NEXT: v_frexp_mant_f32_e64 v9, |v1| ; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v12, v1 @@ -14907,11 +14907,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f32 v12, v12, v10, 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB12_15 -; GFX11-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX11-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 12 -; GFX11-NEXT: .LBB12_13: ; %frem.loop_body23 +; GFX11-NEXT: .LBB12_13: ; %frem.loop_body54 ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v14, v11 @@ -14931,7 +14931,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.14: ; %Flow121 ; GFX11-NEXT: v_mov_b32_e32 v13, s2 ; GFX11-NEXT: v_mov_b32_e32 v11, v14 -; GFX11-NEXT: .LBB12_15: ; %frem.loop_exit24 +; GFX11-NEXT: .LBB12_15: ; %frem.loop_exit55 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_nc_u32_e32 v13, -11, v13 ; GFX11-NEXT: v_ldexp_f32 v11, v11, v13 @@ -14951,7 +14951,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v2|, |v6| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB12_18 -; GFX11-NEXT: ; %bb.17: ; %frem.else47 +; GFX11-NEXT: ; %bb.17: ; %frem.else16 ; GFX11-NEXT: v_bfi_b32 v10, 0x7fffffff, 0, v2 ; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v2|, |v6| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -14960,7 +14960,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB12_24 ; GFX11-NEXT: .LBB12_18: ; GFX11-NEXT: ; implicit-def: $vgpr10 -; GFX11-NEXT: .LBB12_19: ; %frem.compute46 +; GFX11-NEXT: .LBB12_19: ; %frem.compute15 ; GFX11-NEXT: v_frexp_mant_f32_e64 v11, |v6| ; GFX11-NEXT: v_frexp_mant_f32_e64 v10, |v2| ; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v13, v2 @@ -14996,11 +14996,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f32 v13, v13, v11, 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB12_23 -; GFX11-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; GFX11-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 12 -; GFX11-NEXT: .LBB12_21: ; %frem.loop_body54 +; GFX11-NEXT: .LBB12_21: ; %frem.loop_body23 ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v15, v12 @@ -15020,7 +15020,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.22: ; %Flow117 ; GFX11-NEXT: v_mov_b32_e32 v14, s2 ; GFX11-NEXT: v_mov_b32_e32 v12, v15 -; GFX11-NEXT: .LBB12_23: ; %frem.loop_exit55 +; GFX11-NEXT: .LBB12_23: ; %frem.loop_exit24 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_nc_u32_e32 v14, -11, v14 ; GFX11-NEXT: v_ldexp_f32 v12, v12, v14 @@ -15040,7 +15040,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v3|, |v7| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB12_26 -; GFX11-NEXT: ; %bb.25: ; %frem.else78 +; GFX11-NEXT: ; %bb.25: ; %frem.else ; GFX11-NEXT: v_bfi_b32 v11, 0x7fffffff, 0, v3 ; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v3|, |v7| ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) @@ -15049,7 +15049,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB12_32 ; GFX11-NEXT: .LBB12_26: ; GFX11-NEXT: ; implicit-def: $vgpr11 -; GFX11-NEXT: .LBB12_27: ; %frem.compute77 +; GFX11-NEXT: .LBB12_27: ; %frem.compute ; GFX11-NEXT: v_frexp_mant_f32_e64 v12, |v7| ; GFX11-NEXT: v_frexp_mant_f32_e64 v11, |v3| ; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v14, v3 @@ -15085,11 +15085,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f32 v14, v14, v12, 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB12_31 -; GFX11-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; GFX11-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 12 -; GFX11-NEXT: .LBB12_29: ; %frem.loop_body85 +; GFX11-NEXT: .LBB12_29: ; %frem.loop_body ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_mov_b32_e32 v16, v13 @@ -15109,7 +15109,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.30: ; %Flow ; GFX11-NEXT: v_mov_b32_e32 v15, s2 ; GFX11-NEXT: v_mov_b32_e32 v13, v16 -; GFX11-NEXT: .LBB12_31: ; %frem.loop_exit86 +; GFX11-NEXT: .LBB12_31: ; %frem.loop_exit ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_nc_u32_e32 v15, -11, v15 ; GFX11-NEXT: v_ldexp_f32 v13, v13, v15 @@ -15170,7 +15170,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_cmp_ngt_f32 s5, s12 ; GFX1150-NEXT: s_cbranch_scc0 .LBB12_2 -; GFX1150-NEXT: ; %bb.1: ; %frem.else +; GFX1150-NEXT: ; %bb.1: ; %frem.else78 ; GFX1150-NEXT: s_cmp_eq_f32 s5, s12 ; GFX1150-NEXT: v_bfi_b32 v0, 0x7fffffff, 0, s8 ; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -15180,7 +15180,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_branch .LBB12_8 ; GFX1150-NEXT: .LBB12_2: ; GFX1150-NEXT: ; implicit-def: $vgpr0 -; GFX1150-NEXT: .LBB12_3: ; %frem.compute +; GFX1150-NEXT: .LBB12_3: ; %frem.compute77 ; GFX1150-NEXT: v_frexp_mant_f32_e64 v1, |s6| ; GFX1150-NEXT: v_frexp_mant_f32_e64 v0, |s8| ; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v3, s8 @@ -15215,11 +15215,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4 ; GFX1150-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB12_7 -; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; GFX1150-NEXT: s_sub_i32 s11, s11, s12 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s11, s11, 12 -; GFX1150-NEXT: .LBB12_5: ; %frem.loop_body +; GFX1150-NEXT: .LBB12_5: ; %frem.loop_body85 ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_mov_b32_e32 v5, v2 @@ -15241,7 +15241,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.6: ; %Flow125 ; GFX1150-NEXT: v_mov_b32_e32 v4, s11 ; GFX1150-NEXT: v_mov_b32_e32 v2, v5 -; GFX1150-NEXT: .LBB12_7: ; %frem.loop_exit +; GFX1150-NEXT: .LBB12_7: ; %frem.loop_exit86 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-NEXT: v_add_nc_u32_e32 v4, -11, v4 ; GFX1150-NEXT: v_ldexp_f32 v2, v2, v4 @@ -15264,7 +15264,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_cmp_ngt_f32 s8, s12 ; GFX1150-NEXT: s_cbranch_scc0 .LBB12_10 -; GFX1150-NEXT: ; %bb.9: ; %frem.else16 +; GFX1150-NEXT: ; %bb.9: ; %frem.else47 ; GFX1150-NEXT: s_cmp_eq_f32 s8, s12 ; GFX1150-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, s10 ; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -15274,7 +15274,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_branch .LBB12_16 ; GFX1150-NEXT: .LBB12_10: ; GFX1150-NEXT: ; implicit-def: $vgpr1 -; GFX1150-NEXT: .LBB12_11: ; %frem.compute15 +; GFX1150-NEXT: .LBB12_11: ; %frem.compute46 ; GFX1150-NEXT: v_frexp_mant_f32_e64 v2, |s4| ; GFX1150-NEXT: v_frexp_mant_f32_e64 v1, |s10| ; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v4, s10 @@ -15309,11 +15309,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v5 ; GFX1150-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB12_15 -; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; GFX1150-NEXT: s_sub_i32 s11, s11, s12 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s11, s11, 12 -; GFX1150-NEXT: .LBB12_13: ; %frem.loop_body23 +; GFX1150-NEXT: .LBB12_13: ; %frem.loop_body54 ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_mov_b32_e32 v6, v3 @@ -15335,7 +15335,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.14: ; %Flow121 ; GFX1150-NEXT: v_mov_b32_e32 v5, s11 ; GFX1150-NEXT: v_mov_b32_e32 v3, v6 -; GFX1150-NEXT: .LBB12_15: ; %frem.loop_exit24 +; GFX1150-NEXT: .LBB12_15: ; %frem.loop_exit55 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-NEXT: v_add_nc_u32_e32 v5, -11, v5 ; GFX1150-NEXT: v_ldexp_f32 v3, v3, v5 @@ -15358,7 +15358,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_cmp_ngt_f32 s10, s12 ; GFX1150-NEXT: s_cbranch_scc0 .LBB12_18 -; GFX1150-NEXT: ; %bb.17: ; %frem.else47 +; GFX1150-NEXT: ; %bb.17: ; %frem.else16 ; GFX1150-NEXT: s_cmp_eq_f32 s10, s12 ; GFX1150-NEXT: v_bfi_b32 v2, 0x7fffffff, 0, s9 ; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -15368,7 +15368,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_branch .LBB12_24 ; GFX1150-NEXT: .LBB12_18: ; GFX1150-NEXT: ; implicit-def: $vgpr2 -; GFX1150-NEXT: .LBB12_19: ; %frem.compute46 +; GFX1150-NEXT: .LBB12_19: ; %frem.compute15 ; GFX1150-NEXT: v_frexp_mant_f32_e64 v3, |s3| ; GFX1150-NEXT: v_frexp_mant_f32_e64 v2, |s9| ; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v5, s9 @@ -15403,11 +15403,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v6 ; GFX1150-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB12_23 -; GFX1150-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; GFX1150-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; GFX1150-NEXT: s_sub_i32 s11, s11, s12 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s11, s11, 12 -; GFX1150-NEXT: .LBB12_21: ; %frem.loop_body54 +; GFX1150-NEXT: .LBB12_21: ; %frem.loop_body23 ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_mov_b32_e32 v7, v4 @@ -15429,7 +15429,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.22: ; %Flow117 ; GFX1150-NEXT: v_mov_b32_e32 v6, s11 ; GFX1150-NEXT: v_mov_b32_e32 v4, v7 -; GFX1150-NEXT: .LBB12_23: ; %frem.loop_exit55 +; GFX1150-NEXT: .LBB12_23: ; %frem.loop_exit24 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-NEXT: v_add_nc_u32_e32 v6, -11, v6 ; GFX1150-NEXT: v_ldexp_f32 v4, v4, v6 @@ -15452,7 +15452,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_cmp_ngt_f32 s9, s12 ; GFX1150-NEXT: s_cbranch_scc0 .LBB12_26 -; GFX1150-NEXT: ; %bb.25: ; %frem.else78 +; GFX1150-NEXT: ; %bb.25: ; %frem.else ; GFX1150-NEXT: s_cmp_eq_f32 s9, s12 ; GFX1150-NEXT: v_bfi_b32 v3, 0x7fffffff, 0, s7 ; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -15462,7 +15462,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_branch .LBB12_32 ; GFX1150-NEXT: .LBB12_26: ; GFX1150-NEXT: ; implicit-def: $vgpr3 -; GFX1150-NEXT: .LBB12_27: ; %frem.compute77 +; GFX1150-NEXT: .LBB12_27: ; %frem.compute ; GFX1150-NEXT: v_frexp_mant_f32_e64 v4, |s2| ; GFX1150-NEXT: v_frexp_mant_f32_e64 v3, |s7| ; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v6, s7 @@ -15497,11 +15497,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v7 ; GFX1150-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB12_31 -; GFX1150-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; GFX1150-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX1150-NEXT: s_sub_i32 s11, s11, s12 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s11, s11, 12 -; GFX1150-NEXT: .LBB12_29: ; %frem.loop_body85 +; GFX1150-NEXT: .LBB12_29: ; %frem.loop_body ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_mov_b32_e32 v8, v5 @@ -15523,7 +15523,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.30: ; %Flow ; GFX1150-NEXT: v_mov_b32_e32 v7, s11 ; GFX1150-NEXT: v_mov_b32_e32 v5, v8 -; GFX1150-NEXT: .LBB12_31: ; %frem.loop_exit86 +; GFX1150-NEXT: .LBB12_31: ; %frem.loop_exit ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-NEXT: v_add_nc_u32_e32 v7, -11, v7 ; GFX1150-NEXT: v_ldexp_f32 v5, v5, v7 @@ -15597,7 +15597,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1200-NEXT: s_cmp_ngt_f32 s5, s12 ; GFX1200-NEXT: s_cbranch_scc0 .LBB12_2 -; GFX1200-NEXT: ; %bb.1: ; %frem.else +; GFX1200-NEXT: ; %bb.1: ; %frem.else78 ; GFX1200-NEXT: s_cmp_eq_f32 s5, s12 ; GFX1200-NEXT: v_bfi_b32 v0, 0x7fffffff, 0, s8 ; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -15607,7 +15607,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB12_8 ; GFX1200-NEXT: .LBB12_2: ; GFX1200-NEXT: ; implicit-def: $vgpr0 -; GFX1200-NEXT: .LBB12_3: ; %frem.compute +; GFX1200-NEXT: .LBB12_3: ; %frem.compute77 ; GFX1200-NEXT: v_frexp_mant_f32_e64 v1, |s6| ; GFX1200-NEXT: v_frexp_mant_f32_e64 v0, |s8| ; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v3, s8 @@ -15643,11 +15643,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4 ; GFX1200-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB12_7 -; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body85.preheader ; GFX1200-NEXT: s_sub_co_i32 s11, s11, s12 ; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1200-NEXT: s_add_co_i32 s11, s11, 12 -; GFX1200-NEXT: .LBB12_5: ; %frem.loop_body +; GFX1200-NEXT: .LBB12_5: ; %frem.loop_body85 ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1200-NEXT: v_mov_b32_e32 v5, v2 @@ -15670,7 +15670,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: ; %bb.6: ; %Flow125 ; GFX1200-NEXT: v_mov_b32_e32 v4, s11 ; GFX1200-NEXT: v_mov_b32_e32 v2, v5 -; GFX1200-NEXT: .LBB12_7: ; %frem.loop_exit +; GFX1200-NEXT: .LBB12_7: ; %frem.loop_exit86 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_add_nc_u32_e32 v4, -11, v4 ; GFX1200-NEXT: v_ldexp_f32 v2, v2, v4 @@ -15694,7 +15694,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_cmp_ngt_f32 s8, s12 ; GFX1200-NEXT: s_cbranch_scc0 .LBB12_10 -; GFX1200-NEXT: ; %bb.9: ; %frem.else16 +; GFX1200-NEXT: ; %bb.9: ; %frem.else47 ; GFX1200-NEXT: s_cmp_eq_f32 s8, s12 ; GFX1200-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, s10 ; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -15705,7 +15705,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB12_16 ; GFX1200-NEXT: .LBB12_10: ; GFX1200-NEXT: ; implicit-def: $vgpr1 -; GFX1200-NEXT: .LBB12_11: ; %frem.compute15 +; GFX1200-NEXT: .LBB12_11: ; %frem.compute46 ; GFX1200-NEXT: v_frexp_mant_f32_e64 v2, |s4| ; GFX1200-NEXT: v_frexp_mant_f32_e64 v1, |s10| ; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v4, s10 @@ -15741,11 +15741,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v5 ; GFX1200-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB12_15 -; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body54.preheader ; GFX1200-NEXT: s_sub_co_i32 s11, s11, s12 ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_add_co_i32 s11, s11, 12 -; GFX1200-NEXT: .LBB12_13: ; %frem.loop_body23 +; GFX1200-NEXT: .LBB12_13: ; %frem.loop_body54 ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-NEXT: v_mov_b32_e32 v6, v3 @@ -15769,7 +15769,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: ; %bb.14: ; %Flow121 ; GFX1200-NEXT: v_mov_b32_e32 v5, s11 ; GFX1200-NEXT: v_mov_b32_e32 v3, v6 -; GFX1200-NEXT: .LBB12_15: ; %frem.loop_exit24 +; GFX1200-NEXT: .LBB12_15: ; %frem.loop_exit55 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_add_nc_u32_e32 v5, -11, v5 ; GFX1200-NEXT: v_ldexp_f32 v3, v3, v5 @@ -15793,7 +15793,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_cmp_ngt_f32 s10, s12 ; GFX1200-NEXT: s_cbranch_scc0 .LBB12_18 -; GFX1200-NEXT: ; %bb.17: ; %frem.else47 +; GFX1200-NEXT: ; %bb.17: ; %frem.else16 ; GFX1200-NEXT: s_cmp_eq_f32 s10, s12 ; GFX1200-NEXT: v_bfi_b32 v2, 0x7fffffff, 0, s9 ; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -15804,7 +15804,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB12_24 ; GFX1200-NEXT: .LBB12_18: ; GFX1200-NEXT: ; implicit-def: $vgpr2 -; GFX1200-NEXT: .LBB12_19: ; %frem.compute46 +; GFX1200-NEXT: .LBB12_19: ; %frem.compute15 ; GFX1200-NEXT: v_frexp_mant_f32_e64 v3, |s3| ; GFX1200-NEXT: v_frexp_mant_f32_e64 v2, |s9| ; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v5, s9 @@ -15840,11 +15840,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v6 ; GFX1200-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB12_23 -; GFX1200-NEXT: ; %bb.20: ; %frem.loop_body54.preheader +; GFX1200-NEXT: ; %bb.20: ; %frem.loop_body23.preheader ; GFX1200-NEXT: s_sub_co_i32 s11, s11, s12 ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_add_co_i32 s11, s11, 12 -; GFX1200-NEXT: .LBB12_21: ; %frem.loop_body54 +; GFX1200-NEXT: .LBB12_21: ; %frem.loop_body23 ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-NEXT: v_mov_b32_e32 v7, v4 @@ -15868,7 +15868,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: ; %bb.22: ; %Flow117 ; GFX1200-NEXT: v_mov_b32_e32 v6, s11 ; GFX1200-NEXT: v_mov_b32_e32 v4, v7 -; GFX1200-NEXT: .LBB12_23: ; %frem.loop_exit55 +; GFX1200-NEXT: .LBB12_23: ; %frem.loop_exit24 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_add_nc_u32_e32 v6, -11, v6 ; GFX1200-NEXT: v_ldexp_f32 v4, v4, v6 @@ -15892,7 +15892,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_cmp_ngt_f32 s9, s12 ; GFX1200-NEXT: s_cbranch_scc0 .LBB12_26 -; GFX1200-NEXT: ; %bb.25: ; %frem.else78 +; GFX1200-NEXT: ; %bb.25: ; %frem.else ; GFX1200-NEXT: s_cmp_eq_f32 s9, s12 ; GFX1200-NEXT: v_bfi_b32 v3, 0x7fffffff, 0, s7 ; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0 @@ -15903,7 +15903,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB12_32 ; GFX1200-NEXT: .LBB12_26: ; GFX1200-NEXT: ; implicit-def: $vgpr3 -; GFX1200-NEXT: .LBB12_27: ; %frem.compute77 +; GFX1200-NEXT: .LBB12_27: ; %frem.compute ; GFX1200-NEXT: v_frexp_mant_f32_e64 v4, |s2| ; GFX1200-NEXT: v_frexp_mant_f32_e64 v3, |s7| ; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v6, s7 @@ -15939,11 +15939,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v7 ; GFX1200-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB12_31 -; GFX1200-NEXT: ; %bb.28: ; %frem.loop_body85.preheader +; GFX1200-NEXT: ; %bb.28: ; %frem.loop_body.preheader ; GFX1200-NEXT: s_sub_co_i32 s11, s11, s12 ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_add_co_i32 s11, s11, 12 -; GFX1200-NEXT: .LBB12_29: ; %frem.loop_body85 +; GFX1200-NEXT: .LBB12_29: ; %frem.loop_body ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1200-NEXT: v_mov_b32_e32 v8, v5 @@ -15967,7 +15967,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: ; %bb.30: ; %Flow ; GFX1200-NEXT: v_mov_b32_e32 v7, s11 ; GFX1200-NEXT: v_mov_b32_e32 v5, v8 -; GFX1200-NEXT: .LBB12_31: ; %frem.loop_exit86 +; GFX1200-NEXT: .LBB12_31: ; %frem.loop_exit ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_add_nc_u32_e32 v7, -11, v7 ; GFX1200-NEXT: v_ldexp_f32 v5, v5, v7 @@ -16048,7 +16048,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f64_e64 s[0:1], |v[0:1]|, |v[4:5]| ; SI-NEXT: s_and_b64 vcc, exec, s[0:1] ; SI-NEXT: s_cbranch_vccz .LBB13_2 -; SI-NEXT: ; %bb.1: ; %frem.else +; SI-NEXT: ; %bb.1: ; %frem.else16 ; SI-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; SI-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, |v[4:5]| ; SI-NEXT: v_cndmask_b32_e32 v9, v1, v8, vcc @@ -16059,7 +16059,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB13_2: ; SI-NEXT: ; implicit-def: $vgpr8_vgpr9 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB13_3: ; %frem.compute +; SI-NEXT: .LBB13_3: ; %frem.compute15 ; SI-NEXT: s_brev_b32 s5, -2 ; SI-NEXT: v_and_b32_e32 v10, 0x7fffffff, v1 ; SI-NEXT: s_mov_b32 s0, 0 @@ -16105,13 +16105,13 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; SI-NEXT: s_cmp_lt_i32 s6, 27 ; SI-NEXT: s_cbranch_scc1 .LBB13_7 -; SI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; SI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; SI-NEXT: s_sub_i32 s0, s3, s7 ; SI-NEXT: s_add_i32 s6, s0, 26 ; SI-NEXT: s_mov_b32 s3, 0x432fffff ; SI-NEXT: v_mov_b32_e32 v18, 0x43300000 ; SI-NEXT: v_mov_b32_e32 v14, 0 -; SI-NEXT: .LBB13_5: ; %frem.loop_body +; SI-NEXT: .LBB13_5: ; %frem.loop_body23 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v17, v11 ; SI-NEXT: v_mov_b32_e32 v16, v10 @@ -16134,7 +16134,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: ; %bb.6: ; %Flow51 ; SI-NEXT: v_mov_b32_e32 v10, v16 ; SI-NEXT: v_mov_b32_e32 v11, v17 -; SI-NEXT: .LBB13_7: ; %frem.loop_exit +; SI-NEXT: .LBB13_7: ; %frem.loop_exit24 ; SI-NEXT: s_sub_i32 s0, s6, 25 ; SI-NEXT: v_ldexp_f64 v[10:11], v[10:11], s0 ; SI-NEXT: v_mul_f64 v[12:13], v[10:11], v[12:13] @@ -16160,7 +16160,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_cmp_ngt_f64_e64 s[0:1], |v[2:3]|, |v[6:7]| ; SI-NEXT: s_and_b64 vcc, exec, s[0:1] ; SI-NEXT: s_cbranch_vccz .LBB13_10 -; SI-NEXT: ; %bb.9: ; %frem.else16 +; SI-NEXT: ; %bb.9: ; %frem.else ; SI-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; SI-NEXT: v_cmp_eq_f64_e64 vcc, |v[2:3]|, |v[6:7]| ; SI-NEXT: v_cndmask_b32_e32 v11, v3, v10, vcc @@ -16171,7 +16171,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: .LBB13_10: ; SI-NEXT: ; implicit-def: $vgpr10_vgpr11 ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: .LBB13_11: ; %frem.compute15 +; SI-NEXT: .LBB13_11: ; %frem.compute ; SI-NEXT: s_brev_b32 s5, -2 ; SI-NEXT: v_and_b32_e32 v12, 0x7fffffff, v3 ; SI-NEXT: s_mov_b32 s0, 0 @@ -16217,13 +16217,13 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0 ; SI-NEXT: s_cmp_lt_i32 s6, 27 ; SI-NEXT: s_cbranch_scc1 .LBB13_15 -; SI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; SI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; SI-NEXT: s_sub_i32 s0, s3, s7 ; SI-NEXT: s_add_i32 s6, s0, 26 ; SI-NEXT: s_mov_b32 s3, 0x432fffff ; SI-NEXT: v_mov_b32_e32 v20, 0x43300000 ; SI-NEXT: v_mov_b32_e32 v16, 0 -; SI-NEXT: .LBB13_13: ; %frem.loop_body23 +; SI-NEXT: .LBB13_13: ; %frem.loop_body ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_mov_b32_e32 v19, v13 ; SI-NEXT: v_mov_b32_e32 v18, v12 @@ -16246,7 +16246,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; SI-NEXT: ; %bb.14: ; %Flow ; SI-NEXT: v_mov_b32_e32 v12, v18 ; SI-NEXT: v_mov_b32_e32 v13, v19 -; SI-NEXT: .LBB13_15: ; %frem.loop_exit24 +; SI-NEXT: .LBB13_15: ; %frem.loop_exit ; SI-NEXT: s_sub_i32 s0, s6, 25 ; SI-NEXT: v_ldexp_f64 v[12:13], v[12:13], s0 ; SI-NEXT: v_mul_f64 v[14:15], v[12:13], v[14:15] @@ -16304,7 +16304,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[0:1]|, |v[4:5]| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB13_2 -; CI-NEXT: ; %bb.1: ; %frem.else +; CI-NEXT: ; %bb.1: ; %frem.else16 ; CI-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, |v[4:5]| ; CI-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; CI-NEXT: v_cndmask_b32_e32 v9, v1, v8, vcc @@ -16313,7 +16313,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB13_8 ; CI-NEXT: .LBB13_2: ; CI-NEXT: ; implicit-def: $vgpr8_vgpr9 -; CI-NEXT: .LBB13_3: ; %frem.compute +; CI-NEXT: .LBB13_3: ; %frem.compute15 ; CI-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]| ; CI-NEXT: v_frexp_exp_i32_f64_e32 v15, v[4:5] ; CI-NEXT: v_frexp_exp_i32_f64_e32 v14, v[0:1] @@ -16337,10 +16337,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 27, v17 ; CI-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB13_7 -; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; CI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; CI-NEXT: v_sub_i32_e32 v14, vcc, v14, v15 ; CI-NEXT: v_add_i32_e32 v17, vcc, 26, v14 -; CI-NEXT: .LBB13_5: ; %frem.loop_body +; CI-NEXT: .LBB13_5: ; %frem.loop_body23 ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v15, v11 ; CI-NEXT: v_mov_b32_e32 v14, v10 @@ -16358,7 +16358,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: ; %bb.6: ; %Flow51 ; CI-NEXT: v_mov_b32_e32 v10, v14 ; CI-NEXT: v_mov_b32_e32 v11, v15 -; CI-NEXT: .LBB13_7: ; %frem.loop_exit +; CI-NEXT: .LBB13_7: ; %frem.loop_exit24 ; CI-NEXT: v_subrev_i32_e32 v14, vcc, 25, v17 ; CI-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14 ; CI-NEXT: s_brev_b32 s2, -2 @@ -16375,7 +16375,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[2:3]|, |v[6:7]| ; CI-NEXT: s_and_b64 vcc, exec, s[2:3] ; CI-NEXT: s_cbranch_vccz .LBB13_10 -; CI-NEXT: ; %bb.9: ; %frem.else16 +; CI-NEXT: ; %bb.9: ; %frem.else ; CI-NEXT: v_cmp_eq_f64_e64 vcc, |v[2:3]|, |v[6:7]| ; CI-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; CI-NEXT: v_cndmask_b32_e32 v11, v3, v10, vcc @@ -16384,7 +16384,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: s_branch .LBB13_16 ; CI-NEXT: .LBB13_10: ; CI-NEXT: ; implicit-def: $vgpr10_vgpr11 -; CI-NEXT: .LBB13_11: ; %frem.compute15 +; CI-NEXT: .LBB13_11: ; %frem.compute ; CI-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]| ; CI-NEXT: v_frexp_exp_i32_f64_e32 v17, v[6:7] ; CI-NEXT: v_frexp_exp_i32_f64_e32 v16, v[2:3] @@ -16408,10 +16408,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: v_cmp_gt_i32_e32 vcc, 27, v19 ; CI-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0 ; CI-NEXT: s_cbranch_vccnz .LBB13_15 -; CI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; CI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; CI-NEXT: v_sub_i32_e32 v16, vcc, v16, v17 ; CI-NEXT: v_add_i32_e32 v19, vcc, 26, v16 -; CI-NEXT: .LBB13_13: ; %frem.loop_body23 +; CI-NEXT: .LBB13_13: ; %frem.loop_body ; CI-NEXT: ; =>This Inner Loop Header: Depth=1 ; CI-NEXT: v_mov_b32_e32 v17, v13 ; CI-NEXT: v_mov_b32_e32 v16, v12 @@ -16429,7 +16429,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; CI-NEXT: ; %bb.14: ; %Flow ; CI-NEXT: v_mov_b32_e32 v12, v16 ; CI-NEXT: v_mov_b32_e32 v13, v17 -; CI-NEXT: .LBB13_15: ; %frem.loop_exit24 +; CI-NEXT: .LBB13_15: ; %frem.loop_exit ; CI-NEXT: v_subrev_i32_e32 v16, vcc, 25, v19 ; CI-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16 ; CI-NEXT: s_brev_b32 s2, -2 @@ -16478,7 +16478,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[0:1]|, |v[4:5]| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB13_2 -; VI-NEXT: ; %bb.1: ; %frem.else +; VI-NEXT: ; %bb.1: ; %frem.else16 ; VI-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, |v[4:5]| ; VI-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; VI-NEXT: v_cndmask_b32_e32 v9, v1, v8, vcc @@ -16487,7 +16487,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB13_8 ; VI-NEXT: .LBB13_2: ; VI-NEXT: ; implicit-def: $vgpr8_vgpr9 -; VI-NEXT: .LBB13_3: ; %frem.compute +; VI-NEXT: .LBB13_3: ; %frem.compute15 ; VI-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]| ; VI-NEXT: v_frexp_exp_i32_f64_e32 v15, v[4:5] ; VI-NEXT: v_frexp_exp_i32_f64_e32 v14, v[0:1] @@ -16511,10 +16511,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 27, v17 ; VI-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB13_7 -; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; VI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; VI-NEXT: v_sub_u32_e32 v14, vcc, v14, v15 ; VI-NEXT: v_add_u32_e32 v17, vcc, 26, v14 -; VI-NEXT: .LBB13_5: ; %frem.loop_body +; VI-NEXT: .LBB13_5: ; %frem.loop_body23 ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v15, v11 ; VI-NEXT: v_mov_b32_e32 v14, v10 @@ -16532,7 +16532,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: ; %bb.6: ; %Flow51 ; VI-NEXT: v_mov_b32_e32 v10, v14 ; VI-NEXT: v_mov_b32_e32 v11, v15 -; VI-NEXT: .LBB13_7: ; %frem.loop_exit +; VI-NEXT: .LBB13_7: ; %frem.loop_exit24 ; VI-NEXT: v_subrev_u32_e32 v14, vcc, 25, v17 ; VI-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14 ; VI-NEXT: s_brev_b32 s2, -2 @@ -16549,7 +16549,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[2:3]|, |v[6:7]| ; VI-NEXT: s_and_b64 vcc, exec, s[2:3] ; VI-NEXT: s_cbranch_vccz .LBB13_10 -; VI-NEXT: ; %bb.9: ; %frem.else16 +; VI-NEXT: ; %bb.9: ; %frem.else ; VI-NEXT: v_cmp_eq_f64_e64 vcc, |v[2:3]|, |v[6:7]| ; VI-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; VI-NEXT: v_cndmask_b32_e32 v11, v3, v10, vcc @@ -16558,7 +16558,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: s_branch .LBB13_16 ; VI-NEXT: .LBB13_10: ; VI-NEXT: ; implicit-def: $vgpr10_vgpr11 -; VI-NEXT: .LBB13_11: ; %frem.compute15 +; VI-NEXT: .LBB13_11: ; %frem.compute ; VI-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]| ; VI-NEXT: v_frexp_exp_i32_f64_e32 v17, v[6:7] ; VI-NEXT: v_frexp_exp_i32_f64_e32 v16, v[2:3] @@ -16582,10 +16582,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: v_cmp_gt_i32_e32 vcc, 27, v19 ; VI-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0 ; VI-NEXT: s_cbranch_vccnz .LBB13_15 -; VI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; VI-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; VI-NEXT: v_sub_u32_e32 v16, vcc, v16, v17 ; VI-NEXT: v_add_u32_e32 v19, vcc, 26, v16 -; VI-NEXT: .LBB13_13: ; %frem.loop_body23 +; VI-NEXT: .LBB13_13: ; %frem.loop_body ; VI-NEXT: ; =>This Inner Loop Header: Depth=1 ; VI-NEXT: v_mov_b32_e32 v17, v13 ; VI-NEXT: v_mov_b32_e32 v16, v12 @@ -16603,7 +16603,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; VI-NEXT: ; %bb.14: ; %Flow ; VI-NEXT: v_mov_b32_e32 v12, v16 ; VI-NEXT: v_mov_b32_e32 v13, v17 -; VI-NEXT: .LBB13_15: ; %frem.loop_exit24 +; VI-NEXT: .LBB13_15: ; %frem.loop_exit ; VI-NEXT: v_subrev_u32_e32 v16, vcc, 25, v19 ; VI-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16 ; VI-NEXT: s_brev_b32 s2, -2 @@ -16647,7 +16647,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[0:1]|, |v[4:5]| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB13_2 -; GFX9-NEXT: ; %bb.1: ; %frem.else +; GFX9-NEXT: ; %bb.1: ; %frem.else16 ; GFX9-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, |v[4:5]| ; GFX9-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; GFX9-NEXT: v_cndmask_b32_e32 v9, v1, v8, vcc @@ -16656,7 +16656,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB13_8 ; GFX9-NEXT: .LBB13_2: ; GFX9-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX9-NEXT: .LBB13_3: ; %frem.compute +; GFX9-NEXT: .LBB13_3: ; %frem.compute15 ; GFX9-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]| ; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v15, v[4:5] ; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v14, v[0:1] @@ -16680,10 +16680,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 27, v17 ; GFX9-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB13_7 -; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX9-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX9-NEXT: v_sub_u32_e32 v14, v14, v15 ; GFX9-NEXT: v_add_u32_e32 v17, 26, v14 -; GFX9-NEXT: .LBB13_5: ; %frem.loop_body +; GFX9-NEXT: .LBB13_5: ; %frem.loop_body23 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v15, v11 ; GFX9-NEXT: v_mov_b32_e32 v14, v10 @@ -16701,7 +16701,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: ; %bb.6: ; %Flow51 ; GFX9-NEXT: v_mov_b32_e32 v10, v14 ; GFX9-NEXT: v_mov_b32_e32 v11, v15 -; GFX9-NEXT: .LBB13_7: ; %frem.loop_exit +; GFX9-NEXT: .LBB13_7: ; %frem.loop_exit24 ; GFX9-NEXT: v_subrev_u32_e32 v14, 25, v17 ; GFX9-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14 ; GFX9-NEXT: s_brev_b32 s2, -2 @@ -16718,7 +16718,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[2:3]|, |v[6:7]| ; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB13_10 -; GFX9-NEXT: ; %bb.9: ; %frem.else16 +; GFX9-NEXT: ; %bb.9: ; %frem.else ; GFX9-NEXT: v_cmp_eq_f64_e64 vcc, |v[2:3]|, |v[6:7]| ; GFX9-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; GFX9-NEXT: v_cndmask_b32_e32 v11, v3, v10, vcc @@ -16727,7 +16727,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: s_branch .LBB13_16 ; GFX9-NEXT: .LBB13_10: ; GFX9-NEXT: ; implicit-def: $vgpr10_vgpr11 -; GFX9-NEXT: .LBB13_11: ; %frem.compute15 +; GFX9-NEXT: .LBB13_11: ; %frem.compute ; GFX9-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]| ; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v17, v[6:7] ; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v16, v[2:3] @@ -16751,10 +16751,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 27, v19 ; GFX9-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0 ; GFX9-NEXT: s_cbranch_vccnz .LBB13_15 -; GFX9-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX9-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX9-NEXT: v_sub_u32_e32 v16, v16, v17 ; GFX9-NEXT: v_add_u32_e32 v19, 26, v16 -; GFX9-NEXT: .LBB13_13: ; %frem.loop_body23 +; GFX9-NEXT: .LBB13_13: ; %frem.loop_body ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX9-NEXT: v_mov_b32_e32 v17, v13 ; GFX9-NEXT: v_mov_b32_e32 v16, v12 @@ -16772,7 +16772,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX9-NEXT: ; %bb.14: ; %Flow ; GFX9-NEXT: v_mov_b32_e32 v12, v16 ; GFX9-NEXT: v_mov_b32_e32 v13, v17 -; GFX9-NEXT: .LBB13_15: ; %frem.loop_exit24 +; GFX9-NEXT: .LBB13_15: ; %frem.loop_exit ; GFX9-NEXT: v_subrev_u32_e32 v16, 25, v19 ; GFX9-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16 ; GFX9-NEXT: s_brev_b32 s2, -2 @@ -16817,7 +16817,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_ngt_f64_e64 s2, |v[0:1]|, |v[4:5]| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB13_2 -; GFX10-NEXT: ; %bb.1: ; %frem.else +; GFX10-NEXT: ; %bb.1: ; %frem.else16 ; GFX10-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[0:1]|, |v[4:5]| ; GFX10-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; GFX10-NEXT: v_cndmask_b32_e32 v9, v1, v8, vcc_lo @@ -16826,7 +16826,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB13_8 ; GFX10-NEXT: .LBB13_2: ; GFX10-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX10-NEXT: .LBB13_3: ; %frem.compute +; GFX10-NEXT: .LBB13_3: ; %frem.compute15 ; GFX10-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]| ; GFX10-NEXT: v_frexp_exp_i32_f64_e32 v13, v[4:5] ; GFX10-NEXT: v_frexp_exp_i32_f64_e32 v12, v[0:1] @@ -16851,10 +16851,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v17 ; GFX10-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB13_7 -; GFX10-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX10-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 26 -; GFX10-NEXT: .LBB13_5: ; %frem.loop_body +; GFX10-NEXT: .LBB13_5: ; %frem.loop_body23 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v15, v11 ; GFX10-NEXT: v_mov_b32_e32 v14, v10 @@ -16873,7 +16873,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_mov_b32_e32 v10, v14 ; GFX10-NEXT: v_mov_b32_e32 v17, s2 ; GFX10-NEXT: v_mov_b32_e32 v11, v15 -; GFX10-NEXT: .LBB13_7: ; %frem.loop_exit +; GFX10-NEXT: .LBB13_7: ; %frem.loop_exit24 ; GFX10-NEXT: v_subrev_nc_u32_e32 v14, 25, v17 ; GFX10-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14 ; GFX10-NEXT: v_mul_f64 v[12:13], v[10:11], v[12:13] @@ -16889,7 +16889,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_ngt_f64_e64 s2, |v[2:3]|, |v[6:7]| ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX10-NEXT: s_cbranch_vccz .LBB13_10 -; GFX10-NEXT: ; %bb.9: ; %frem.else16 +; GFX10-NEXT: ; %bb.9: ; %frem.else ; GFX10-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[2:3]|, |v[6:7]| ; GFX10-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; GFX10-NEXT: v_cndmask_b32_e32 v11, v3, v10, vcc_lo @@ -16898,7 +16898,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: s_branch .LBB13_16 ; GFX10-NEXT: .LBB13_10: ; GFX10-NEXT: ; implicit-def: $vgpr10_vgpr11 -; GFX10-NEXT: .LBB13_11: ; %frem.compute15 +; GFX10-NEXT: .LBB13_11: ; %frem.compute ; GFX10-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]| ; GFX10-NEXT: v_frexp_exp_i32_f64_e32 v15, v[6:7] ; GFX10-NEXT: v_frexp_exp_i32_f64_e32 v14, v[2:3] @@ -16923,10 +16923,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v19 ; GFX10-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0 ; GFX10-NEXT: s_cbranch_vccnz .LBB13_15 -; GFX10-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX10-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX10-NEXT: s_sub_i32 s2, s2, s3 ; GFX10-NEXT: s_add_i32 s2, s2, 26 -; GFX10-NEXT: .LBB13_13: ; %frem.loop_body23 +; GFX10-NEXT: .LBB13_13: ; %frem.loop_body ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_mov_b32_e32 v17, v13 ; GFX10-NEXT: v_mov_b32_e32 v16, v12 @@ -16945,7 +16945,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX10-NEXT: v_mov_b32_e32 v12, v16 ; GFX10-NEXT: v_mov_b32_e32 v19, s2 ; GFX10-NEXT: v_mov_b32_e32 v13, v17 -; GFX10-NEXT: .LBB13_15: ; %frem.loop_exit24 +; GFX10-NEXT: .LBB13_15: ; %frem.loop_exit ; GFX10-NEXT: v_subrev_nc_u32_e32 v16, 25, v19 ; GFX10-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16 ; GFX10-NEXT: v_mul_f64 v[14:15], v[12:13], v[14:15] @@ -16986,7 +16986,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f64_e64 s2, |v[0:1]|, |v[4:5]| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB13_2 -; GFX11-NEXT: ; %bb.1: ; %frem.else +; GFX11-NEXT: ; %bb.1: ; %frem.else16 ; GFX11-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[0:1]|, |v[4:5]| ; GFX11-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) @@ -16996,7 +16996,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB13_8 ; GFX11-NEXT: .LBB13_2: ; GFX11-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX11-NEXT: .LBB13_3: ; %frem.compute +; GFX11-NEXT: .LBB13_3: ; %frem.compute15 ; GFX11-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]| ; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v13, v[4:5] ; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v12, v[0:1] @@ -17029,12 +17029,12 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB13_7 -; GFX11-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX11-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 26 ; GFX11-NEXT: .p2align 6 -; GFX11-NEXT: .LBB13_5: ; %frem.loop_body +; GFX11-NEXT: .LBB13_5: ; %frem.loop_body23 ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_dual_mov_b32 v15, v11 :: v_dual_mov_b32 v14, v10 @@ -17054,7 +17054,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.6: ; %Flow51 ; GFX11-NEXT: v_dual_mov_b32 v17, s2 :: v_dual_mov_b32 v10, v14 ; GFX11-NEXT: v_mov_b32_e32 v11, v15 -; GFX11-NEXT: .LBB13_7: ; %frem.loop_exit +; GFX11-NEXT: .LBB13_7: ; %frem.loop_exit24 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_subrev_nc_u32_e32 v14, 25, v17 ; GFX11-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14 @@ -17074,7 +17074,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: v_cmp_ngt_f64_e64 s2, |v[2:3]|, |v[6:7]| ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX11-NEXT: s_cbranch_vccz .LBB13_10 -; GFX11-NEXT: ; %bb.9: ; %frem.else16 +; GFX11-NEXT: ; %bb.9: ; %frem.else ; GFX11-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[2:3]|, |v[6:7]| ; GFX11-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) @@ -17084,7 +17084,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_branch .LBB13_16 ; GFX11-NEXT: .LBB13_10: ; GFX11-NEXT: ; implicit-def: $vgpr10_vgpr11 -; GFX11-NEXT: .LBB13_11: ; %frem.compute15 +; GFX11-NEXT: .LBB13_11: ; %frem.compute ; GFX11-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]| ; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v15, v[6:7] ; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v14, v[2:3] @@ -17117,12 +17117,12 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX11-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0 ; GFX11-NEXT: s_cbranch_vccnz .LBB13_15 -; GFX11-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX11-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX11-NEXT: s_sub_i32 s2, s2, s3 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_add_i32 s2, s2, 26 ; GFX11-NEXT: .p2align 6 -; GFX11-NEXT: .LBB13_13: ; %frem.loop_body23 +; GFX11-NEXT: .LBB13_13: ; %frem.loop_body ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: v_dual_mov_b32 v17, v13 :: v_dual_mov_b32 v16, v12 @@ -17142,7 +17142,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-NEXT: ; %bb.14: ; %Flow ; GFX11-NEXT: v_dual_mov_b32 v19, s2 :: v_dual_mov_b32 v12, v16 ; GFX11-NEXT: v_mov_b32_e32 v13, v17 -; GFX11-NEXT: .LBB13_15: ; %frem.loop_exit24 +; GFX11-NEXT: .LBB13_15: ; %frem.loop_exit ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_subrev_nc_u32_e32 v16, 25, v19 ; GFX11-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16 @@ -17187,7 +17187,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_ngt_f64_e64 s2, |v[0:1]|, |v[4:5]| ; GFX1150-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX1150-NEXT: s_cbranch_vccz .LBB13_2 -; GFX1150-NEXT: ; %bb.1: ; %frem.else +; GFX1150-NEXT: ; %bb.1: ; %frem.else16 ; GFX1150-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[0:1]|, |v[4:5]| ; GFX1150-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) @@ -17197,7 +17197,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_branch .LBB13_8 ; GFX1150-NEXT: .LBB13_2: ; GFX1150-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX1150-NEXT: .LBB13_3: ; %frem.compute +; GFX1150-NEXT: .LBB13_3: ; %frem.compute15 ; GFX1150-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]| ; GFX1150-NEXT: v_frexp_exp_i32_f64_e32 v13, v[4:5] ; GFX1150-NEXT: v_frexp_exp_i32_f64_e32 v12, v[0:1] @@ -17229,12 +17229,12 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v17 ; GFX1150-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB13_7 -; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX1150-NEXT: s_sub_i32 s2, s2, s3 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s2, s2, 26 ; GFX1150-NEXT: .p2align 6 -; GFX1150-NEXT: .LBB13_5: ; %frem.loop_body +; GFX1150-NEXT: .LBB13_5: ; %frem.loop_body23 ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_dual_mov_b32 v15, v11 :: v_dual_mov_b32 v14, v10 @@ -17254,7 +17254,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.6: ; %Flow51 ; GFX1150-NEXT: v_dual_mov_b32 v17, s2 :: v_dual_mov_b32 v10, v14 ; GFX1150-NEXT: v_mov_b32_e32 v11, v15 -; GFX1150-NEXT: .LBB13_7: ; %frem.loop_exit +; GFX1150-NEXT: .LBB13_7: ; %frem.loop_exit24 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-NEXT: v_subrev_nc_u32_e32 v14, 25, v17 ; GFX1150-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14 @@ -17274,7 +17274,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_ngt_f64_e64 s2, |v[2:3]|, |v[6:7]| ; GFX1150-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX1150-NEXT: s_cbranch_vccz .LBB13_10 -; GFX1150-NEXT: ; %bb.9: ; %frem.else16 +; GFX1150-NEXT: ; %bb.9: ; %frem.else ; GFX1150-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[2:3]|, |v[6:7]| ; GFX1150-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) @@ -17284,7 +17284,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: s_branch .LBB13_16 ; GFX1150-NEXT: .LBB13_10: ; GFX1150-NEXT: ; implicit-def: $vgpr10_vgpr11 -; GFX1150-NEXT: .LBB13_11: ; %frem.compute15 +; GFX1150-NEXT: .LBB13_11: ; %frem.compute ; GFX1150-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]| ; GFX1150-NEXT: v_frexp_exp_i32_f64_e32 v15, v[6:7] ; GFX1150-NEXT: v_frexp_exp_i32_f64_e32 v14, v[2:3] @@ -17316,12 +17316,12 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v19 ; GFX1150-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0 ; GFX1150-NEXT: s_cbranch_vccnz .LBB13_15 -; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1150-NEXT: s_sub_i32 s2, s2, s3 ; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-NEXT: s_add_i32 s2, s2, 26 ; GFX1150-NEXT: .p2align 6 -; GFX1150-NEXT: .LBB13_13: ; %frem.loop_body23 +; GFX1150-NEXT: .LBB13_13: ; %frem.loop_body ; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1150-NEXT: v_dual_mov_b32 v17, v13 :: v_dual_mov_b32 v16, v12 @@ -17341,7 +17341,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-NEXT: ; %bb.14: ; %Flow ; GFX1150-NEXT: v_dual_mov_b32 v19, s2 :: v_dual_mov_b32 v12, v16 ; GFX1150-NEXT: v_mov_b32_e32 v13, v17 -; GFX1150-NEXT: .LBB13_15: ; %frem.loop_exit24 +; GFX1150-NEXT: .LBB13_15: ; %frem.loop_exit ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1150-NEXT: v_subrev_nc_u32_e32 v16, 25, v19 ; GFX1150-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16 @@ -17386,7 +17386,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_ngt_f64_e64 s2, |v[0:1]|, |v[4:5]| ; GFX1200-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX1200-NEXT: s_cbranch_vccz .LBB13_2 -; GFX1200-NEXT: ; %bb.1: ; %frem.else +; GFX1200-NEXT: ; %bb.1: ; %frem.else16 ; GFX1200-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[0:1]|, |v[4:5]| ; GFX1200-NEXT: v_and_b32_e32 v8, 0x80000000, v1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) @@ -17396,7 +17396,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB13_8 ; GFX1200-NEXT: .LBB13_2: ; GFX1200-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX1200-NEXT: .LBB13_3: ; %frem.compute +; GFX1200-NEXT: .LBB13_3: ; %frem.compute15 ; GFX1200-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]| ; GFX1200-NEXT: v_frexp_exp_i32_f64_e32 v13, v[4:5] ; GFX1200-NEXT: v_frexp_exp_i32_f64_e32 v12, v[0:1] @@ -17429,11 +17429,11 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v17 ; GFX1200-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB13_7 -; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body.preheader +; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body23.preheader ; GFX1200-NEXT: s_sub_co_i32 s2, s2, s3 ; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1200-NEXT: s_add_co_i32 s2, s2, 26 -; GFX1200-NEXT: .LBB13_5: ; %frem.loop_body +; GFX1200-NEXT: .LBB13_5: ; %frem.loop_body23 ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) ; GFX1200-NEXT: v_dual_mov_b32 v15, v11 :: v_dual_mov_b32 v14, v10 @@ -17454,7 +17454,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: ; %bb.6: ; %Flow51 ; GFX1200-NEXT: v_dual_mov_b32 v17, s2 :: v_dual_mov_b32 v10, v14 ; GFX1200-NEXT: v_mov_b32_e32 v11, v15 -; GFX1200-NEXT: .LBB13_7: ; %frem.loop_exit +; GFX1200-NEXT: .LBB13_7: ; %frem.loop_exit24 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_subrev_nc_u32_e32 v14, 25, v17 ; GFX1200-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14 @@ -17476,7 +17476,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_and_b32 vcc_lo, exec_lo, s2 ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_cbranch_vccz .LBB13_10 -; GFX1200-NEXT: ; %bb.9: ; %frem.else16 +; GFX1200-NEXT: ; %bb.9: ; %frem.else ; GFX1200-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[2:3]|, |v[6:7]| ; GFX1200-NEXT: v_and_b32_e32 v10, 0x80000000, v3 ; GFX1200-NEXT: s_wait_alu 0xfffd @@ -17487,7 +17487,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: s_branch .LBB13_16 ; GFX1200-NEXT: .LBB13_10: ; GFX1200-NEXT: ; implicit-def: $vgpr10_vgpr11 -; GFX1200-NEXT: .LBB13_11: ; %frem.compute15 +; GFX1200-NEXT: .LBB13_11: ; %frem.compute ; GFX1200-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]| ; GFX1200-NEXT: v_frexp_exp_i32_f64_e32 v15, v[6:7] ; GFX1200-NEXT: v_frexp_exp_i32_f64_e32 v14, v[2:3] @@ -17520,11 +17520,11 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v19 ; GFX1200-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0 ; GFX1200-NEXT: s_cbranch_vccnz .LBB13_15 -; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body23.preheader +; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body.preheader ; GFX1200-NEXT: s_sub_co_i32 s2, s2, s3 ; GFX1200-NEXT: s_wait_alu 0xfffe ; GFX1200-NEXT: s_add_co_i32 s2, s2, 26 -; GFX1200-NEXT: .LBB13_13: ; %frem.loop_body23 +; GFX1200-NEXT: .LBB13_13: ; %frem.loop_body ; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_2) ; GFX1200-NEXT: v_dual_mov_b32 v17, v13 :: v_dual_mov_b32 v16, v12 @@ -17547,7 +17547,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-NEXT: ; %bb.14: ; %Flow ; GFX1200-NEXT: v_dual_mov_b32 v19, s2 :: v_dual_mov_b32 v12, v16 ; GFX1200-NEXT: v_mov_b32_e32 v13, v17 -; GFX1200-NEXT: .LBB13_15: ; %frem.loop_exit24 +; GFX1200-NEXT: .LBB13_15: ; %frem.loop_exit ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX1200-NEXT: v_subrev_nc_u32_e32 v16, 25, v19 ; GFX1200-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16 |