aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/frem.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/frem.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/frem.ll1365
1 files changed, 669 insertions, 696 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll
index 78a961e..35d178c 100644
--- a/llvm/test/CodeGen/AMDGPU/frem.ll
+++ b/llvm/test/CodeGen/AMDGPU/frem.ll
@@ -4858,7 +4858,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
; SI-NEXT: v_cvt_f16_f32_e32 v4, v2
; SI-NEXT: s_cbranch_vccz .LBB9_2
-; SI-NEXT: ; %bb.1: ; %frem.else
+; SI-NEXT: ; %bb.1: ; %frem.else20
; SI-NEXT: v_bfi_b32 v7, s0, 0, v2
; SI-NEXT: v_cvt_f32_f16_e32 v4, v4
; SI-NEXT: v_cmp_eq_f32_e32 vcc, v5, v6
@@ -4869,7 +4869,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: .LBB9_2:
; SI-NEXT: ; implicit-def: $vgpr4
; SI-NEXT: s_mov_b64 vcc, 0
-; SI-NEXT: .LBB9_3: ; %frem.compute
+; SI-NEXT: .LBB9_3: ; %frem.compute19
; SI-NEXT: s_mov_b32 s3, 0x7f800000
; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v5|, s3
; SI-NEXT: v_frexp_exp_i32_f32_e32 v4, v5
@@ -4905,10 +4905,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0
; SI-NEXT: s_cmp_lt_i32 s1, 12
; SI-NEXT: s_cbranch_scc1 .LBB9_7
-; SI-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; SI-NEXT: ; %bb.4: ; %frem.loop_body27.preheader
; SI-NEXT: s_sub_i32 s1, s2, s3
; SI-NEXT: s_add_i32 s1, s1, 11
-; SI-NEXT: .LBB9_5: ; %frem.loop_body
+; SI-NEXT: .LBB9_5: ; %frem.loop_body27
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_mov_b32_e32 v7, v5
; SI-NEXT: v_mul_f32_e32 v5, v7, v6
@@ -4923,7 +4923,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_cbranch_scc1 .LBB9_5
; SI-NEXT: ; %bb.6: ; %Flow55
; SI-NEXT: v_mov_b32_e32 v5, v7
-; SI-NEXT: .LBB9_7: ; %frem.loop_exit
+; SI-NEXT: .LBB9_7: ; %frem.loop_exit28
; SI-NEXT: s_add_i32 s1, s1, -10
; SI-NEXT: v_ldexp_f32_e64 v5, v5, s1
; SI-NEXT: v_mul_f32_e32 v6, v5, v6
@@ -4944,7 +4944,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_cvt_f32_f16_e64 v7, |v7|
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, v6, v7
; SI-NEXT: s_cbranch_vccz .LBB9_10
-; SI-NEXT: ; %bb.9: ; %frem.else20
+; SI-NEXT: ; %bb.9: ; %frem.else
; SI-NEXT: s_brev_b32 s0, -2
; SI-NEXT: v_bfi_b32 v8, s0, 0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v5, v5
@@ -4956,7 +4956,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: .LBB9_10:
; SI-NEXT: ; implicit-def: $vgpr5
; SI-NEXT: s_mov_b64 vcc, 0
-; SI-NEXT: .LBB9_11: ; %frem.compute19
+; SI-NEXT: .LBB9_11: ; %frem.compute
; SI-NEXT: s_mov_b32 s3, 0x7f800000
; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v6|, s3
; SI-NEXT: v_frexp_exp_i32_f32_e32 v5, v6
@@ -4992,10 +4992,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0
; SI-NEXT: s_cmp_lt_i32 s1, 12
; SI-NEXT: s_cbranch_scc1 .LBB9_15
-; SI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; SI-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; SI-NEXT: s_sub_i32 s1, s2, s3
; SI-NEXT: s_add_i32 s1, s1, 11
-; SI-NEXT: .LBB9_13: ; %frem.loop_body27
+; SI-NEXT: .LBB9_13: ; %frem.loop_body
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_mov_b32_e32 v8, v6
; SI-NEXT: v_mul_f32_e32 v6, v8, v7
@@ -5010,7 +5010,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_cbranch_scc1 .LBB9_13
; SI-NEXT: ; %bb.14: ; %Flow
; SI-NEXT: v_mov_b32_e32 v6, v8
-; SI-NEXT: .LBB9_15: ; %frem.loop_exit28
+; SI-NEXT: .LBB9_15: ; %frem.loop_exit
; SI-NEXT: s_add_i32 s1, s1, -10
; SI-NEXT: v_ldexp_f32_e64 v6, v6, s1
; SI-NEXT: v_mul_f32_e32 v7, v6, v7
@@ -5084,7 +5084,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_and_b32_e32 v5, 0x7fffffff, v3
; CI-NEXT: s_and_b64 vcc, exec, s[2:3]
; CI-NEXT: s_cbranch_vccz .LBB9_2
-; CI-NEXT: ; %bb.1: ; %frem.else
+; CI-NEXT: ; %bb.1: ; %frem.else20
; CI-NEXT: v_cvt_f32_f16_e32 v4, v4
; CI-NEXT: v_bfi_b32 v7, s0, 0, v2
; CI-NEXT: v_cmp_eq_f32_e32 vcc, v6, v5
@@ -5093,7 +5093,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_branch .LBB9_8
; CI-NEXT: .LBB9_2:
; CI-NEXT: ; implicit-def: $vgpr4
-; CI-NEXT: .LBB9_3: ; %frem.compute
+; CI-NEXT: .LBB9_3: ; %frem.compute19
; CI-NEXT: v_frexp_exp_i32_f32_e32 v9, v6
; CI-NEXT: v_frexp_mant_f32_e32 v4, v6
; CI-NEXT: v_frexp_mant_f32_e32 v6, v5
@@ -5118,10 +5118,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v6
; CI-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0
; CI-NEXT: s_cbranch_vccnz .LBB9_7
-; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; CI-NEXT: ; %bb.4: ; %frem.loop_body27.preheader
; CI-NEXT: v_sub_i32_e32 v6, vcc, v9, v10
; CI-NEXT: v_add_i32_e32 v6, vcc, 11, v6
-; CI-NEXT: .LBB9_5: ; %frem.loop_body
+; CI-NEXT: .LBB9_5: ; %frem.loop_body27
; CI-NEXT: ; =>This Inner Loop Header: Depth=1
; CI-NEXT: v_mov_b32_e32 v9, v7
; CI-NEXT: v_mul_f32_e32 v7, v9, v8
@@ -5136,7 +5136,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_cbranch_vccnz .LBB9_5
; CI-NEXT: ; %bb.6: ; %Flow55
; CI-NEXT: v_mov_b32_e32 v7, v9
-; CI-NEXT: .LBB9_7: ; %frem.loop_exit
+; CI-NEXT: .LBB9_7: ; %frem.loop_exit28
; CI-NEXT: v_add_i32_e32 v6, vcc, -10, v6
; CI-NEXT: v_ldexp_f32_e32 v6, v7, v6
; CI-NEXT: v_mul_f32_e32 v7, v6, v8
@@ -5157,7 +5157,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cvt_f32_f16_e64 v6, |v6|
; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v7, v6
; CI-NEXT: s_cbranch_vccz .LBB9_10
-; CI-NEXT: ; %bb.9: ; %frem.else20
+; CI-NEXT: ; %bb.9: ; %frem.else
; CI-NEXT: v_cvt_f32_f16_e32 v5, v5
; CI-NEXT: s_brev_b32 s0, -2
; CI-NEXT: v_bfi_b32 v8, s0, 0, v0
@@ -5167,7 +5167,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_branch .LBB9_16
; CI-NEXT: .LBB9_10:
; CI-NEXT: ; implicit-def: $vgpr5
-; CI-NEXT: .LBB9_11: ; %frem.compute19
+; CI-NEXT: .LBB9_11: ; %frem.compute
; CI-NEXT: v_frexp_exp_i32_f32_e32 v10, v7
; CI-NEXT: v_frexp_mant_f32_e32 v5, v7
; CI-NEXT: v_frexp_mant_f32_e32 v7, v6
@@ -5192,10 +5192,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v7
; CI-NEXT: v_div_fixup_f32 v9, v9, v6, 1.0
; CI-NEXT: s_cbranch_vccnz .LBB9_15
-; CI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; CI-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; CI-NEXT: v_sub_i32_e32 v7, vcc, v10, v11
; CI-NEXT: v_add_i32_e32 v7, vcc, 11, v7
-; CI-NEXT: .LBB9_13: ; %frem.loop_body27
+; CI-NEXT: .LBB9_13: ; %frem.loop_body
; CI-NEXT: ; =>This Inner Loop Header: Depth=1
; CI-NEXT: v_mov_b32_e32 v10, v8
; CI-NEXT: v_mul_f32_e32 v8, v10, v9
@@ -5210,7 +5210,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_cbranch_vccnz .LBB9_13
; CI-NEXT: ; %bb.14: ; %Flow
; CI-NEXT: v_mov_b32_e32 v8, v10
-; CI-NEXT: .LBB9_15: ; %frem.loop_exit28
+; CI-NEXT: .LBB9_15: ; %frem.loop_exit
; CI-NEXT: v_add_i32_e32 v7, vcc, -10, v7
; CI-NEXT: v_ldexp_f32_e32 v7, v8, v7
; CI-NEXT: v_mul_f32_e32 v8, v7, v9
@@ -5275,7 +5275,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cvt_f32_f16_e64 v3, |v1|
; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v4, v3
; VI-NEXT: s_cbranch_vccz .LBB9_2
-; VI-NEXT: ; %bb.1: ; %frem.else
+; VI-NEXT: ; %bb.1: ; %frem.else20
; VI-NEXT: s_movk_i32 s2, 0x7fff
; VI-NEXT: v_bfi_b32 v2, s2, 0, v0
; VI-NEXT: v_cmp_eq_f32_e32 vcc, v4, v3
@@ -5284,7 +5284,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_branch .LBB9_8
; VI-NEXT: .LBB9_2:
; VI-NEXT: ; implicit-def: $vgpr2
-; VI-NEXT: .LBB9_3: ; %frem.compute
+; VI-NEXT: .LBB9_3: ; %frem.compute19
; VI-NEXT: v_frexp_exp_i32_f32_e32 v7, v4
; VI-NEXT: v_frexp_mant_f32_e32 v2, v4
; VI-NEXT: v_frexp_mant_f32_e32 v4, v3
@@ -5309,10 +5309,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v4
; VI-NEXT: v_div_fixup_f32 v6, v6, v3, 1.0
; VI-NEXT: s_cbranch_vccnz .LBB9_7
-; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; VI-NEXT: ; %bb.4: ; %frem.loop_body27.preheader
; VI-NEXT: v_sub_u32_e32 v4, vcc, v7, v8
; VI-NEXT: v_add_u32_e32 v4, vcc, 11, v4
-; VI-NEXT: .LBB9_5: ; %frem.loop_body
+; VI-NEXT: .LBB9_5: ; %frem.loop_body27
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: v_mov_b32_e32 v7, v5
; VI-NEXT: v_mul_f32_e32 v5, v7, v6
@@ -5327,7 +5327,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_cbranch_vccnz .LBB9_5
; VI-NEXT: ; %bb.6: ; %Flow55
; VI-NEXT: v_mov_b32_e32 v5, v7
-; VI-NEXT: .LBB9_7: ; %frem.loop_exit
+; VI-NEXT: .LBB9_7: ; %frem.loop_exit28
; VI-NEXT: v_add_u32_e32 v4, vcc, -10, v4
; VI-NEXT: v_ldexp_f32 v4, v5, v4
; VI-NEXT: v_mul_f32_e32 v5, v4, v6
@@ -5347,7 +5347,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cvt_f32_f16_e64 v6, |v4|
; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v7, v6
; VI-NEXT: s_cbranch_vccz .LBB9_10
-; VI-NEXT: ; %bb.9: ; %frem.else20
+; VI-NEXT: ; %bb.9: ; %frem.else
; VI-NEXT: s_movk_i32 s2, 0x7fff
; VI-NEXT: v_bfi_b32 v5, s2, 0, v3
; VI-NEXT: v_cmp_eq_f32_e32 vcc, v7, v6
@@ -5356,7 +5356,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_branch .LBB9_16
; VI-NEXT: .LBB9_10:
; VI-NEXT: ; implicit-def: $vgpr5
-; VI-NEXT: .LBB9_11: ; %frem.compute19
+; VI-NEXT: .LBB9_11: ; %frem.compute
; VI-NEXT: v_frexp_exp_i32_f32_e32 v10, v7
; VI-NEXT: v_frexp_mant_f32_e32 v5, v7
; VI-NEXT: v_frexp_mant_f32_e32 v7, v6
@@ -5381,10 +5381,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v7
; VI-NEXT: v_div_fixup_f32 v9, v9, v6, 1.0
; VI-NEXT: s_cbranch_vccnz .LBB9_15
-; VI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; VI-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; VI-NEXT: v_sub_u32_e32 v7, vcc, v10, v11
; VI-NEXT: v_add_u32_e32 v7, vcc, 11, v7
-; VI-NEXT: .LBB9_13: ; %frem.loop_body27
+; VI-NEXT: .LBB9_13: ; %frem.loop_body
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: v_mov_b32_e32 v10, v8
; VI-NEXT: v_mul_f32_e32 v8, v10, v9
@@ -5399,7 +5399,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_cbranch_vccnz .LBB9_13
; VI-NEXT: ; %bb.14: ; %Flow
; VI-NEXT: v_mov_b32_e32 v8, v10
-; VI-NEXT: .LBB9_15: ; %frem.loop_exit28
+; VI-NEXT: .LBB9_15: ; %frem.loop_exit
; VI-NEXT: v_add_u32_e32 v7, vcc, -10, v7
; VI-NEXT: v_ldexp_f32 v7, v8, v7
; VI-NEXT: v_mul_f32_e32 v8, v7, v9
@@ -5443,7 +5443,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cvt_f32_f16_e64 v3, |v0|
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v4, v3
; GFX9-NEXT: s_cbranch_vccz .LBB9_2
-; GFX9-NEXT: ; %bb.1: ; %frem.else
+; GFX9-NEXT: ; %bb.1: ; %frem.else20
; GFX9-NEXT: s_movk_i32 s2, 0x7fff
; GFX9-NEXT: v_bfi_b32 v2, s2, 0, v1
; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v4, v3
@@ -5452,7 +5452,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_branch .LBB9_8
; GFX9-NEXT: .LBB9_2:
; GFX9-NEXT: ; implicit-def: $vgpr2
-; GFX9-NEXT: .LBB9_3: ; %frem.compute
+; GFX9-NEXT: .LBB9_3: ; %frem.compute19
; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v7, v4
; GFX9-NEXT: v_frexp_mant_f32_e32 v2, v4
; GFX9-NEXT: v_frexp_mant_f32_e32 v4, v3
@@ -5477,10 +5477,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v4
; GFX9-NEXT: v_div_fixup_f32 v6, v6, v3, 1.0
; GFX9-NEXT: s_cbranch_vccnz .LBB9_7
-; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX9-NEXT: ; %bb.4: ; %frem.loop_body27.preheader
; GFX9-NEXT: v_sub_u32_e32 v4, v7, v8
; GFX9-NEXT: v_add_u32_e32 v4, 11, v4
-; GFX9-NEXT: .LBB9_5: ; %frem.loop_body
+; GFX9-NEXT: .LBB9_5: ; %frem.loop_body27
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mov_b32_e32 v7, v5
; GFX9-NEXT: v_mul_f32_e32 v5, v7, v6
@@ -5495,7 +5495,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_cbranch_vccnz .LBB9_5
; GFX9-NEXT: ; %bb.6: ; %Flow55
; GFX9-NEXT: v_mov_b32_e32 v5, v7
-; GFX9-NEXT: .LBB9_7: ; %frem.loop_exit
+; GFX9-NEXT: .LBB9_7: ; %frem.loop_exit28
; GFX9-NEXT: v_add_u32_e32 v4, -10, v4
; GFX9-NEXT: v_ldexp_f32 v4, v5, v4
; GFX9-NEXT: v_mul_f32_e32 v5, v4, v6
@@ -5514,7 +5514,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cvt_f32_f16_sdwa v5, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v6, v5
; GFX9-NEXT: s_cbranch_vccz .LBB9_10
-; GFX9-NEXT: ; %bb.9: ; %frem.else20
+; GFX9-NEXT: ; %bb.9: ; %frem.else
; GFX9-NEXT: s_movk_i32 s2, 0x7fff
; GFX9-NEXT: v_bfi_b32 v4, s2, 0, v3
; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v6, v5
@@ -5523,7 +5523,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_branch .LBB9_16
; GFX9-NEXT: .LBB9_10:
; GFX9-NEXT: ; implicit-def: $vgpr4
-; GFX9-NEXT: .LBB9_11: ; %frem.compute19
+; GFX9-NEXT: .LBB9_11: ; %frem.compute
; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v9, v6
; GFX9-NEXT: v_frexp_mant_f32_e32 v4, v6
; GFX9-NEXT: v_frexp_mant_f32_e32 v6, v5
@@ -5548,10 +5548,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v6
; GFX9-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0
; GFX9-NEXT: s_cbranch_vccnz .LBB9_15
-; GFX9-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX9-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX9-NEXT: v_sub_u32_e32 v6, v9, v10
; GFX9-NEXT: v_add_u32_e32 v6, 11, v6
-; GFX9-NEXT: .LBB9_13: ; %frem.loop_body27
+; GFX9-NEXT: .LBB9_13: ; %frem.loop_body
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mov_b32_e32 v9, v7
; GFX9-NEXT: v_mul_f32_e32 v7, v9, v8
@@ -5566,7 +5566,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_cbranch_vccnz .LBB9_13
; GFX9-NEXT: ; %bb.14: ; %Flow
; GFX9-NEXT: v_mov_b32_e32 v7, v9
-; GFX9-NEXT: .LBB9_15: ; %frem.loop_exit28
+; GFX9-NEXT: .LBB9_15: ; %frem.loop_exit
; GFX9-NEXT: v_add_u32_e32 v6, -10, v6
; GFX9-NEXT: v_ldexp_f32 v6, v7, v6
; GFX9-NEXT: v_mul_f32_e32 v7, v6, v8
@@ -5612,7 +5612,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cvt_f32_f16_e64 v3, |v0|
; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v4, v3
; GFX10-NEXT: s_cbranch_vccz .LBB9_2
-; GFX10-NEXT: ; %bb.1: ; %frem.else
+; GFX10-NEXT: ; %bb.1: ; %frem.else20
; GFX10-NEXT: v_bfi_b32 v2, 0x7fff, 0, v1
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v4, v3
; GFX10-NEXT: v_cndmask_b32_e32 v2, v1, v2, vcc_lo
@@ -5620,7 +5620,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: s_branch .LBB9_8
; GFX10-NEXT: .LBB9_2:
; GFX10-NEXT: ; implicit-def: $vgpr2
-; GFX10-NEXT: .LBB9_3: ; %frem.compute
+; GFX10-NEXT: .LBB9_3: ; %frem.compute19
; GFX10-NEXT: v_frexp_mant_f32_e32 v2, v4
; GFX10-NEXT: v_frexp_mant_f32_e32 v6, v3
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v5, v4
@@ -5647,10 +5647,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6
; GFX10-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0
; GFX10-NEXT: s_cbranch_vccnz .LBB9_7
-; GFX10-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX10-NEXT: ; %bb.4: ; %frem.loop_body27.preheader
; GFX10-NEXT: s_sub_i32 s2, s2, s3
; GFX10-NEXT: s_add_i32 s2, s2, 11
-; GFX10-NEXT: .LBB9_5: ; %frem.loop_body
+; GFX10-NEXT: .LBB9_5: ; %frem.loop_body27
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v7, v4
; GFX10-NEXT: s_add_i32 s2, s2, -11
@@ -5666,7 +5666,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: ; %bb.6: ; %Flow55
; GFX10-NEXT: v_mov_b32_e32 v6, s2
; GFX10-NEXT: v_mov_b32_e32 v4, v7
-; GFX10-NEXT: .LBB9_7: ; %frem.loop_exit
+; GFX10-NEXT: .LBB9_7: ; %frem.loop_exit28
; GFX10-NEXT: v_add_nc_u32_e32 v6, -10, v6
; GFX10-NEXT: v_ldexp_f32 v4, v4, v6
; GFX10-NEXT: v_mul_f32_e32 v5, v4, v5
@@ -5684,7 +5684,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cvt_f32_f16_e64 v6, |v3|
; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v4
; GFX10-NEXT: s_cbranch_vccz .LBB9_10
-; GFX10-NEXT: ; %bb.9: ; %frem.else20
+; GFX10-NEXT: ; %bb.9: ; %frem.else
; GFX10-NEXT: v_bfi_b32 v5, 0x7fff, 0, v3
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v4
; GFX10-NEXT: v_cndmask_b32_e32 v5, v3, v5, vcc_lo
@@ -5692,7 +5692,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: s_branch .LBB9_16
; GFX10-NEXT: .LBB9_10:
; GFX10-NEXT: ; implicit-def: $vgpr5
-; GFX10-NEXT: .LBB9_11: ; %frem.compute19
+; GFX10-NEXT: .LBB9_11: ; %frem.compute
; GFX10-NEXT: v_frexp_mant_f32_e32 v5, v6
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v7, v6
; GFX10-NEXT: v_ldexp_f32 v6, v5, 11
@@ -5719,10 +5719,10 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v8
; GFX10-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0
; GFX10-NEXT: s_cbranch_vccnz .LBB9_15
-; GFX10-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX10-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX10-NEXT: s_sub_i32 s2, s2, s3
; GFX10-NEXT: s_add_i32 s2, s2, 11
-; GFX10-NEXT: .LBB9_13: ; %frem.loop_body27
+; GFX10-NEXT: .LBB9_13: ; %frem.loop_body
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v9, v6
; GFX10-NEXT: s_add_i32 s2, s2, -11
@@ -5738,7 +5738,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: ; %bb.14: ; %Flow
; GFX10-NEXT: v_mov_b32_e32 v8, s2
; GFX10-NEXT: v_mov_b32_e32 v6, v9
-; GFX10-NEXT: .LBB9_15: ; %frem.loop_exit28
+; GFX10-NEXT: .LBB9_15: ; %frem.loop_exit
; GFX10-NEXT: v_add_nc_u32_e32 v8, -10, v8
; GFX10-NEXT: v_ldexp_f32 v6, v6, v8
; GFX10-NEXT: v_mul_f32_e32 v7, v6, v7
@@ -5782,7 +5782,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v4, v3
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB9_2
-; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else
+; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else20
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.l, 0
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v4, v3
@@ -5793,7 +5793,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_branch .LBB9_8
; GFX11-TRUE16-NEXT: .LBB9_2:
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr2
-; GFX11-TRUE16-NEXT: .LBB9_3: ; %frem.compute
+; GFX11-TRUE16-NEXT: .LBB9_3: ; %frem.compute19
; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, v4
; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v6, v3
; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v5, v4
@@ -5829,11 +5829,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB9_7
-; GFX11-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX11-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader
; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11
-; GFX11-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body
+; GFX11-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body27
; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v7, v4
@@ -5853,7 +5853,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: ; %bb.6: ; %Flow55
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, s2
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v4, v7
-; GFX11-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit
+; GFX11-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit28
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, -10, v6
; GFX11-TRUE16-NEXT: v_ldexp_f32 v4, v4, v6
@@ -5880,7 +5880,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB9_10
-; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else20
+; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v3.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, 0
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5
@@ -5891,7 +5891,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_branch .LBB9_16
; GFX11-TRUE16-NEXT: .LBB9_10:
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7
-; GFX11-TRUE16-NEXT: .LBB9_11: ; %frem.compute19
+; GFX11-TRUE16-NEXT: .LBB9_11: ; %frem.compute
; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v8, v6
; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v6, v6
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
@@ -5927,11 +5927,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_div_fixup_f32 v8, v8, v6, 1.0
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB9_15
-; GFX11-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX11-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11
-; GFX11-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body27
+; GFX11-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body
; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v10, v7
@@ -5951,7 +5951,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: ; %bb.14: ; %Flow
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v9, s2
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v7, v10
-; GFX11-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit28
+; GFX11-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v9, -10, v9
; GFX11-TRUE16-NEXT: v_ldexp_f32 v7, v7, v9
@@ -5972,16 +5972,14 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: .LBB9_16: ; %Flow54
; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v1.l
; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v0.l|
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v4.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v2.l, s2
; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v3.l|
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo
-; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, 0x7e00, v7.l, s2
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v7.l, s2
; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
@@ -6002,7 +6000,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v4, v3
; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB9_2
-; GFX11-FAKE16-NEXT: ; %bb.1: ; %frem.else
+; GFX11-FAKE16-NEXT: ; %bb.1: ; %frem.else20
; GFX11-FAKE16-NEXT: v_bfi_b32 v2, 0x7fff, 0, v0
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v4, v3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
@@ -6011,7 +6009,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_branch .LBB9_8
; GFX11-FAKE16-NEXT: .LBB9_2:
; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr2
-; GFX11-FAKE16-NEXT: .LBB9_3: ; %frem.compute
+; GFX11-FAKE16-NEXT: .LBB9_3: ; %frem.compute19
; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, v4
; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v6, v3
; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v5, v4
@@ -6047,11 +6045,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0
; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB9_7
-; GFX11-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX11-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader
; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11
-; GFX11-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body
+; GFX11-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body27
; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v7, v4
@@ -6071,7 +6069,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: ; %bb.6: ; %Flow55
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v6, s2
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v4, v7
-; GFX11-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit
+; GFX11-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit28
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, -10, v6
; GFX11-FAKE16-NEXT: v_ldexp_f32 v4, v4, v6
@@ -6097,7 +6095,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v7, v5
; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB9_10
-; GFX11-FAKE16-NEXT: ; %bb.9: ; %frem.else20
+; GFX11-FAKE16-NEXT: ; %bb.9: ; %frem.else
; GFX11-FAKE16-NEXT: v_bfi_b32 v6, 0x7fff, 0, v3
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v7, v5
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
@@ -6106,7 +6104,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_branch .LBB9_16
; GFX11-FAKE16-NEXT: .LBB9_10:
; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr6
-; GFX11-FAKE16-NEXT: .LBB9_11: ; %frem.compute19
+; GFX11-FAKE16-NEXT: .LBB9_11: ; %frem.compute
; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v6, v7
; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v8, v7
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
@@ -6142,11 +6140,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_div_fixup_f32 v8, v8, v6, 1.0
; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB9_15
-; GFX11-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX11-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11
-; GFX11-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body27
+; GFX11-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body
; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v10, v7
@@ -6166,7 +6164,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: ; %bb.14: ; %Flow
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v9, s2
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v7, v10
-; GFX11-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit28
+; GFX11-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v9, -10, v9
; GFX11-FAKE16-NEXT: v_ldexp_f32 v7, v7, v9
@@ -6220,7 +6218,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s6, s5
; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB9_2
-; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else
+; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else20
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v0.l, s4
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s6, s5
@@ -6232,7 +6230,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_branch .LBB9_8
; GFX1150-TRUE16-NEXT: .LBB9_2:
; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr0
-; GFX1150-TRUE16-NEXT: .LBB9_3: ; %frem.compute
+; GFX1150-TRUE16-NEXT: .LBB9_3: ; %frem.compute19
; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s5
; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v0, s6
; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s6
@@ -6267,11 +6265,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4
; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB9_7
-; GFX1150-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX1150-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader
; GFX1150-TRUE16-NEXT: s_sub_i32 s5, s6, s5
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_add_i32 s5, s5, 11
-; GFX1150-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body
+; GFX1150-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body27
; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, v2
@@ -6293,7 +6291,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: ; %bb.6: ; %Flow55
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, s5
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v2, v5
-; GFX1150-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit
+; GFX1150-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit28
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v4, -10, v4
; GFX1150-TRUE16-NEXT: v_ldexp_f32 v2, v2, v4
@@ -6323,7 +6321,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s8, s7
; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB9_10
-; GFX1150-TRUE16-NEXT: ; %bb.9: ; %frem.else20
+; GFX1150-TRUE16-NEXT: ; %bb.9: ; %frem.else
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, s6
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s8, s7
@@ -6335,7 +6333,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_branch .LBB9_16
; GFX1150-TRUE16-NEXT: .LBB9_10:
; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr1
-; GFX1150-TRUE16-NEXT: .LBB9_11: ; %frem.compute19
+; GFX1150-TRUE16-NEXT: .LBB9_11: ; %frem.compute
; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s7
; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s8
; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s8
@@ -6370,11 +6368,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5
; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0
; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB9_15
-; GFX1150-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX1150-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX1150-TRUE16-NEXT: s_sub_i32 s7, s8, s7
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_add_i32 s7, s7, 11
-; GFX1150-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body27
+; GFX1150-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body
; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v6, v3
@@ -6396,7 +6394,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: ; %bb.14: ; %Flow
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, s7
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v3, v6
-; GFX1150-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit28
+; GFX1150-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v5, -10, v5
; GFX1150-TRUE16-NEXT: v_ldexp_f32 v3, v3, v5
@@ -6422,19 +6420,16 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s2, 0x7c00
; GFX1150-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
-; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_and_b32 s2, s2, s3
; GFX1150-TRUE16-NEXT: s_cmp_lg_f16 s5, 0
; GFX1150-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.l, s2
; GFX1150-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s4, 0x7c00
-; GFX1150-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_and_b32 s2, s3, s2
-; GFX1150-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x7e00, v1.l, s2
-; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1150-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX1150-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v1.l, s2
; GFX1150-TRUE16-NEXT: global_store_b32 v2, v0, s[0:1]
; GFX1150-TRUE16-NEXT: s_endpgm
;
@@ -6459,7 +6454,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s6, s5
; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB9_2
-; GFX1150-FAKE16-NEXT: ; %bb.1: ; %frem.else
+; GFX1150-FAKE16-NEXT: ; %bb.1: ; %frem.else20
; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s6, s5
; GFX1150-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, 0, s4
; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -6469,7 +6464,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: s_branch .LBB9_8
; GFX1150-FAKE16-NEXT: .LBB9_2:
; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr0
-; GFX1150-FAKE16-NEXT: .LBB9_3: ; %frem.compute
+; GFX1150-FAKE16-NEXT: .LBB9_3: ; %frem.compute19
; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s5
; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v0, s6
; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s6
@@ -6504,11 +6499,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4
; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB9_7
-; GFX1150-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX1150-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader
; GFX1150-FAKE16-NEXT: s_sub_i32 s5, s6, s5
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-FAKE16-NEXT: s_add_i32 s5, s5, 11
-; GFX1150-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body
+; GFX1150-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body27
; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v5, v2
@@ -6530,7 +6525,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: ; %bb.6: ; %Flow55
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v4, s5
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v2, v5
-; GFX1150-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit
+; GFX1150-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit28
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v4, -10, v4
; GFX1150-FAKE16-NEXT: v_ldexp_f32 v2, v2, v4
@@ -6559,7 +6554,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s8, s7
; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB9_10
-; GFX1150-FAKE16-NEXT: ; %bb.9: ; %frem.else20
+; GFX1150-FAKE16-NEXT: ; %bb.9: ; %frem.else
; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s8, s7
; GFX1150-FAKE16-NEXT: v_bfi_b32 v1, 0x7fff, 0, s6
; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -6569,7 +6564,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: s_branch .LBB9_16
; GFX1150-FAKE16-NEXT: .LBB9_10:
; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr1
-; GFX1150-FAKE16-NEXT: .LBB9_11: ; %frem.compute19
+; GFX1150-FAKE16-NEXT: .LBB9_11: ; %frem.compute
; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s7
; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s8
; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s8
@@ -6604,11 +6599,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5
; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0
; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB9_15
-; GFX1150-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX1150-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX1150-FAKE16-NEXT: s_sub_i32 s7, s8, s7
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-FAKE16-NEXT: s_add_i32 s7, s7, 11
-; GFX1150-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body27
+; GFX1150-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body
; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v6, v3
@@ -6630,7 +6625,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: ; %bb.14: ; %Flow
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v5, s7
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v3, v6
-; GFX1150-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit28
+; GFX1150-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v5, -10, v5
; GFX1150-FAKE16-NEXT: v_ldexp_f32 v3, v3, v5
@@ -6690,7 +6685,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s6, s5
; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB9_2
-; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else
+; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else20
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v0.l, s4
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s6, s5
@@ -6702,7 +6697,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_branch .LBB9_8
; GFX1200-TRUE16-NEXT: .LBB9_2:
; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr0
-; GFX1200-TRUE16-NEXT: .LBB9_3: ; %frem.compute
+; GFX1200-TRUE16-NEXT: .LBB9_3: ; %frem.compute19
; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s5
; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v0, s6
; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s6
@@ -6737,11 +6732,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4
; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB9_7
-; GFX1200-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX1200-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader
; GFX1200-TRUE16-NEXT: s_sub_co_i32 s5, s6, s5
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
; GFX1200-TRUE16-NEXT: s_add_co_i32 s5, s5, 11
-; GFX1200-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body
+; GFX1200-TRUE16-NEXT: .LBB9_5: ; %frem.loop_body27
; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v5, v2
@@ -6765,7 +6760,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: ; %bb.6: ; %Flow55
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v4, s5
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v2, v5
-; GFX1200-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit
+; GFX1200-TRUE16-NEXT: .LBB9_7: ; %frem.loop_exit28
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v4, -10, v4
; GFX1200-TRUE16-NEXT: v_ldexp_f32 v2, v2, v4
@@ -6799,7 +6794,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s8, s7
; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB9_10
-; GFX1200-TRUE16-NEXT: ; %bb.9: ; %frem.else20
+; GFX1200-TRUE16-NEXT: ; %bb.9: ; %frem.else
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, s6
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s8, s7
@@ -6811,7 +6806,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_branch .LBB9_16
; GFX1200-TRUE16-NEXT: .LBB9_10:
; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr1
-; GFX1200-TRUE16-NEXT: .LBB9_11: ; %frem.compute19
+; GFX1200-TRUE16-NEXT: .LBB9_11: ; %frem.compute
; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s7
; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s8
; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s8
@@ -6847,11 +6842,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5
; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0
; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB9_15
-; GFX1200-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX1200-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX1200-TRUE16-NEXT: s_sub_co_i32 s7, s8, s7
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
; GFX1200-TRUE16-NEXT: s_add_co_i32 s7, s7, 11
-; GFX1200-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body27
+; GFX1200-TRUE16-NEXT: .LBB9_13: ; %frem.loop_body
; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v6, v3
@@ -6875,7 +6870,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: ; %bb.14: ; %Flow
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v5, s7
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v3, v6
-; GFX1200-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit28
+; GFX1200-TRUE16-NEXT: .LBB9_15: ; %frem.loop_exit
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v5, -10, v5
; GFX1200-TRUE16-NEXT: v_ldexp_f32 v3, v3, v5
@@ -6902,20 +6897,17 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1200-TRUE16-NEXT: s_cmp_nge_f16 s2, 0x7c00
; GFX1200-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
-; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1200-TRUE16-NEXT: s_and_b32 s2, s2, s3
; GFX1200-TRUE16-NEXT: s_cmp_lg_f16 s5, 0
; GFX1200-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.l, s2
; GFX1200-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX1200-TRUE16-NEXT: s_cmp_nge_f16 s4, 0x7c00
-; GFX1200-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX1200-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
; GFX1200-TRUE16-NEXT: s_and_b32 s2, s3, s2
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
-; GFX1200-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x7e00, v1.l, s2
-; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX1200-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v1.l, s2
; GFX1200-TRUE16-NEXT: global_store_b32 v2, v0, s[0:1]
; GFX1200-TRUE16-NEXT: s_endpgm
;
@@ -6940,7 +6932,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s6, s5
; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB9_2
-; GFX1200-FAKE16-NEXT: ; %bb.1: ; %frem.else
+; GFX1200-FAKE16-NEXT: ; %bb.1: ; %frem.else20
; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s6, s5
; GFX1200-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, 0, s4
; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -6950,7 +6942,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: s_branch .LBB9_8
; GFX1200-FAKE16-NEXT: .LBB9_2:
; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr0
-; GFX1200-FAKE16-NEXT: .LBB9_3: ; %frem.compute
+; GFX1200-FAKE16-NEXT: .LBB9_3: ; %frem.compute19
; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s5
; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v0, s6
; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s6
@@ -6986,11 +6978,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4
; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB9_7
-; GFX1200-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX1200-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body27.preheader
; GFX1200-FAKE16-NEXT: s_sub_co_i32 s5, s6, s5
; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe
; GFX1200-FAKE16-NEXT: s_add_co_i32 s5, s5, 11
-; GFX1200-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body
+; GFX1200-FAKE16-NEXT: .LBB9_5: ; %frem.loop_body27
; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v5, v2
@@ -7014,7 +7006,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: ; %bb.6: ; %Flow55
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v4, s5
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v2, v5
-; GFX1200-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit
+; GFX1200-FAKE16-NEXT: .LBB9_7: ; %frem.loop_exit28
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v4, -10, v4
; GFX1200-FAKE16-NEXT: v_ldexp_f32 v2, v2, v4
@@ -7047,7 +7039,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s8, s7
; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB9_10
-; GFX1200-FAKE16-NEXT: ; %bb.9: ; %frem.else20
+; GFX1200-FAKE16-NEXT: ; %bb.9: ; %frem.else
; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s8, s7
; GFX1200-FAKE16-NEXT: v_bfi_b32 v1, 0x7fff, 0, s6
; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -7058,7 +7050,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: s_branch .LBB9_16
; GFX1200-FAKE16-NEXT: .LBB9_10:
; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr1
-; GFX1200-FAKE16-NEXT: .LBB9_11: ; %frem.compute19
+; GFX1200-FAKE16-NEXT: .LBB9_11: ; %frem.compute
; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s7
; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s8
; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s8
@@ -7094,11 +7086,11 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5
; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0
; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB9_15
-; GFX1200-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX1200-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX1200-FAKE16-NEXT: s_sub_co_i32 s7, s8, s7
; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe
; GFX1200-FAKE16-NEXT: s_add_co_i32 s7, s7, 11
-; GFX1200-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body27
+; GFX1200-FAKE16-NEXT: .LBB9_13: ; %frem.loop_body
; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v6, v3
@@ -7122,7 +7114,7 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: ; %bb.14: ; %Flow
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v5, s7
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v3, v6
-; GFX1200-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit28
+; GFX1200-FAKE16-NEXT: .LBB9_15: ; %frem.loop_exit
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v5, -10, v5
; GFX1200-FAKE16-NEXT: v_ldexp_f32 v3, v3, v5
@@ -7208,7 +7200,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
; SI-NEXT: v_cvt_f16_f32_e32 v8, v6
; SI-NEXT: s_cbranch_vccz .LBB10_2
-; SI-NEXT: ; %bb.1: ; %frem.else
+; SI-NEXT: ; %bb.1: ; %frem.else86
; SI-NEXT: v_bfi_b32 v11, s0, 0, v6
; SI-NEXT: v_cvt_f32_f16_e32 v8, v8
; SI-NEXT: v_cmp_eq_f32_e32 vcc, v9, v10
@@ -7219,7 +7211,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: .LBB10_2:
; SI-NEXT: ; implicit-def: $vgpr8
; SI-NEXT: s_mov_b64 vcc, 0
-; SI-NEXT: .LBB10_3: ; %frem.compute
+; SI-NEXT: .LBB10_3: ; %frem.compute85
; SI-NEXT: s_mov_b32 s3, 0x7f800000
; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v9|, s3
; SI-NEXT: v_frexp_exp_i32_f32_e32 v8, v9
@@ -7255,10 +7247,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_div_fixup_f32 v10, v10, v8, 1.0
; SI-NEXT: s_cmp_lt_i32 s1, 12
; SI-NEXT: s_cbranch_scc1 .LBB10_7
-; SI-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; SI-NEXT: ; %bb.4: ; %frem.loop_body93.preheader
; SI-NEXT: s_sub_i32 s1, s2, s3
; SI-NEXT: s_add_i32 s1, s1, 11
-; SI-NEXT: .LBB10_5: ; %frem.loop_body
+; SI-NEXT: .LBB10_5: ; %frem.loop_body93
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_mov_b32_e32 v11, v9
; SI-NEXT: v_mul_f32_e32 v9, v11, v10
@@ -7273,7 +7265,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_cbranch_scc1 .LBB10_5
; SI-NEXT: ; %bb.6: ; %Flow133
; SI-NEXT: v_mov_b32_e32 v9, v11
-; SI-NEXT: .LBB10_7: ; %frem.loop_exit
+; SI-NEXT: .LBB10_7: ; %frem.loop_exit94
; SI-NEXT: s_add_i32 s1, s1, -10
; SI-NEXT: v_ldexp_f32_e64 v9, v9, s1
; SI-NEXT: v_mul_f32_e32 v10, v9, v10
@@ -7294,7 +7286,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_cvt_f32_f16_e64 v11, |v11|
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, v10, v11
; SI-NEXT: s_cbranch_vccz .LBB10_10
-; SI-NEXT: ; %bb.9: ; %frem.else20
+; SI-NEXT: ; %bb.9: ; %frem.else53
; SI-NEXT: s_brev_b32 s0, -2
; SI-NEXT: v_bfi_b32 v12, s0, 0, v4
; SI-NEXT: v_cvt_f32_f16_e32 v9, v9
@@ -7306,7 +7298,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: .LBB10_10:
; SI-NEXT: ; implicit-def: $vgpr9
; SI-NEXT: s_mov_b64 vcc, 0
-; SI-NEXT: .LBB10_11: ; %frem.compute19
+; SI-NEXT: .LBB10_11: ; %frem.compute52
; SI-NEXT: s_mov_b32 s3, 0x7f800000
; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v10|, s3
; SI-NEXT: v_frexp_exp_i32_f32_e32 v9, v10
@@ -7342,10 +7334,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0
; SI-NEXT: s_cmp_lt_i32 s1, 12
; SI-NEXT: s_cbranch_scc1 .LBB10_15
-; SI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; SI-NEXT: ; %bb.12: ; %frem.loop_body60.preheader
; SI-NEXT: s_sub_i32 s1, s2, s3
; SI-NEXT: s_add_i32 s1, s1, 11
-; SI-NEXT: .LBB10_13: ; %frem.loop_body27
+; SI-NEXT: .LBB10_13: ; %frem.loop_body60
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_mov_b32_e32 v12, v10
; SI-NEXT: v_mul_f32_e32 v10, v12, v11
@@ -7360,7 +7352,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_cbranch_scc1 .LBB10_13
; SI-NEXT: ; %bb.14: ; %Flow129
; SI-NEXT: v_mov_b32_e32 v10, v12
-; SI-NEXT: .LBB10_15: ; %frem.loop_exit28
+; SI-NEXT: .LBB10_15: ; %frem.loop_exit61
; SI-NEXT: s_add_i32 s1, s1, -10
; SI-NEXT: v_ldexp_f32_e64 v10, v10, s1
; SI-NEXT: v_mul_f32_e32 v11, v10, v11
@@ -7381,7 +7373,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_cvt_f32_f16_e64 v12, |v12|
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, v11, v12
; SI-NEXT: s_cbranch_vccz .LBB10_18
-; SI-NEXT: ; %bb.17: ; %frem.else53
+; SI-NEXT: ; %bb.17: ; %frem.else20
; SI-NEXT: s_brev_b32 s0, -2
; SI-NEXT: v_bfi_b32 v13, s0, 0, v2
; SI-NEXT: v_cvt_f32_f16_e32 v10, v10
@@ -7393,7 +7385,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: .LBB10_18:
; SI-NEXT: ; implicit-def: $vgpr10
; SI-NEXT: s_mov_b64 vcc, 0
-; SI-NEXT: .LBB10_19: ; %frem.compute52
+; SI-NEXT: .LBB10_19: ; %frem.compute19
; SI-NEXT: s_mov_b32 s3, 0x7f800000
; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v11|, s3
; SI-NEXT: v_frexp_exp_i32_f32_e32 v10, v11
@@ -7429,10 +7421,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_div_fixup_f32 v12, v12, v10, 1.0
; SI-NEXT: s_cmp_lt_i32 s1, 12
; SI-NEXT: s_cbranch_scc1 .LBB10_23
-; SI-NEXT: ; %bb.20: ; %frem.loop_body60.preheader
+; SI-NEXT: ; %bb.20: ; %frem.loop_body27.preheader
; SI-NEXT: s_sub_i32 s1, s2, s3
; SI-NEXT: s_add_i32 s1, s1, 11
-; SI-NEXT: .LBB10_21: ; %frem.loop_body60
+; SI-NEXT: .LBB10_21: ; %frem.loop_body27
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_mov_b32_e32 v13, v11
; SI-NEXT: v_mul_f32_e32 v11, v13, v12
@@ -7447,7 +7439,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_cbranch_scc1 .LBB10_21
; SI-NEXT: ; %bb.22: ; %Flow125
; SI-NEXT: v_mov_b32_e32 v11, v13
-; SI-NEXT: .LBB10_23: ; %frem.loop_exit61
+; SI-NEXT: .LBB10_23: ; %frem.loop_exit28
; SI-NEXT: s_add_i32 s1, s1, -10
; SI-NEXT: v_ldexp_f32_e64 v11, v11, s1
; SI-NEXT: v_mul_f32_e32 v12, v11, v12
@@ -7468,7 +7460,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_cvt_f32_f16_e64 v13, |v13|
; SI-NEXT: v_cmp_ngt_f32_e32 vcc, v12, v13
; SI-NEXT: s_cbranch_vccz .LBB10_26
-; SI-NEXT: ; %bb.25: ; %frem.else86
+; SI-NEXT: ; %bb.25: ; %frem.else
; SI-NEXT: s_brev_b32 s0, -2
; SI-NEXT: v_bfi_b32 v14, s0, 0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v11, v11
@@ -7480,7 +7472,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: .LBB10_26:
; SI-NEXT: ; implicit-def: $vgpr11
; SI-NEXT: s_mov_b64 vcc, 0
-; SI-NEXT: .LBB10_27: ; %frem.compute85
+; SI-NEXT: .LBB10_27: ; %frem.compute
; SI-NEXT: s_mov_b32 s3, 0x7f800000
; SI-NEXT: v_cmp_lt_f32_e64 vcc, |v12|, s3
; SI-NEXT: v_frexp_exp_i32_f32_e32 v11, v12
@@ -7516,10 +7508,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_div_fixup_f32 v13, v13, v11, 1.0
; SI-NEXT: s_cmp_lt_i32 s1, 12
; SI-NEXT: s_cbranch_scc1 .LBB10_31
-; SI-NEXT: ; %bb.28: ; %frem.loop_body93.preheader
+; SI-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; SI-NEXT: s_sub_i32 s1, s2, s3
; SI-NEXT: s_add_i32 s1, s1, 11
-; SI-NEXT: .LBB10_29: ; %frem.loop_body93
+; SI-NEXT: .LBB10_29: ; %frem.loop_body
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_mov_b32_e32 v14, v12
; SI-NEXT: v_mul_f32_e32 v12, v14, v13
@@ -7534,7 +7526,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_cbranch_scc1 .LBB10_29
; SI-NEXT: ; %bb.30: ; %Flow
; SI-NEXT: v_mov_b32_e32 v12, v14
-; SI-NEXT: .LBB10_31: ; %frem.loop_exit94
+; SI-NEXT: .LBB10_31: ; %frem.loop_exit
; SI-NEXT: s_add_i32 s1, s1, -10
; SI-NEXT: v_ldexp_f32_e64 v12, v12, s1
; SI-NEXT: v_mul_f32_e32 v13, v12, v13
@@ -7638,7 +7630,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_and_b32_e32 v9, 0x7fffffff, v7
; CI-NEXT: s_and_b64 vcc, exec, s[2:3]
; CI-NEXT: s_cbranch_vccz .LBB10_2
-; CI-NEXT: ; %bb.1: ; %frem.else
+; CI-NEXT: ; %bb.1: ; %frem.else86
; CI-NEXT: v_cvt_f32_f16_e32 v8, v8
; CI-NEXT: v_bfi_b32 v11, s0, 0, v6
; CI-NEXT: v_cmp_eq_f32_e32 vcc, v10, v9
@@ -7647,7 +7639,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_branch .LBB10_8
; CI-NEXT: .LBB10_2:
; CI-NEXT: ; implicit-def: $vgpr8
-; CI-NEXT: .LBB10_3: ; %frem.compute
+; CI-NEXT: .LBB10_3: ; %frem.compute85
; CI-NEXT: v_frexp_exp_i32_f32_e32 v13, v10
; CI-NEXT: v_frexp_mant_f32_e32 v8, v10
; CI-NEXT: v_frexp_mant_f32_e32 v10, v9
@@ -7672,10 +7664,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v10
; CI-NEXT: v_div_fixup_f32 v12, v12, v9, 1.0
; CI-NEXT: s_cbranch_vccnz .LBB10_7
-; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; CI-NEXT: ; %bb.4: ; %frem.loop_body93.preheader
; CI-NEXT: v_sub_i32_e32 v10, vcc, v13, v14
; CI-NEXT: v_add_i32_e32 v10, vcc, 11, v10
-; CI-NEXT: .LBB10_5: ; %frem.loop_body
+; CI-NEXT: .LBB10_5: ; %frem.loop_body93
; CI-NEXT: ; =>This Inner Loop Header: Depth=1
; CI-NEXT: v_mov_b32_e32 v13, v11
; CI-NEXT: v_mul_f32_e32 v11, v13, v12
@@ -7690,7 +7682,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_cbranch_vccnz .LBB10_5
; CI-NEXT: ; %bb.6: ; %Flow133
; CI-NEXT: v_mov_b32_e32 v11, v13
-; CI-NEXT: .LBB10_7: ; %frem.loop_exit
+; CI-NEXT: .LBB10_7: ; %frem.loop_exit94
; CI-NEXT: v_add_i32_e32 v10, vcc, -10, v10
; CI-NEXT: v_ldexp_f32_e32 v10, v11, v10
; CI-NEXT: v_mul_f32_e32 v11, v10, v12
@@ -7711,7 +7703,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cvt_f32_f16_e64 v10, |v10|
; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v11, v10
; CI-NEXT: s_cbranch_vccz .LBB10_10
-; CI-NEXT: ; %bb.9: ; %frem.else20
+; CI-NEXT: ; %bb.9: ; %frem.else53
; CI-NEXT: v_cvt_f32_f16_e32 v9, v9
; CI-NEXT: s_brev_b32 s0, -2
; CI-NEXT: v_bfi_b32 v12, s0, 0, v4
@@ -7721,7 +7713,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_branch .LBB10_16
; CI-NEXT: .LBB10_10:
; CI-NEXT: ; implicit-def: $vgpr9
-; CI-NEXT: .LBB10_11: ; %frem.compute19
+; CI-NEXT: .LBB10_11: ; %frem.compute52
; CI-NEXT: v_frexp_exp_i32_f32_e32 v14, v11
; CI-NEXT: v_frexp_mant_f32_e32 v9, v11
; CI-NEXT: v_frexp_mant_f32_e32 v11, v10
@@ -7746,10 +7738,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v11
; CI-NEXT: v_div_fixup_f32 v13, v13, v10, 1.0
; CI-NEXT: s_cbranch_vccnz .LBB10_15
-; CI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; CI-NEXT: ; %bb.12: ; %frem.loop_body60.preheader
; CI-NEXT: v_sub_i32_e32 v11, vcc, v14, v15
; CI-NEXT: v_add_i32_e32 v11, vcc, 11, v11
-; CI-NEXT: .LBB10_13: ; %frem.loop_body27
+; CI-NEXT: .LBB10_13: ; %frem.loop_body60
; CI-NEXT: ; =>This Inner Loop Header: Depth=1
; CI-NEXT: v_mov_b32_e32 v14, v12
; CI-NEXT: v_mul_f32_e32 v12, v14, v13
@@ -7764,7 +7756,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_cbranch_vccnz .LBB10_13
; CI-NEXT: ; %bb.14: ; %Flow129
; CI-NEXT: v_mov_b32_e32 v12, v14
-; CI-NEXT: .LBB10_15: ; %frem.loop_exit28
+; CI-NEXT: .LBB10_15: ; %frem.loop_exit61
; CI-NEXT: v_add_i32_e32 v11, vcc, -10, v11
; CI-NEXT: v_ldexp_f32_e32 v11, v12, v11
; CI-NEXT: v_mul_f32_e32 v12, v11, v13
@@ -7785,7 +7777,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cvt_f32_f16_e64 v11, |v11|
; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v12, v11
; CI-NEXT: s_cbranch_vccz .LBB10_18
-; CI-NEXT: ; %bb.17: ; %frem.else53
+; CI-NEXT: ; %bb.17: ; %frem.else20
; CI-NEXT: v_cvt_f32_f16_e32 v10, v10
; CI-NEXT: s_brev_b32 s0, -2
; CI-NEXT: v_bfi_b32 v13, s0, 0, v2
@@ -7795,7 +7787,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_branch .LBB10_24
; CI-NEXT: .LBB10_18:
; CI-NEXT: ; implicit-def: $vgpr10
-; CI-NEXT: .LBB10_19: ; %frem.compute52
+; CI-NEXT: .LBB10_19: ; %frem.compute19
; CI-NEXT: v_frexp_exp_i32_f32_e32 v15, v12
; CI-NEXT: v_frexp_mant_f32_e32 v10, v12
; CI-NEXT: v_frexp_mant_f32_e32 v12, v11
@@ -7820,10 +7812,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v12
; CI-NEXT: v_div_fixup_f32 v14, v14, v11, 1.0
; CI-NEXT: s_cbranch_vccnz .LBB10_23
-; CI-NEXT: ; %bb.20: ; %frem.loop_body60.preheader
+; CI-NEXT: ; %bb.20: ; %frem.loop_body27.preheader
; CI-NEXT: v_sub_i32_e32 v12, vcc, v15, v16
; CI-NEXT: v_add_i32_e32 v12, vcc, 11, v12
-; CI-NEXT: .LBB10_21: ; %frem.loop_body60
+; CI-NEXT: .LBB10_21: ; %frem.loop_body27
; CI-NEXT: ; =>This Inner Loop Header: Depth=1
; CI-NEXT: v_mov_b32_e32 v15, v13
; CI-NEXT: v_mul_f32_e32 v13, v15, v14
@@ -7838,7 +7830,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_cbranch_vccnz .LBB10_21
; CI-NEXT: ; %bb.22: ; %Flow125
; CI-NEXT: v_mov_b32_e32 v13, v15
-; CI-NEXT: .LBB10_23: ; %frem.loop_exit61
+; CI-NEXT: .LBB10_23: ; %frem.loop_exit28
; CI-NEXT: v_add_i32_e32 v12, vcc, -10, v12
; CI-NEXT: v_ldexp_f32_e32 v12, v13, v12
; CI-NEXT: v_mul_f32_e32 v13, v12, v14
@@ -7859,7 +7851,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cvt_f32_f16_e64 v12, |v12|
; CI-NEXT: v_cmp_ngt_f32_e32 vcc, v13, v12
; CI-NEXT: s_cbranch_vccz .LBB10_26
-; CI-NEXT: ; %bb.25: ; %frem.else86
+; CI-NEXT: ; %bb.25: ; %frem.else
; CI-NEXT: v_cvt_f32_f16_e32 v11, v11
; CI-NEXT: s_brev_b32 s0, -2
; CI-NEXT: v_bfi_b32 v14, s0, 0, v0
@@ -7869,7 +7861,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_branch .LBB10_32
; CI-NEXT: .LBB10_26:
; CI-NEXT: ; implicit-def: $vgpr11
-; CI-NEXT: .LBB10_27: ; %frem.compute85
+; CI-NEXT: .LBB10_27: ; %frem.compute
; CI-NEXT: v_frexp_exp_i32_f32_e32 v16, v13
; CI-NEXT: v_frexp_mant_f32_e32 v11, v13
; CI-NEXT: v_frexp_mant_f32_e32 v13, v12
@@ -7894,10 +7886,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v13
; CI-NEXT: v_div_fixup_f32 v15, v15, v12, 1.0
; CI-NEXT: s_cbranch_vccnz .LBB10_31
-; CI-NEXT: ; %bb.28: ; %frem.loop_body93.preheader
+; CI-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; CI-NEXT: v_sub_i32_e32 v13, vcc, v16, v17
; CI-NEXT: v_add_i32_e32 v13, vcc, 11, v13
-; CI-NEXT: .LBB10_29: ; %frem.loop_body93
+; CI-NEXT: .LBB10_29: ; %frem.loop_body
; CI-NEXT: ; =>This Inner Loop Header: Depth=1
; CI-NEXT: v_mov_b32_e32 v16, v14
; CI-NEXT: v_mul_f32_e32 v14, v16, v15
@@ -7912,7 +7904,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_cbranch_vccnz .LBB10_29
; CI-NEXT: ; %bb.30: ; %Flow
; CI-NEXT: v_mov_b32_e32 v14, v16
-; CI-NEXT: .LBB10_31: ; %frem.loop_exit94
+; CI-NEXT: .LBB10_31: ; %frem.loop_exit
; CI-NEXT: v_add_i32_e32 v13, vcc, -10, v13
; CI-NEXT: v_ldexp_f32_e32 v13, v14, v13
; CI-NEXT: v_mul_f32_e32 v14, v13, v15
@@ -8001,7 +7993,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cvt_f32_f16_e64 v5, |v2|
; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v6, v5
; VI-NEXT: s_cbranch_vccz .LBB10_2
-; VI-NEXT: ; %bb.1: ; %frem.else
+; VI-NEXT: ; %bb.1: ; %frem.else86
; VI-NEXT: s_movk_i32 s2, 0x7fff
; VI-NEXT: v_bfi_b32 v4, s2, 0, v0
; VI-NEXT: v_cmp_eq_f32_e32 vcc, v6, v5
@@ -8010,7 +8002,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_branch .LBB10_8
; VI-NEXT: .LBB10_2:
; VI-NEXT: ; implicit-def: $vgpr4
-; VI-NEXT: .LBB10_3: ; %frem.compute
+; VI-NEXT: .LBB10_3: ; %frem.compute85
; VI-NEXT: v_frexp_exp_i32_f32_e32 v9, v6
; VI-NEXT: v_frexp_mant_f32_e32 v4, v6
; VI-NEXT: v_frexp_mant_f32_e32 v6, v5
@@ -8035,10 +8027,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v6
; VI-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0
; VI-NEXT: s_cbranch_vccnz .LBB10_7
-; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; VI-NEXT: ; %bb.4: ; %frem.loop_body93.preheader
; VI-NEXT: v_sub_u32_e32 v6, vcc, v9, v10
; VI-NEXT: v_add_u32_e32 v6, vcc, 11, v6
-; VI-NEXT: .LBB10_5: ; %frem.loop_body
+; VI-NEXT: .LBB10_5: ; %frem.loop_body93
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: v_mov_b32_e32 v9, v7
; VI-NEXT: v_mul_f32_e32 v7, v9, v8
@@ -8053,7 +8045,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_cbranch_vccnz .LBB10_5
; VI-NEXT: ; %bb.6: ; %Flow133
; VI-NEXT: v_mov_b32_e32 v7, v9
-; VI-NEXT: .LBB10_7: ; %frem.loop_exit
+; VI-NEXT: .LBB10_7: ; %frem.loop_exit94
; VI-NEXT: v_add_u32_e32 v6, vcc, -10, v6
; VI-NEXT: v_ldexp_f32 v6, v7, v6
; VI-NEXT: v_mul_f32_e32 v7, v6, v8
@@ -8073,7 +8065,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cvt_f32_f16_e64 v8, |v6|
; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v9, v8
; VI-NEXT: s_cbranch_vccz .LBB10_10
-; VI-NEXT: ; %bb.9: ; %frem.else20
+; VI-NEXT: ; %bb.9: ; %frem.else53
; VI-NEXT: s_movk_i32 s2, 0x7fff
; VI-NEXT: v_bfi_b32 v7, s2, 0, v5
; VI-NEXT: v_cmp_eq_f32_e32 vcc, v9, v8
@@ -8082,7 +8074,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_branch .LBB10_16
; VI-NEXT: .LBB10_10:
; VI-NEXT: ; implicit-def: $vgpr7
-; VI-NEXT: .LBB10_11: ; %frem.compute19
+; VI-NEXT: .LBB10_11: ; %frem.compute52
; VI-NEXT: v_frexp_exp_i32_f32_e32 v12, v9
; VI-NEXT: v_frexp_mant_f32_e32 v7, v9
; VI-NEXT: v_frexp_mant_f32_e32 v9, v8
@@ -8107,10 +8099,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v9
; VI-NEXT: v_div_fixup_f32 v11, v11, v8, 1.0
; VI-NEXT: s_cbranch_vccnz .LBB10_15
-; VI-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; VI-NEXT: ; %bb.12: ; %frem.loop_body60.preheader
; VI-NEXT: v_sub_u32_e32 v9, vcc, v12, v13
; VI-NEXT: v_add_u32_e32 v9, vcc, 11, v9
-; VI-NEXT: .LBB10_13: ; %frem.loop_body27
+; VI-NEXT: .LBB10_13: ; %frem.loop_body60
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: v_mov_b32_e32 v12, v10
; VI-NEXT: v_mul_f32_e32 v10, v12, v11
@@ -8125,7 +8117,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_cbranch_vccnz .LBB10_13
; VI-NEXT: ; %bb.14: ; %Flow129
; VI-NEXT: v_mov_b32_e32 v10, v12
-; VI-NEXT: .LBB10_15: ; %frem.loop_exit28
+; VI-NEXT: .LBB10_15: ; %frem.loop_exit61
; VI-NEXT: v_add_u32_e32 v9, vcc, -10, v9
; VI-NEXT: v_ldexp_f32 v9, v10, v9
; VI-NEXT: v_mul_f32_e32 v10, v9, v11
@@ -8143,7 +8135,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cvt_f32_f16_e64 v9, |v3|
; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v10, v9
; VI-NEXT: s_cbranch_vccz .LBB10_18
-; VI-NEXT: ; %bb.17: ; %frem.else53
+; VI-NEXT: ; %bb.17: ; %frem.else20
; VI-NEXT: s_movk_i32 s2, 0x7fff
; VI-NEXT: v_bfi_b32 v8, s2, 0, v1
; VI-NEXT: v_cmp_eq_f32_e32 vcc, v10, v9
@@ -8152,7 +8144,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_branch .LBB10_24
; VI-NEXT: .LBB10_18:
; VI-NEXT: ; implicit-def: $vgpr8
-; VI-NEXT: .LBB10_19: ; %frem.compute52
+; VI-NEXT: .LBB10_19: ; %frem.compute19
; VI-NEXT: v_frexp_exp_i32_f32_e32 v13, v10
; VI-NEXT: v_frexp_mant_f32_e32 v8, v10
; VI-NEXT: v_frexp_mant_f32_e32 v10, v9
@@ -8177,10 +8169,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v10
; VI-NEXT: v_div_fixup_f32 v12, v12, v9, 1.0
; VI-NEXT: s_cbranch_vccnz .LBB10_23
-; VI-NEXT: ; %bb.20: ; %frem.loop_body60.preheader
+; VI-NEXT: ; %bb.20: ; %frem.loop_body27.preheader
; VI-NEXT: v_sub_u32_e32 v10, vcc, v13, v14
; VI-NEXT: v_add_u32_e32 v10, vcc, 11, v10
-; VI-NEXT: .LBB10_21: ; %frem.loop_body60
+; VI-NEXT: .LBB10_21: ; %frem.loop_body27
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: v_mov_b32_e32 v13, v11
; VI-NEXT: v_mul_f32_e32 v11, v13, v12
@@ -8195,7 +8187,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_cbranch_vccnz .LBB10_21
; VI-NEXT: ; %bb.22: ; %Flow125
; VI-NEXT: v_mov_b32_e32 v11, v13
-; VI-NEXT: .LBB10_23: ; %frem.loop_exit61
+; VI-NEXT: .LBB10_23: ; %frem.loop_exit28
; VI-NEXT: v_add_u32_e32 v10, vcc, -10, v10
; VI-NEXT: v_ldexp_f32 v10, v11, v10
; VI-NEXT: v_mul_f32_e32 v11, v10, v12
@@ -8215,7 +8207,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cvt_f32_f16_e64 v12, |v10|
; VI-NEXT: v_cmp_ngt_f32_e32 vcc, v13, v12
; VI-NEXT: s_cbranch_vccz .LBB10_26
-; VI-NEXT: ; %bb.25: ; %frem.else86
+; VI-NEXT: ; %bb.25: ; %frem.else
; VI-NEXT: s_movk_i32 s2, 0x7fff
; VI-NEXT: v_bfi_b32 v11, s2, 0, v9
; VI-NEXT: v_cmp_eq_f32_e32 vcc, v13, v12
@@ -8224,7 +8216,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_branch .LBB10_32
; VI-NEXT: .LBB10_26:
; VI-NEXT: ; implicit-def: $vgpr11
-; VI-NEXT: .LBB10_27: ; %frem.compute85
+; VI-NEXT: .LBB10_27: ; %frem.compute
; VI-NEXT: v_frexp_exp_i32_f32_e32 v16, v13
; VI-NEXT: v_frexp_mant_f32_e32 v11, v13
; VI-NEXT: v_frexp_mant_f32_e32 v13, v12
@@ -8249,10 +8241,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_gt_i32_e32 vcc, 12, v13
; VI-NEXT: v_div_fixup_f32 v15, v15, v12, 1.0
; VI-NEXT: s_cbranch_vccnz .LBB10_31
-; VI-NEXT: ; %bb.28: ; %frem.loop_body93.preheader
+; VI-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; VI-NEXT: v_sub_u32_e32 v13, vcc, v16, v17
; VI-NEXT: v_add_u32_e32 v13, vcc, 11, v13
-; VI-NEXT: .LBB10_29: ; %frem.loop_body93
+; VI-NEXT: .LBB10_29: ; %frem.loop_body
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: v_mov_b32_e32 v16, v14
; VI-NEXT: v_mul_f32_e32 v14, v16, v15
@@ -8267,7 +8259,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_cbranch_vccnz .LBB10_29
; VI-NEXT: ; %bb.30: ; %Flow
; VI-NEXT: v_mov_b32_e32 v14, v16
-; VI-NEXT: .LBB10_31: ; %frem.loop_exit94
+; VI-NEXT: .LBB10_31: ; %frem.loop_exit
; VI-NEXT: v_add_u32_e32 v13, vcc, -10, v13
; VI-NEXT: v_ldexp_f32 v13, v14, v13
; VI-NEXT: v_mul_f32_e32 v14, v13, v15
@@ -8320,7 +8312,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cvt_f32_f16_e64 v5, |v0|
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v6, v5
; GFX9-NEXT: s_cbranch_vccz .LBB10_2
-; GFX9-NEXT: ; %bb.1: ; %frem.else
+; GFX9-NEXT: ; %bb.1: ; %frem.else86
; GFX9-NEXT: s_movk_i32 s2, 0x7fff
; GFX9-NEXT: v_bfi_b32 v4, s2, 0, v2
; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v6, v5
@@ -8329,7 +8321,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_branch .LBB10_8
; GFX9-NEXT: .LBB10_2:
; GFX9-NEXT: ; implicit-def: $vgpr4
-; GFX9-NEXT: .LBB10_3: ; %frem.compute
+; GFX9-NEXT: .LBB10_3: ; %frem.compute85
; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v9, v6
; GFX9-NEXT: v_frexp_mant_f32_e32 v4, v6
; GFX9-NEXT: v_frexp_mant_f32_e32 v6, v5
@@ -8354,10 +8346,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v6
; GFX9-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0
; GFX9-NEXT: s_cbranch_vccnz .LBB10_7
-; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX9-NEXT: ; %bb.4: ; %frem.loop_body93.preheader
; GFX9-NEXT: v_sub_u32_e32 v6, v9, v10
; GFX9-NEXT: v_add_u32_e32 v6, 11, v6
-; GFX9-NEXT: .LBB10_5: ; %frem.loop_body
+; GFX9-NEXT: .LBB10_5: ; %frem.loop_body93
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mov_b32_e32 v9, v7
; GFX9-NEXT: v_mul_f32_e32 v7, v9, v8
@@ -8372,7 +8364,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_cbranch_vccnz .LBB10_5
; GFX9-NEXT: ; %bb.6: ; %Flow133
; GFX9-NEXT: v_mov_b32_e32 v7, v9
-; GFX9-NEXT: .LBB10_7: ; %frem.loop_exit
+; GFX9-NEXT: .LBB10_7: ; %frem.loop_exit94
; GFX9-NEXT: v_add_u32_e32 v6, -10, v6
; GFX9-NEXT: v_ldexp_f32 v6, v7, v6
; GFX9-NEXT: v_mul_f32_e32 v7, v6, v8
@@ -8391,7 +8383,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cvt_f32_f16_sdwa v7, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v8, v7
; GFX9-NEXT: s_cbranch_vccz .LBB10_10
-; GFX9-NEXT: ; %bb.9: ; %frem.else20
+; GFX9-NEXT: ; %bb.9: ; %frem.else53
; GFX9-NEXT: s_movk_i32 s2, 0x7fff
; GFX9-NEXT: v_bfi_b32 v6, s2, 0, v5
; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v8, v7
@@ -8400,7 +8392,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_branch .LBB10_16
; GFX9-NEXT: .LBB10_10:
; GFX9-NEXT: ; implicit-def: $vgpr6
-; GFX9-NEXT: .LBB10_11: ; %frem.compute19
+; GFX9-NEXT: .LBB10_11: ; %frem.compute52
; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v11, v8
; GFX9-NEXT: v_frexp_mant_f32_e32 v6, v8
; GFX9-NEXT: v_frexp_mant_f32_e32 v8, v7
@@ -8425,10 +8417,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v8
; GFX9-NEXT: v_div_fixup_f32 v10, v10, v7, 1.0
; GFX9-NEXT: s_cbranch_vccnz .LBB10_15
-; GFX9-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX9-NEXT: ; %bb.12: ; %frem.loop_body60.preheader
; GFX9-NEXT: v_sub_u32_e32 v8, v11, v12
; GFX9-NEXT: v_add_u32_e32 v8, 11, v8
-; GFX9-NEXT: .LBB10_13: ; %frem.loop_body27
+; GFX9-NEXT: .LBB10_13: ; %frem.loop_body60
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mov_b32_e32 v11, v9
; GFX9-NEXT: v_mul_f32_e32 v9, v11, v10
@@ -8443,7 +8435,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_cbranch_vccnz .LBB10_13
; GFX9-NEXT: ; %bb.14: ; %Flow129
; GFX9-NEXT: v_mov_b32_e32 v9, v11
-; GFX9-NEXT: .LBB10_15: ; %frem.loop_exit28
+; GFX9-NEXT: .LBB10_15: ; %frem.loop_exit61
; GFX9-NEXT: v_add_u32_e32 v8, -10, v8
; GFX9-NEXT: v_ldexp_f32 v8, v9, v8
; GFX9-NEXT: v_mul_f32_e32 v9, v8, v10
@@ -8461,7 +8453,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cvt_f32_f16_e64 v8, |v1|
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v9, v8
; GFX9-NEXT: s_cbranch_vccz .LBB10_18
-; GFX9-NEXT: ; %bb.17: ; %frem.else53
+; GFX9-NEXT: ; %bb.17: ; %frem.else20
; GFX9-NEXT: s_movk_i32 s2, 0x7fff
; GFX9-NEXT: v_bfi_b32 v7, s2, 0, v3
; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v9, v8
@@ -8470,7 +8462,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_branch .LBB10_24
; GFX9-NEXT: .LBB10_18:
; GFX9-NEXT: ; implicit-def: $vgpr7
-; GFX9-NEXT: .LBB10_19: ; %frem.compute52
+; GFX9-NEXT: .LBB10_19: ; %frem.compute19
; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v12, v9
; GFX9-NEXT: v_frexp_mant_f32_e32 v7, v9
; GFX9-NEXT: v_frexp_mant_f32_e32 v9, v8
@@ -8495,10 +8487,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v9
; GFX9-NEXT: v_div_fixup_f32 v11, v11, v8, 1.0
; GFX9-NEXT: s_cbranch_vccnz .LBB10_23
-; GFX9-NEXT: ; %bb.20: ; %frem.loop_body60.preheader
+; GFX9-NEXT: ; %bb.20: ; %frem.loop_body27.preheader
; GFX9-NEXT: v_sub_u32_e32 v9, v12, v13
; GFX9-NEXT: v_add_u32_e32 v9, 11, v9
-; GFX9-NEXT: .LBB10_21: ; %frem.loop_body60
+; GFX9-NEXT: .LBB10_21: ; %frem.loop_body27
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mov_b32_e32 v12, v10
; GFX9-NEXT: v_mul_f32_e32 v10, v12, v11
@@ -8513,7 +8505,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_cbranch_vccnz .LBB10_21
; GFX9-NEXT: ; %bb.22: ; %Flow125
; GFX9-NEXT: v_mov_b32_e32 v10, v12
-; GFX9-NEXT: .LBB10_23: ; %frem.loop_exit61
+; GFX9-NEXT: .LBB10_23: ; %frem.loop_exit28
; GFX9-NEXT: v_add_u32_e32 v9, -10, v9
; GFX9-NEXT: v_ldexp_f32 v9, v10, v9
; GFX9-NEXT: v_mul_f32_e32 v10, v9, v11
@@ -8532,7 +8524,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cvt_f32_f16_sdwa v10, |v1| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT: v_cmp_ngt_f32_e32 vcc, v11, v10
; GFX9-NEXT: s_cbranch_vccz .LBB10_26
-; GFX9-NEXT: ; %bb.25: ; %frem.else86
+; GFX9-NEXT: ; %bb.25: ; %frem.else
; GFX9-NEXT: s_movk_i32 s2, 0x7fff
; GFX9-NEXT: v_bfi_b32 v9, s2, 0, v8
; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v11, v10
@@ -8541,7 +8533,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_branch .LBB10_32
; GFX9-NEXT: .LBB10_26:
; GFX9-NEXT: ; implicit-def: $vgpr9
-; GFX9-NEXT: .LBB10_27: ; %frem.compute85
+; GFX9-NEXT: .LBB10_27: ; %frem.compute
; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v14, v11
; GFX9-NEXT: v_frexp_mant_f32_e32 v9, v11
; GFX9-NEXT: v_frexp_mant_f32_e32 v11, v10
@@ -8566,10 +8558,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 12, v11
; GFX9-NEXT: v_div_fixup_f32 v13, v13, v10, 1.0
; GFX9-NEXT: s_cbranch_vccnz .LBB10_31
-; GFX9-NEXT: ; %bb.28: ; %frem.loop_body93.preheader
+; GFX9-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; GFX9-NEXT: v_sub_u32_e32 v11, v14, v15
; GFX9-NEXT: v_add_u32_e32 v11, 11, v11
-; GFX9-NEXT: .LBB10_29: ; %frem.loop_body93
+; GFX9-NEXT: .LBB10_29: ; %frem.loop_body
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mov_b32_e32 v14, v12
; GFX9-NEXT: v_mul_f32_e32 v12, v14, v13
@@ -8584,7 +8576,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_cbranch_vccnz .LBB10_29
; GFX9-NEXT: ; %bb.30: ; %Flow
; GFX9-NEXT: v_mov_b32_e32 v12, v14
-; GFX9-NEXT: .LBB10_31: ; %frem.loop_exit94
+; GFX9-NEXT: .LBB10_31: ; %frem.loop_exit
; GFX9-NEXT: v_add_u32_e32 v11, -10, v11
; GFX9-NEXT: v_ldexp_f32 v11, v12, v11
; GFX9-NEXT: v_mul_f32_e32 v12, v11, v13
@@ -8640,7 +8632,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cvt_f32_f16_e64 v5, |v0|
; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5
; GFX10-NEXT: s_cbranch_vccz .LBB10_2
-; GFX10-NEXT: ; %bb.1: ; %frem.else
+; GFX10-NEXT: ; %bb.1: ; %frem.else86
; GFX10-NEXT: v_bfi_b32 v4, 0x7fff, 0, v2
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5
; GFX10-NEXT: v_cndmask_b32_e32 v4, v2, v4, vcc_lo
@@ -8648,7 +8640,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: s_branch .LBB10_8
; GFX10-NEXT: .LBB10_2:
; GFX10-NEXT: ; implicit-def: $vgpr4
-; GFX10-NEXT: .LBB10_3: ; %frem.compute
+; GFX10-NEXT: .LBB10_3: ; %frem.compute85
; GFX10-NEXT: v_frexp_mant_f32_e32 v4, v6
; GFX10-NEXT: v_frexp_mant_f32_e32 v8, v5
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v7, v6
@@ -8675,10 +8667,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v8
; GFX10-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0
; GFX10-NEXT: s_cbranch_vccnz .LBB10_7
-; GFX10-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX10-NEXT: ; %bb.4: ; %frem.loop_body93.preheader
; GFX10-NEXT: s_sub_i32 s2, s2, s3
; GFX10-NEXT: s_add_i32 s2, s2, 11
-; GFX10-NEXT: .LBB10_5: ; %frem.loop_body
+; GFX10-NEXT: .LBB10_5: ; %frem.loop_body93
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v9, v6
; GFX10-NEXT: s_add_i32 s2, s2, -11
@@ -8694,7 +8686,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: ; %bb.6: ; %Flow133
; GFX10-NEXT: v_mov_b32_e32 v8, s2
; GFX10-NEXT: v_mov_b32_e32 v6, v9
-; GFX10-NEXT: .LBB10_7: ; %frem.loop_exit
+; GFX10-NEXT: .LBB10_7: ; %frem.loop_exit94
; GFX10-NEXT: v_add_nc_u32_e32 v8, -10, v8
; GFX10-NEXT: v_ldexp_f32 v6, v6, v8
; GFX10-NEXT: v_mul_f32_e32 v7, v6, v7
@@ -8712,7 +8704,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cvt_f32_f16_e64 v8, |v5|
; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v8, v7
; GFX10-NEXT: s_cbranch_vccz .LBB10_10
-; GFX10-NEXT: ; %bb.9: ; %frem.else20
+; GFX10-NEXT: ; %bb.9: ; %frem.else53
; GFX10-NEXT: v_bfi_b32 v6, 0x7fff, 0, v5
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v8, v7
; GFX10-NEXT: v_cndmask_b32_e32 v6, v5, v6, vcc_lo
@@ -8720,7 +8712,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: s_branch .LBB10_16
; GFX10-NEXT: .LBB10_10:
; GFX10-NEXT: ; implicit-def: $vgpr6
-; GFX10-NEXT: .LBB10_11: ; %frem.compute19
+; GFX10-NEXT: .LBB10_11: ; %frem.compute52
; GFX10-NEXT: v_frexp_mant_f32_e32 v6, v8
; GFX10-NEXT: v_frexp_mant_f32_e32 v10, v7
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v9, v8
@@ -8747,10 +8739,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v10
; GFX10-NEXT: v_div_fixup_f32 v9, v9, v7, 1.0
; GFX10-NEXT: s_cbranch_vccnz .LBB10_15
-; GFX10-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX10-NEXT: ; %bb.12: ; %frem.loop_body60.preheader
; GFX10-NEXT: s_sub_i32 s2, s2, s3
; GFX10-NEXT: s_add_i32 s2, s2, 11
-; GFX10-NEXT: .LBB10_13: ; %frem.loop_body27
+; GFX10-NEXT: .LBB10_13: ; %frem.loop_body60
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v11, v8
; GFX10-NEXT: s_add_i32 s2, s2, -11
@@ -8766,7 +8758,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: ; %bb.14: ; %Flow129
; GFX10-NEXT: v_mov_b32_e32 v10, s2
; GFX10-NEXT: v_mov_b32_e32 v8, v11
-; GFX10-NEXT: .LBB10_15: ; %frem.loop_exit28
+; GFX10-NEXT: .LBB10_15: ; %frem.loop_exit61
; GFX10-NEXT: v_add_nc_u32_e32 v10, -10, v10
; GFX10-NEXT: v_ldexp_f32 v8, v8, v10
; GFX10-NEXT: v_mul_f32_e32 v9, v8, v9
@@ -8783,7 +8775,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cvt_f32_f16_e64 v8, |v1|
; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v9, v8
; GFX10-NEXT: s_cbranch_vccz .LBB10_18
-; GFX10-NEXT: ; %bb.17: ; %frem.else53
+; GFX10-NEXT: ; %bb.17: ; %frem.else20
; GFX10-NEXT: v_bfi_b32 v7, 0x7fff, 0, v3
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v9, v8
; GFX10-NEXT: v_cndmask_b32_e32 v7, v3, v7, vcc_lo
@@ -8791,7 +8783,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: s_branch .LBB10_24
; GFX10-NEXT: .LBB10_18:
; GFX10-NEXT: ; implicit-def: $vgpr7
-; GFX10-NEXT: .LBB10_19: ; %frem.compute52
+; GFX10-NEXT: .LBB10_19: ; %frem.compute19
; GFX10-NEXT: v_frexp_mant_f32_e32 v7, v9
; GFX10-NEXT: v_frexp_mant_f32_e32 v11, v8
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v10, v9
@@ -8818,10 +8810,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v11
; GFX10-NEXT: v_div_fixup_f32 v10, v10, v8, 1.0
; GFX10-NEXT: s_cbranch_vccnz .LBB10_23
-; GFX10-NEXT: ; %bb.20: ; %frem.loop_body60.preheader
+; GFX10-NEXT: ; %bb.20: ; %frem.loop_body27.preheader
; GFX10-NEXT: s_sub_i32 s2, s2, s3
; GFX10-NEXT: s_add_i32 s2, s2, 11
-; GFX10-NEXT: .LBB10_21: ; %frem.loop_body60
+; GFX10-NEXT: .LBB10_21: ; %frem.loop_body27
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v12, v9
; GFX10-NEXT: s_add_i32 s2, s2, -11
@@ -8837,7 +8829,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: ; %bb.22: ; %Flow125
; GFX10-NEXT: v_mov_b32_e32 v11, s2
; GFX10-NEXT: v_mov_b32_e32 v9, v12
-; GFX10-NEXT: .LBB10_23: ; %frem.loop_exit61
+; GFX10-NEXT: .LBB10_23: ; %frem.loop_exit28
; GFX10-NEXT: v_add_nc_u32_e32 v11, -10, v11
; GFX10-NEXT: v_ldexp_f32 v9, v9, v11
; GFX10-NEXT: v_mul_f32_e32 v10, v9, v10
@@ -8855,7 +8847,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cvt_f32_f16_e64 v11, |v8|
; GFX10-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v11, v10
; GFX10-NEXT: s_cbranch_vccz .LBB10_26
-; GFX10-NEXT: ; %bb.25: ; %frem.else86
+; GFX10-NEXT: ; %bb.25: ; %frem.else
; GFX10-NEXT: v_bfi_b32 v9, 0x7fff, 0, v8
; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v11, v10
; GFX10-NEXT: v_cndmask_b32_e32 v9, v8, v9, vcc_lo
@@ -8863,7 +8855,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: s_branch .LBB10_32
; GFX10-NEXT: .LBB10_26:
; GFX10-NEXT: ; implicit-def: $vgpr9
-; GFX10-NEXT: .LBB10_27: ; %frem.compute85
+; GFX10-NEXT: .LBB10_27: ; %frem.compute
; GFX10-NEXT: v_frexp_mant_f32_e32 v9, v11
; GFX10-NEXT: v_frexp_mant_f32_e32 v13, v10
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v12, v11
@@ -8890,10 +8882,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v13
; GFX10-NEXT: v_div_fixup_f32 v12, v12, v10, 1.0
; GFX10-NEXT: s_cbranch_vccnz .LBB10_31
-; GFX10-NEXT: ; %bb.28: ; %frem.loop_body93.preheader
+; GFX10-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; GFX10-NEXT: s_sub_i32 s2, s2, s3
; GFX10-NEXT: s_add_i32 s2, s2, 11
-; GFX10-NEXT: .LBB10_29: ; %frem.loop_body93
+; GFX10-NEXT: .LBB10_29: ; %frem.loop_body
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v14, v11
; GFX10-NEXT: s_add_i32 s2, s2, -11
@@ -8909,7 +8901,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: ; %bb.30: ; %Flow
; GFX10-NEXT: v_mov_b32_e32 v13, s2
; GFX10-NEXT: v_mov_b32_e32 v11, v14
-; GFX10-NEXT: .LBB10_31: ; %frem.loop_exit94
+; GFX10-NEXT: .LBB10_31: ; %frem.loop_exit
; GFX10-NEXT: v_add_nc_u32_e32 v13, -10, v13
; GFX10-NEXT: v_ldexp_f32 v11, v11, v13
; GFX10-NEXT: v_mul_f32_e32 v12, v11, v12
@@ -8963,7 +8955,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_2
-; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else
+; GFX11-TRUE16-NEXT: ; %bb.1: ; %frem.else86
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v4.l, v0.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, 0
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5
@@ -8974,7 +8966,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_branch .LBB10_8
; GFX11-TRUE16-NEXT: .LBB10_2:
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4
-; GFX11-TRUE16-NEXT: .LBB10_3: ; %frem.compute
+; GFX11-TRUE16-NEXT: .LBB10_3: ; %frem.compute85
; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v4, v6
; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v8, v5
; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v7, v6
@@ -9010,11 +9002,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB10_7
-; GFX11-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX11-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader
; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11
-; GFX11-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body
+; GFX11-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body93
; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v9, v6
@@ -9034,7 +9026,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: ; %bb.6: ; %Flow133
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v8, s2
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v9
-; GFX11-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit
+; GFX11-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit94
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v8, -10, v8
; GFX11-TRUE16-NEXT: v_ldexp_f32 v6, v6, v8
@@ -9061,7 +9053,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v9, v8
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_10
-; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else20
+; GFX11-TRUE16-NEXT: ; %bb.9: ; %frem.else53
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v7.l, v5.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v10.l, 0
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v9, v8
@@ -9072,7 +9064,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_branch .LBB10_16
; GFX11-TRUE16-NEXT: .LBB10_10:
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr7
-; GFX11-TRUE16-NEXT: .LBB10_11: ; %frem.compute19
+; GFX11-TRUE16-NEXT: .LBB10_11: ; %frem.compute52
; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v7, v9
; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v11, v8
; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v10, v9
@@ -9108,11 +9100,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_div_fixup_f32 v10, v10, v8, 1.0
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB10_15
-; GFX11-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX11-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader
; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11
-; GFX11-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body27
+; GFX11-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body60
; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v12, v9
@@ -9132,7 +9124,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: ; %bb.14: ; %Flow129
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v11, s2
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v9, v12
-; GFX11-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit28
+; GFX11-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit61
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v11, -10, v11
; GFX11-TRUE16-NEXT: v_ldexp_f32 v9, v9, v11
@@ -9156,7 +9148,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v10, v9
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_18
-; GFX11-TRUE16-NEXT: ; %bb.17: ; %frem.else53
+; GFX11-TRUE16-NEXT: ; %bb.17: ; %frem.else20
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v8.l, v1.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, 0
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v10, v9
@@ -9167,7 +9159,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_branch .LBB10_24
; GFX11-TRUE16-NEXT: .LBB10_18:
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr8
-; GFX11-TRUE16-NEXT: .LBB10_19: ; %frem.compute52
+; GFX11-TRUE16-NEXT: .LBB10_19: ; %frem.compute19
; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v8, v10
; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v12, v9
; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v11, v10
@@ -9203,11 +9195,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB10_23
-; GFX11-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader
+; GFX11-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader
; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11
-; GFX11-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body60
+; GFX11-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body27
; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v10
@@ -9227,7 +9219,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: ; %bb.22: ; %Flow125
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v12, s2
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v10, v13
-; GFX11-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit61
+; GFX11-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit28
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v12, -10, v12
; GFX11-TRUE16-NEXT: v_ldexp_f32 v10, v10, v12
@@ -9254,7 +9246,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v13, v12
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB10_26
-; GFX11-TRUE16-NEXT: ; %bb.25: ; %frem.else86
+; GFX11-TRUE16-NEXT: ; %bb.25: ; %frem.else
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v11.l, v9.l
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v14.l, 0
; GFX11-TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v13, v12
@@ -9265,7 +9257,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_branch .LBB10_32
; GFX11-TRUE16-NEXT: .LBB10_26:
; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr11
-; GFX11-TRUE16-NEXT: .LBB10_27: ; %frem.compute85
+; GFX11-TRUE16-NEXT: .LBB10_27: ; %frem.compute
; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v11, v13
; GFX11-TRUE16-NEXT: v_frexp_mant_f32_e32 v15, v12
; GFX11-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v14, v13
@@ -9301,11 +9293,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-TRUE16-NEXT: v_div_fixup_f32 v14, v14, v12, 1.0
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB10_31
-; GFX11-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader
+; GFX11-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; GFX11-TRUE16-NEXT: s_sub_i32 s2, s2, s3
; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: s_add_i32 s2, s2, 11
-; GFX11-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body93
+; GFX11-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body
; GFX11-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v16, v13
@@ -9325,7 +9317,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: ; %bb.30: ; %Flow
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v15, s2
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v13, v16
-; GFX11-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit94
+; GFX11-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v15, -10, v15
; GFX11-TRUE16-NEXT: v_ldexp_f32 v13, v13, v15
@@ -9346,29 +9338,23 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: .LBB10_32: ; %Flow124
; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v2.l
; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v0.l|
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v6.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v4.l, s2
; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v5.l|
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v3.l
-; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, 0x7e00, v7.l, s2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v7.l, s2
; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v1.l|
-; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, 0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v10.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x7e00, v8.l, s2
; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v9.l|
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.l, 0x7e00, v11.l, s2
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v4, 16, v1
-; GFX11-TRUE16-NEXT: global_store_b64 v3, v[0:1], s[0:1]
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x7e00, v11.l, s2
+; GFX11-TRUE16-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
; GFX11-FAKE16-LABEL: frem_v4f16:
@@ -9388,7 +9374,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v6, v5
; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB10_2
-; GFX11-FAKE16-NEXT: ; %bb.1: ; %frem.else
+; GFX11-FAKE16-NEXT: ; %bb.1: ; %frem.else86
; GFX11-FAKE16-NEXT: v_bfi_b32 v4, 0x7fff, 0, v0
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v6, v5
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
@@ -9397,7 +9383,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_branch .LBB10_8
; GFX11-FAKE16-NEXT: .LBB10_2:
; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr4
-; GFX11-FAKE16-NEXT: .LBB10_3: ; %frem.compute
+; GFX11-FAKE16-NEXT: .LBB10_3: ; %frem.compute85
; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v4, v6
; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v8, v5
; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v7, v6
@@ -9433,11 +9419,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0
; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB10_7
-; GFX11-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX11-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader
; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11
-; GFX11-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body
+; GFX11-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body93
; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v9, v6
@@ -9457,7 +9443,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: ; %bb.6: ; %Flow133
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v8, s2
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v6, v9
-; GFX11-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit
+; GFX11-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit94
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v8, -10, v8
; GFX11-FAKE16-NEXT: v_ldexp_f32 v6, v6, v8
@@ -9483,7 +9469,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v9, v8
; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB10_10
-; GFX11-FAKE16-NEXT: ; %bb.9: ; %frem.else20
+; GFX11-FAKE16-NEXT: ; %bb.9: ; %frem.else53
; GFX11-FAKE16-NEXT: v_bfi_b32 v7, 0x7fff, 0, v5
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v9, v8
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
@@ -9492,7 +9478,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_branch .LBB10_16
; GFX11-FAKE16-NEXT: .LBB10_10:
; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr7
-; GFX11-FAKE16-NEXT: .LBB10_11: ; %frem.compute19
+; GFX11-FAKE16-NEXT: .LBB10_11: ; %frem.compute52
; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v7, v9
; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v11, v8
; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v10, v9
@@ -9528,11 +9514,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_div_fixup_f32 v10, v10, v8, 1.0
; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB10_15
-; GFX11-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX11-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader
; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11
-; GFX11-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body27
+; GFX11-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body60
; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v12, v9
@@ -9552,7 +9538,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: ; %bb.14: ; %Flow129
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v11, s2
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v9, v12
-; GFX11-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit28
+; GFX11-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit61
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v11, -10, v11
; GFX11-FAKE16-NEXT: v_ldexp_f32 v9, v9, v11
@@ -9575,7 +9561,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v10, v9
; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB10_18
-; GFX11-FAKE16-NEXT: ; %bb.17: ; %frem.else53
+; GFX11-FAKE16-NEXT: ; %bb.17: ; %frem.else20
; GFX11-FAKE16-NEXT: v_bfi_b32 v8, 0x7fff, 0, v1
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v10, v9
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
@@ -9584,7 +9570,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_branch .LBB10_24
; GFX11-FAKE16-NEXT: .LBB10_18:
; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr8
-; GFX11-FAKE16-NEXT: .LBB10_19: ; %frem.compute52
+; GFX11-FAKE16-NEXT: .LBB10_19: ; %frem.compute19
; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v8, v10
; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v12, v9
; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v11, v10
@@ -9620,11 +9606,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0
; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB10_23
-; GFX11-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader
+; GFX11-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader
; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11
-; GFX11-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body60
+; GFX11-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body27
; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v13, v10
@@ -9644,7 +9630,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: ; %bb.22: ; %Flow125
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v12, s2
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v10, v13
-; GFX11-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit61
+; GFX11-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit28
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v12, -10, v12
; GFX11-FAKE16-NEXT: v_ldexp_f32 v10, v10, v12
@@ -9670,7 +9656,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v13, v12
; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB10_26
-; GFX11-FAKE16-NEXT: ; %bb.25: ; %frem.else86
+; GFX11-FAKE16-NEXT: ; %bb.25: ; %frem.else
; GFX11-FAKE16-NEXT: v_bfi_b32 v11, 0x7fff, 0, v9
; GFX11-FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v13, v12
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
@@ -9679,7 +9665,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_branch .LBB10_32
; GFX11-FAKE16-NEXT: .LBB10_26:
; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr11
-; GFX11-FAKE16-NEXT: .LBB10_27: ; %frem.compute85
+; GFX11-FAKE16-NEXT: .LBB10_27: ; %frem.compute
; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v11, v13
; GFX11-FAKE16-NEXT: v_frexp_mant_f32_e32 v15, v12
; GFX11-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v14, v13
@@ -9715,11 +9701,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-FAKE16-NEXT: v_div_fixup_f32 v14, v14, v12, 1.0
; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB10_31
-; GFX11-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader
+; GFX11-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; GFX11-FAKE16-NEXT: s_sub_i32 s2, s2, s3
; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: s_add_i32 s2, s2, 11
-; GFX11-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body93
+; GFX11-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body
; GFX11-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v16, v13
@@ -9739,7 +9725,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-FAKE16-NEXT: ; %bb.30: ; %Flow
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v15, s2
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v13, v16
-; GFX11-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit94
+; GFX11-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit
; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v15, -10, v15
; GFX11-FAKE16-NEXT: v_ldexp_f32 v13, v13, v15
@@ -9804,7 +9790,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s8, s6
; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_2
-; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else
+; GFX1150-TRUE16-NEXT: ; %bb.1: ; %frem.else86
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v0.l, s5
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s8, s6
@@ -9816,7 +9802,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_branch .LBB10_8
; GFX1150-TRUE16-NEXT: .LBB10_2:
; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr0
-; GFX1150-TRUE16-NEXT: .LBB10_3: ; %frem.compute
+; GFX1150-TRUE16-NEXT: .LBB10_3: ; %frem.compute85
; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s6
; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v0, s8
; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s8
@@ -9851,11 +9837,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4
; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB10_7
-; GFX1150-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX1150-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader
; GFX1150-TRUE16-NEXT: s_sub_i32 s6, s8, s6
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_add_i32 s6, s6, 11
-; GFX1150-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body
+; GFX1150-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body93
; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, v2
@@ -9877,7 +9863,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: ; %bb.6: ; %Flow133
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, s6
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v2, v5
-; GFX1150-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit
+; GFX1150-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit94
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v4, -10, v4
; GFX1150-TRUE16-NEXT: v_ldexp_f32 v2, v2, v4
@@ -9907,7 +9893,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9
; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_10
-; GFX1150-TRUE16-NEXT: ; %bb.9: ; %frem.else20
+; GFX1150-TRUE16-NEXT: ; %bb.9: ; %frem.else53
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v1.l, s8
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s10, s9
@@ -9919,7 +9905,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_branch .LBB10_16
; GFX1150-TRUE16-NEXT: .LBB10_10:
; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr1
-; GFX1150-TRUE16-NEXT: .LBB10_11: ; %frem.compute19
+; GFX1150-TRUE16-NEXT: .LBB10_11: ; %frem.compute52
; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s9
; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s10
; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s10
@@ -9954,11 +9940,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5
; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0
; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB10_15
-; GFX1150-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX1150-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader
; GFX1150-TRUE16-NEXT: s_sub_i32 s9, s10, s9
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_add_i32 s9, s9, 11
-; GFX1150-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body27
+; GFX1150-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body60
; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v6, v3
@@ -9980,7 +9966,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: ; %bb.14: ; %Flow129
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, s9
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v3, v6
-; GFX1150-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit28
+; GFX1150-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit61
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v5, -10, v5
; GFX1150-TRUE16-NEXT: v_ldexp_f32 v3, v3, v5
@@ -10008,7 +9994,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9
; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_18
-; GFX1150-TRUE16-NEXT: ; %bb.17: ; %frem.else53
+; GFX1150-TRUE16-NEXT: ; %bb.17: ; %frem.else20
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v2.l, s7
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0
; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s10, s9
@@ -10020,7 +10006,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_branch .LBB10_24
; GFX1150-TRUE16-NEXT: .LBB10_18:
; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr2
-; GFX1150-TRUE16-NEXT: .LBB10_19: ; %frem.compute52
+; GFX1150-TRUE16-NEXT: .LBB10_19: ; %frem.compute19
; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v3, s9
; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s10
; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v5, s10
@@ -10055,11 +10041,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6
; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0
; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB10_23
-; GFX1150-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader
+; GFX1150-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader
; GFX1150-TRUE16-NEXT: s_sub_i32 s9, s10, s9
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_add_i32 s9, s9, 11
-; GFX1150-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body60
+; GFX1150-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body27
; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v7, v4
@@ -10081,7 +10067,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: ; %bb.22: ; %Flow125
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v6, s9
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v4, v7
-; GFX1150-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit61
+; GFX1150-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit28
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v6, -10, v6
; GFX1150-TRUE16-NEXT: v_ldexp_f32 v4, v4, v6
@@ -10111,7 +10097,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-TRUE16-NEXT: s_cmp_ngt_f32 s12, s11
; GFX1150-TRUE16-NEXT: s_cbranch_scc0 .LBB10_26
-; GFX1150-TRUE16-NEXT: ; %bb.25: ; %frem.else86
+; GFX1150-TRUE16-NEXT: ; %bb.25: ; %frem.else
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v3.l, s10
; GFX1150-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
; GFX1150-TRUE16-NEXT: s_cmp_eq_f32 s12, s11
@@ -10123,7 +10109,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_branch .LBB10_32
; GFX1150-TRUE16-NEXT: .LBB10_26:
; GFX1150-TRUE16-NEXT: ; implicit-def: $vgpr3
-; GFX1150-TRUE16-NEXT: .LBB10_27: ; %frem.compute85
+; GFX1150-TRUE16-NEXT: .LBB10_27: ; %frem.compute
; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v4, s11
; GFX1150-TRUE16-NEXT: v_frexp_mant_f32_e32 v3, s12
; GFX1150-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v6, s12
@@ -10158,11 +10144,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v7
; GFX1150-TRUE16-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0
; GFX1150-TRUE16-NEXT: s_cbranch_vccnz .LBB10_31
-; GFX1150-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader
+; GFX1150-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; GFX1150-TRUE16-NEXT: s_sub_i32 s11, s12, s11
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_add_i32 s11, s11, 11
-; GFX1150-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body93
+; GFX1150-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body
; GFX1150-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v8, v5
@@ -10184,7 +10170,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: ; %bb.30: ; %Flow
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v7, s11
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v5, v8
-; GFX1150-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit94
+; GFX1150-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-TRUE16-NEXT: v_add_nc_u32_e32 v7, -10, v7
; GFX1150-TRUE16-NEXT: v_ldexp_f32 v5, v5, v7
@@ -10209,21 +10195,19 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_cselect_b32 s4, -1, 0
; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s3, 0x7c00
; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
-; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_and_b32 s3, s3, s4
; GFX1150-TRUE16-NEXT: s_cmp_lg_f16 s6, 0
; GFX1150-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.l, s3
; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s5, 0x7c00
-; GFX1150-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX1150-TRUE16-NEXT: s_cselect_b32 s4, -1, 0
-; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_and_b32 s3, s4, s3
; GFX1150-TRUE16-NEXT: s_cmp_lg_f16 s2, 0
-; GFX1150-TRUE16-NEXT: v_cndmask_b16 v4.l, 0x7e00, v1.l, s3
+; GFX1150-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v1.l, s3
; GFX1150-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s8, 0x7c00
-; GFX1150-TRUE16-NEXT: v_lshl_or_b32 v0, v4, 16, v0
; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_and_b32 s2, s3, s2
@@ -10232,13 +10216,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v2, 0
; GFX1150-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s7, 0x7c00
-; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1150-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
+; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_and_b32 s2, s3, s2
-; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1150-TRUE16-NEXT: v_cndmask_b16 v3.l, 0x7e00, v3.l, s2
-; GFX1150-TRUE16-NEXT: v_lshl_or_b32 v1, v3, 16, v1
+; GFX1150-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x7e00, v3.l, s2
; GFX1150-TRUE16-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1150-TRUE16-NEXT: s_endpgm
;
@@ -10265,7 +10246,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s8, s6
; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB10_2
-; GFX1150-FAKE16-NEXT: ; %bb.1: ; %frem.else
+; GFX1150-FAKE16-NEXT: ; %bb.1: ; %frem.else86
; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s8, s6
; GFX1150-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, 0, s5
; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -10275,7 +10256,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: s_branch .LBB10_8
; GFX1150-FAKE16-NEXT: .LBB10_2:
; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr0
-; GFX1150-FAKE16-NEXT: .LBB10_3: ; %frem.compute
+; GFX1150-FAKE16-NEXT: .LBB10_3: ; %frem.compute85
; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s6
; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v0, s8
; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s8
@@ -10310,11 +10291,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4
; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB10_7
-; GFX1150-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX1150-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader
; GFX1150-FAKE16-NEXT: s_sub_i32 s6, s8, s6
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-FAKE16-NEXT: s_add_i32 s6, s6, 11
-; GFX1150-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body
+; GFX1150-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body93
; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v5, v2
@@ -10336,7 +10317,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: ; %bb.6: ; %Flow133
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v4, s6
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v2, v5
-; GFX1150-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit
+; GFX1150-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit94
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v4, -10, v4
; GFX1150-FAKE16-NEXT: v_ldexp_f32 v2, v2, v4
@@ -10365,7 +10346,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s10, s9
; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB10_10
-; GFX1150-FAKE16-NEXT: ; %bb.9: ; %frem.else20
+; GFX1150-FAKE16-NEXT: ; %bb.9: ; %frem.else53
; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s10, s9
; GFX1150-FAKE16-NEXT: v_bfi_b32 v1, 0x7fff, 0, s8
; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -10375,7 +10356,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: s_branch .LBB10_16
; GFX1150-FAKE16-NEXT: .LBB10_10:
; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr1
-; GFX1150-FAKE16-NEXT: .LBB10_11: ; %frem.compute19
+; GFX1150-FAKE16-NEXT: .LBB10_11: ; %frem.compute52
; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s9
; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s10
; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s10
@@ -10410,11 +10391,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5
; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0
; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB10_15
-; GFX1150-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX1150-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader
; GFX1150-FAKE16-NEXT: s_sub_i32 s9, s10, s9
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-FAKE16-NEXT: s_add_i32 s9, s9, 11
-; GFX1150-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body27
+; GFX1150-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body60
; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v6, v3
@@ -10436,7 +10417,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: ; %bb.14: ; %Flow129
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v5, s9
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v3, v6
-; GFX1150-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit28
+; GFX1150-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit61
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v5, -10, v5
; GFX1150-FAKE16-NEXT: v_ldexp_f32 v3, v3, v5
@@ -10463,7 +10444,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s10, s9
; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB10_18
-; GFX1150-FAKE16-NEXT: ; %bb.17: ; %frem.else53
+; GFX1150-FAKE16-NEXT: ; %bb.17: ; %frem.else20
; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s10, s9
; GFX1150-FAKE16-NEXT: v_bfi_b32 v2, 0x7fff, 0, s7
; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -10473,7 +10454,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: s_branch .LBB10_24
; GFX1150-FAKE16-NEXT: .LBB10_18:
; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr2
-; GFX1150-FAKE16-NEXT: .LBB10_19: ; %frem.compute52
+; GFX1150-FAKE16-NEXT: .LBB10_19: ; %frem.compute19
; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v3, s9
; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s10
; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v5, s10
@@ -10508,11 +10489,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6
; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0
; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB10_23
-; GFX1150-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader
+; GFX1150-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader
; GFX1150-FAKE16-NEXT: s_sub_i32 s9, s10, s9
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-FAKE16-NEXT: s_add_i32 s9, s9, 11
-; GFX1150-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body60
+; GFX1150-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body27
; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v7, v4
@@ -10534,7 +10515,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: ; %bb.22: ; %Flow125
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v6, s9
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v4, v7
-; GFX1150-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit61
+; GFX1150-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit28
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v6, -10, v6
; GFX1150-FAKE16-NEXT: v_ldexp_f32 v4, v4, v6
@@ -10563,7 +10544,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1150-FAKE16-NEXT: s_cmp_ngt_f32 s12, s11
; GFX1150-FAKE16-NEXT: s_cbranch_scc0 .LBB10_26
-; GFX1150-FAKE16-NEXT: ; %bb.25: ; %frem.else86
+; GFX1150-FAKE16-NEXT: ; %bb.25: ; %frem.else
; GFX1150-FAKE16-NEXT: s_cmp_eq_f32 s12, s11
; GFX1150-FAKE16-NEXT: v_bfi_b32 v3, 0x7fff, 0, s10
; GFX1150-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -10573,7 +10554,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: s_branch .LBB10_32
; GFX1150-FAKE16-NEXT: .LBB10_26:
; GFX1150-FAKE16-NEXT: ; implicit-def: $vgpr3
-; GFX1150-FAKE16-NEXT: .LBB10_27: ; %frem.compute85
+; GFX1150-FAKE16-NEXT: .LBB10_27: ; %frem.compute
; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v4, s11
; GFX1150-FAKE16-NEXT: v_frexp_mant_f32_e32 v3, s12
; GFX1150-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v6, s12
@@ -10608,11 +10589,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v7
; GFX1150-FAKE16-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0
; GFX1150-FAKE16-NEXT: s_cbranch_vccnz .LBB10_31
-; GFX1150-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader
+; GFX1150-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; GFX1150-FAKE16-NEXT: s_sub_i32 s11, s12, s11
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-FAKE16-NEXT: s_add_i32 s11, s11, 11
-; GFX1150-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body93
+; GFX1150-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body
; GFX1150-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v8, v5
@@ -10634,7 +10615,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-FAKE16-NEXT: ; %bb.30: ; %Flow
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v7, s11
; GFX1150-FAKE16-NEXT: v_mov_b32_e32 v5, v8
-; GFX1150-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit94
+; GFX1150-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit
; GFX1150-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-FAKE16-NEXT: v_add_nc_u32_e32 v7, -10, v7
; GFX1150-FAKE16-NEXT: v_ldexp_f32 v5, v5, v7
@@ -10712,7 +10693,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s8, s6
; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_2
-; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else
+; GFX1200-TRUE16-NEXT: ; %bb.1: ; %frem.else86
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v0.l, s5
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s8, s6
@@ -10724,7 +10705,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_branch .LBB10_8
; GFX1200-TRUE16-NEXT: .LBB10_2:
; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr0
-; GFX1200-TRUE16-NEXT: .LBB10_3: ; %frem.compute
+; GFX1200-TRUE16-NEXT: .LBB10_3: ; %frem.compute85
; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s6
; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v0, s8
; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s8
@@ -10759,11 +10740,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4
; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB10_7
-; GFX1200-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX1200-TRUE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader
; GFX1200-TRUE16-NEXT: s_sub_co_i32 s6, s8, s6
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
; GFX1200-TRUE16-NEXT: s_add_co_i32 s6, s6, 11
-; GFX1200-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body
+; GFX1200-TRUE16-NEXT: .LBB10_5: ; %frem.loop_body93
; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v5, v2
@@ -10787,7 +10768,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: ; %bb.6: ; %Flow133
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v4, s6
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v2, v5
-; GFX1200-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit
+; GFX1200-TRUE16-NEXT: .LBB10_7: ; %frem.loop_exit94
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v4, -10, v4
; GFX1200-TRUE16-NEXT: v_ldexp_f32 v2, v2, v4
@@ -10821,7 +10802,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9
; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_10
-; GFX1200-TRUE16-NEXT: ; %bb.9: ; %frem.else20
+; GFX1200-TRUE16-NEXT: ; %bb.9: ; %frem.else53
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v1.l, s8
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s10, s9
@@ -10833,7 +10814,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_branch .LBB10_16
; GFX1200-TRUE16-NEXT: .LBB10_10:
; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr1
-; GFX1200-TRUE16-NEXT: .LBB10_11: ; %frem.compute19
+; GFX1200-TRUE16-NEXT: .LBB10_11: ; %frem.compute52
; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s9
; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v1, s10
; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s10
@@ -10869,11 +10850,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5
; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0
; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB10_15
-; GFX1200-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX1200-TRUE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader
; GFX1200-TRUE16-NEXT: s_sub_co_i32 s9, s10, s9
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
; GFX1200-TRUE16-NEXT: s_add_co_i32 s9, s9, 11
-; GFX1200-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body27
+; GFX1200-TRUE16-NEXT: .LBB10_13: ; %frem.loop_body60
; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v6, v3
@@ -10897,7 +10878,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: ; %bb.14: ; %Flow129
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v5, s9
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v3, v6
-; GFX1200-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit28
+; GFX1200-TRUE16-NEXT: .LBB10_15: ; %frem.loop_exit61
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v5, -10, v5
; GFX1200-TRUE16-NEXT: v_ldexp_f32 v3, v3, v5
@@ -10928,7 +10909,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s10, s9
; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_18
-; GFX1200-TRUE16-NEXT: ; %bb.17: ; %frem.else53
+; GFX1200-TRUE16-NEXT: ; %bb.17: ; %frem.else20
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v2.l, s7
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0
; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s10, s9
@@ -10941,7 +10922,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_branch .LBB10_24
; GFX1200-TRUE16-NEXT: .LBB10_18:
; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr2
-; GFX1200-TRUE16-NEXT: .LBB10_19: ; %frem.compute52
+; GFX1200-TRUE16-NEXT: .LBB10_19: ; %frem.compute19
; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v3, s9
; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v2, s10
; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v5, s10
@@ -10977,11 +10958,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6
; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0
; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB10_23
-; GFX1200-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader
+; GFX1200-TRUE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader
; GFX1200-TRUE16-NEXT: s_sub_co_i32 s9, s10, s9
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
; GFX1200-TRUE16-NEXT: s_add_co_i32 s9, s9, 11
-; GFX1200-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body60
+; GFX1200-TRUE16-NEXT: .LBB10_21: ; %frem.loop_body27
; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v7, v4
@@ -11005,7 +10986,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: ; %bb.22: ; %Flow125
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v6, s9
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v4, v7
-; GFX1200-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit61
+; GFX1200-TRUE16-NEXT: .LBB10_23: ; %frem.loop_exit28
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v6, -10, v6
; GFX1200-TRUE16-NEXT: v_ldexp_f32 v4, v4, v6
@@ -11039,7 +11020,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
; GFX1200-TRUE16-NEXT: s_cmp_ngt_f32 s12, s11
; GFX1200-TRUE16-NEXT: s_cbranch_scc0 .LBB10_26
-; GFX1200-TRUE16-NEXT: ; %bb.25: ; %frem.else86
+; GFX1200-TRUE16-NEXT: ; %bb.25: ; %frem.else
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v3.l, s10
; GFX1200-TRUE16-NEXT: v_mov_b16_e32 v4.l, 0
; GFX1200-TRUE16-NEXT: s_cmp_eq_f32 s12, s11
@@ -11051,7 +11032,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_branch .LBB10_32
; GFX1200-TRUE16-NEXT: .LBB10_26:
; GFX1200-TRUE16-NEXT: ; implicit-def: $vgpr3
-; GFX1200-TRUE16-NEXT: .LBB10_27: ; %frem.compute85
+; GFX1200-TRUE16-NEXT: .LBB10_27: ; %frem.compute
; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v4, s11
; GFX1200-TRUE16-NEXT: v_frexp_mant_f32_e32 v3, s12
; GFX1200-TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v6, s12
@@ -11087,11 +11068,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v7
; GFX1200-TRUE16-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0
; GFX1200-TRUE16-NEXT: s_cbranch_vccnz .LBB10_31
-; GFX1200-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader
+; GFX1200-TRUE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; GFX1200-TRUE16-NEXT: s_sub_co_i32 s11, s12, s11
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
; GFX1200-TRUE16-NEXT: s_add_co_i32 s11, s11, 11
-; GFX1200-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body93
+; GFX1200-TRUE16-NEXT: .LBB10_29: ; %frem.loop_body
; GFX1200-TRUE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v8, v5
@@ -11115,7 +11096,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: ; %bb.30: ; %Flow
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v7, s11
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v5, v8
-; GFX1200-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit94
+; GFX1200-TRUE16-NEXT: .LBB10_31: ; %frem.loop_exit
; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-TRUE16-NEXT: v_add_nc_u32_e32 v7, -10, v7
; GFX1200-TRUE16-NEXT: v_ldexp_f32 v5, v5, v7
@@ -11147,18 +11128,14 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.l, s3
; GFX1200-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1200-TRUE16-NEXT: s_cmp_nge_f16 s5, 0x7c00
-; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX1200-TRUE16-NEXT: s_cselect_b32 s4, -1, 0
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
; GFX1200-TRUE16-NEXT: s_and_b32 s3, s4, s3
; GFX1200-TRUE16-NEXT: s_cmp_lg_f16 s2, 0
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
-; GFX1200-TRUE16-NEXT: v_cndmask_b16 v4.l, 0x7e00, v1.l, s3
+; GFX1200-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v1.l, s3
; GFX1200-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX1200-TRUE16-NEXT: s_cmp_nge_f16 s8, 0x7c00
-; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-TRUE16-NEXT: v_lshl_or_b32 v0, v4, 16, v0
; GFX1200-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
; GFX1200-TRUE16-NEXT: s_and_b32 s2, s3, s2
@@ -11168,15 +11145,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v2, 0
; GFX1200-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX1200-TRUE16-NEXT: s_cmp_nge_f16 s7, 0x7c00
-; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1200-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX1200-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
; GFX1200-TRUE16-NEXT: s_and_b32 s2, s3, s2
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
-; GFX1200-TRUE16-NEXT: v_cndmask_b16 v3.l, 0x7e00, v3.l, s2
-; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-TRUE16-NEXT: v_lshl_or_b32 v1, v3, 16, v1
+; GFX1200-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x7e00, v3.l, s2
; GFX1200-TRUE16-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1200-TRUE16-NEXT: s_endpgm
;
@@ -11203,7 +11176,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s8, s6
; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB10_2
-; GFX1200-FAKE16-NEXT: ; %bb.1: ; %frem.else
+; GFX1200-FAKE16-NEXT: ; %bb.1: ; %frem.else86
; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s8, s6
; GFX1200-FAKE16-NEXT: v_bfi_b32 v0, 0x7fff, 0, s5
; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -11213,7 +11186,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: s_branch .LBB10_8
; GFX1200-FAKE16-NEXT: .LBB10_2:
; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr0
-; GFX1200-FAKE16-NEXT: .LBB10_3: ; %frem.compute
+; GFX1200-FAKE16-NEXT: .LBB10_3: ; %frem.compute85
; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s6
; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v0, s8
; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v3, s8
@@ -11249,11 +11222,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v4
; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB10_7
-; GFX1200-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX1200-FAKE16-NEXT: ; %bb.4: ; %frem.loop_body93.preheader
; GFX1200-FAKE16-NEXT: s_sub_co_i32 s6, s8, s6
; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe
; GFX1200-FAKE16-NEXT: s_add_co_i32 s6, s6, 11
-; GFX1200-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body
+; GFX1200-FAKE16-NEXT: .LBB10_5: ; %frem.loop_body93
; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v5, v2
@@ -11277,7 +11250,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: ; %bb.6: ; %Flow133
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v4, s6
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v2, v5
-; GFX1200-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit
+; GFX1200-FAKE16-NEXT: .LBB10_7: ; %frem.loop_exit94
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v4, -10, v4
; GFX1200-FAKE16-NEXT: v_ldexp_f32 v2, v2, v4
@@ -11310,7 +11283,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s10, s9
; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB10_10
-; GFX1200-FAKE16-NEXT: ; %bb.9: ; %frem.else20
+; GFX1200-FAKE16-NEXT: ; %bb.9: ; %frem.else53
; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s10, s9
; GFX1200-FAKE16-NEXT: v_bfi_b32 v1, 0x7fff, 0, s8
; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -11321,7 +11294,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: s_branch .LBB10_16
; GFX1200-FAKE16-NEXT: .LBB10_10:
; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr1
-; GFX1200-FAKE16-NEXT: .LBB10_11: ; %frem.compute19
+; GFX1200-FAKE16-NEXT: .LBB10_11: ; %frem.compute52
; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s9
; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v1, s10
; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v4, s10
@@ -11357,11 +11330,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v5
; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0
; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB10_15
-; GFX1200-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body27.preheader
+; GFX1200-FAKE16-NEXT: ; %bb.12: ; %frem.loop_body60.preheader
; GFX1200-FAKE16-NEXT: s_sub_co_i32 s9, s10, s9
; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe
; GFX1200-FAKE16-NEXT: s_add_co_i32 s9, s9, 11
-; GFX1200-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body27
+; GFX1200-FAKE16-NEXT: .LBB10_13: ; %frem.loop_body60
; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v6, v3
@@ -11385,7 +11358,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: ; %bb.14: ; %Flow129
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v5, s9
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v3, v6
-; GFX1200-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit28
+; GFX1200-FAKE16-NEXT: .LBB10_15: ; %frem.loop_exit61
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v5, -10, v5
; GFX1200-FAKE16-NEXT: v_ldexp_f32 v3, v3, v5
@@ -11415,7 +11388,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s10, s9
; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB10_18
-; GFX1200-FAKE16-NEXT: ; %bb.17: ; %frem.else53
+; GFX1200-FAKE16-NEXT: ; %bb.17: ; %frem.else20
; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s10, s9
; GFX1200-FAKE16-NEXT: v_bfi_b32 v2, 0x7fff, 0, s7
; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -11426,7 +11399,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: s_branch .LBB10_24
; GFX1200-FAKE16-NEXT: .LBB10_18:
; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr2
-; GFX1200-FAKE16-NEXT: .LBB10_19: ; %frem.compute52
+; GFX1200-FAKE16-NEXT: .LBB10_19: ; %frem.compute19
; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v3, s9
; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v2, s10
; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v5, s10
@@ -11462,11 +11435,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v6
; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0
; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB10_23
-; GFX1200-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body60.preheader
+; GFX1200-FAKE16-NEXT: ; %bb.20: ; %frem.loop_body27.preheader
; GFX1200-FAKE16-NEXT: s_sub_co_i32 s9, s10, s9
; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe
; GFX1200-FAKE16-NEXT: s_add_co_i32 s9, s9, 11
-; GFX1200-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body60
+; GFX1200-FAKE16-NEXT: .LBB10_21: ; %frem.loop_body27
; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v7, v4
@@ -11490,7 +11463,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: ; %bb.22: ; %Flow125
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v6, s9
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v4, v7
-; GFX1200-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit61
+; GFX1200-FAKE16-NEXT: .LBB10_23: ; %frem.loop_exit28
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v6, -10, v6
; GFX1200-FAKE16-NEXT: v_ldexp_f32 v4, v4, v6
@@ -11523,7 +11496,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_2)
; GFX1200-FAKE16-NEXT: s_cmp_ngt_f32 s12, s11
; GFX1200-FAKE16-NEXT: s_cbranch_scc0 .LBB10_26
-; GFX1200-FAKE16-NEXT: ; %bb.25: ; %frem.else86
+; GFX1200-FAKE16-NEXT: ; %bb.25: ; %frem.else
; GFX1200-FAKE16-NEXT: s_cmp_eq_f32 s12, s11
; GFX1200-FAKE16-NEXT: v_bfi_b32 v3, 0x7fff, 0, s10
; GFX1200-FAKE16-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -11534,7 +11507,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: s_branch .LBB10_32
; GFX1200-FAKE16-NEXT: .LBB10_26:
; GFX1200-FAKE16-NEXT: ; implicit-def: $vgpr3
-; GFX1200-FAKE16-NEXT: .LBB10_27: ; %frem.compute85
+; GFX1200-FAKE16-NEXT: .LBB10_27: ; %frem.compute
; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v4, s11
; GFX1200-FAKE16-NEXT: v_frexp_mant_f32_e32 v3, s12
; GFX1200-FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v6, s12
@@ -11570,11 +11543,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: v_cmp_gt_i32_e32 vcc_lo, 12, v7
; GFX1200-FAKE16-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0
; GFX1200-FAKE16-NEXT: s_cbranch_vccnz .LBB10_31
-; GFX1200-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body93.preheader
+; GFX1200-FAKE16-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; GFX1200-FAKE16-NEXT: s_sub_co_i32 s11, s12, s11
; GFX1200-FAKE16-NEXT: s_wait_alu 0xfffe
; GFX1200-FAKE16-NEXT: s_add_co_i32 s11, s11, 11
-; GFX1200-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body93
+; GFX1200-FAKE16-NEXT: .LBB10_29: ; %frem.loop_body
; GFX1200-FAKE16-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v8, v5
@@ -11598,7 +11571,7 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-FAKE16-NEXT: ; %bb.30: ; %Flow
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v7, s11
; GFX1200-FAKE16-NEXT: v_mov_b32_e32 v5, v8
-; GFX1200-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit94
+; GFX1200-FAKE16-NEXT: .LBB10_31: ; %frem.loop_exit
; GFX1200-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-FAKE16-NEXT: v_add_nc_u32_e32 v7, -10, v7
; GFX1200-FAKE16-NEXT: v_ldexp_f32 v5, v5, v7
@@ -11686,7 +11659,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v2|
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
; SI-NEXT: s_cbranch_vccz .LBB11_2
-; SI-NEXT: ; %bb.1: ; %frem.else
+; SI-NEXT: ; %bb.1: ; %frem.else16
; SI-NEXT: s_brev_b32 s2, -2
; SI-NEXT: v_bfi_b32 v4, s2, 0, v0
; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v2|
@@ -11697,7 +11670,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: .LBB11_2:
; SI-NEXT: ; implicit-def: $vgpr4
; SI-NEXT: s_mov_b64 vcc, 0
-; SI-NEXT: .LBB11_3: ; %frem.compute
+; SI-NEXT: .LBB11_3: ; %frem.compute15
; SI-NEXT: s_mov_b32 s6, 0x7f800000
; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v0|, s6
; SI-NEXT: v_frexp_exp_i32_f32_e32 v4, v0
@@ -11733,10 +11706,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0
; SI-NEXT: s_cmp_lt_i32 s3, 13
; SI-NEXT: s_cbranch_scc1 .LBB11_7
-; SI-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; SI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; SI-NEXT: s_sub_i32 s3, s4, s5
; SI-NEXT: s_add_i32 s3, s3, 12
-; SI-NEXT: .LBB11_5: ; %frem.loop_body
+; SI-NEXT: .LBB11_5: ; %frem.loop_body23
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_mov_b32_e32 v7, v5
; SI-NEXT: v_mul_f32_e32 v5, v7, v6
@@ -11751,7 +11724,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_cbranch_scc1 .LBB11_5
; SI-NEXT: ; %bb.6: ; %Flow51
; SI-NEXT: v_mov_b32_e32 v5, v7
-; SI-NEXT: .LBB11_7: ; %frem.loop_exit
+; SI-NEXT: .LBB11_7: ; %frem.loop_exit24
; SI-NEXT: s_add_i32 s3, s3, -11
; SI-NEXT: v_ldexp_f32_e64 v5, v5, s3
; SI-NEXT: v_mul_f32_e32 v6, v5, v6
@@ -11767,7 +11740,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v3|
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
; SI-NEXT: s_cbranch_vccz .LBB11_10
-; SI-NEXT: ; %bb.9: ; %frem.else16
+; SI-NEXT: ; %bb.9: ; %frem.else
; SI-NEXT: s_brev_b32 s2, -2
; SI-NEXT: v_bfi_b32 v5, s2, 0, v1
; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v3|
@@ -11778,7 +11751,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: .LBB11_10:
; SI-NEXT: ; implicit-def: $vgpr5
; SI-NEXT: s_mov_b64 vcc, 0
-; SI-NEXT: .LBB11_11: ; %frem.compute15
+; SI-NEXT: .LBB11_11: ; %frem.compute
; SI-NEXT: s_mov_b32 s6, 0x7f800000
; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s6
; SI-NEXT: v_frexp_exp_i32_f32_e32 v5, v1
@@ -11814,10 +11787,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0
; SI-NEXT: s_cmp_lt_i32 s3, 13
; SI-NEXT: s_cbranch_scc1 .LBB11_15
-; SI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; SI-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; SI-NEXT: s_sub_i32 s3, s4, s5
; SI-NEXT: s_add_i32 s3, s3, 12
-; SI-NEXT: .LBB11_13: ; %frem.loop_body23
+; SI-NEXT: .LBB11_13: ; %frem.loop_body
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_mov_b32_e32 v8, v6
; SI-NEXT: v_mul_f32_e32 v6, v8, v7
@@ -11832,7 +11805,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_cbranch_scc1 .LBB11_13
; SI-NEXT: ; %bb.14: ; %Flow
; SI-NEXT: v_mov_b32_e32 v6, v8
-; SI-NEXT: .LBB11_15: ; %frem.loop_exit24
+; SI-NEXT: .LBB11_15: ; %frem.loop_exit
; SI-NEXT: s_add_i32 s3, s3, -11
; SI-NEXT: v_ldexp_f32_e64 v6, v6, s3
; SI-NEXT: v_mul_f32_e32 v7, v6, v7
@@ -11877,7 +11850,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v2|
; CI-NEXT: s_and_b64 vcc, exec, s[2:3]
; CI-NEXT: s_cbranch_vccz .LBB11_2
-; CI-NEXT: ; %bb.1: ; %frem.else
+; CI-NEXT: ; %bb.1: ; %frem.else16
; CI-NEXT: s_brev_b32 s2, -2
; CI-NEXT: v_bfi_b32 v4, s2, 0, v0
; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v2|
@@ -11886,7 +11859,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_branch .LBB11_8
; CI-NEXT: .LBB11_2:
; CI-NEXT: ; implicit-def: $vgpr4
-; CI-NEXT: .LBB11_3: ; %frem.compute
+; CI-NEXT: .LBB11_3: ; %frem.compute15
; CI-NEXT: v_frexp_mant_f32_e64 v5, |v2|
; CI-NEXT: v_ldexp_f32_e64 v5, v5, 1
; CI-NEXT: v_div_scale_f32 v11, s[2:3], v5, v5, 1.0
@@ -11911,10 +11884,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v6
; CI-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0
; CI-NEXT: s_cbranch_vccnz .LBB11_7
-; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; CI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; CI-NEXT: v_sub_i32_e32 v6, vcc, v9, v10
; CI-NEXT: v_add_i32_e32 v6, vcc, 12, v6
-; CI-NEXT: .LBB11_5: ; %frem.loop_body
+; CI-NEXT: .LBB11_5: ; %frem.loop_body23
; CI-NEXT: ; =>This Inner Loop Header: Depth=1
; CI-NEXT: v_mov_b32_e32 v9, v7
; CI-NEXT: v_mul_f32_e32 v7, v9, v8
@@ -11929,7 +11902,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_cbranch_vccnz .LBB11_5
; CI-NEXT: ; %bb.6: ; %Flow51
; CI-NEXT: v_mov_b32_e32 v7, v9
-; CI-NEXT: .LBB11_7: ; %frem.loop_exit
+; CI-NEXT: .LBB11_7: ; %frem.loop_exit24
; CI-NEXT: v_add_i32_e32 v6, vcc, -11, v6
; CI-NEXT: v_ldexp_f32_e32 v6, v7, v6
; CI-NEXT: v_mul_f32_e32 v7, v6, v8
@@ -11945,7 +11918,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v3|
; CI-NEXT: s_and_b64 vcc, exec, s[2:3]
; CI-NEXT: s_cbranch_vccz .LBB11_10
-; CI-NEXT: ; %bb.9: ; %frem.else16
+; CI-NEXT: ; %bb.9: ; %frem.else
; CI-NEXT: s_brev_b32 s2, -2
; CI-NEXT: v_bfi_b32 v5, s2, 0, v1
; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v3|
@@ -11954,7 +11927,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_branch .LBB11_16
; CI-NEXT: .LBB11_10:
; CI-NEXT: ; implicit-def: $vgpr5
-; CI-NEXT: .LBB11_11: ; %frem.compute15
+; CI-NEXT: .LBB11_11: ; %frem.compute
; CI-NEXT: v_frexp_mant_f32_e64 v6, |v3|
; CI-NEXT: v_ldexp_f32_e64 v6, v6, 1
; CI-NEXT: v_div_scale_f32 v12, s[2:3], v6, v6, 1.0
@@ -11979,10 +11952,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v7
; CI-NEXT: v_div_fixup_f32 v9, v9, v6, 1.0
; CI-NEXT: s_cbranch_vccnz .LBB11_15
-; CI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; CI-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; CI-NEXT: v_sub_i32_e32 v7, vcc, v10, v11
; CI-NEXT: v_add_i32_e32 v7, vcc, 12, v7
-; CI-NEXT: .LBB11_13: ; %frem.loop_body23
+; CI-NEXT: .LBB11_13: ; %frem.loop_body
; CI-NEXT: ; =>This Inner Loop Header: Depth=1
; CI-NEXT: v_mov_b32_e32 v10, v8
; CI-NEXT: v_mul_f32_e32 v8, v10, v9
@@ -11997,7 +11970,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_cbranch_vccnz .LBB11_13
; CI-NEXT: ; %bb.14: ; %Flow
; CI-NEXT: v_mov_b32_e32 v8, v10
-; CI-NEXT: .LBB11_15: ; %frem.loop_exit24
+; CI-NEXT: .LBB11_15: ; %frem.loop_exit
; CI-NEXT: v_add_i32_e32 v7, vcc, -11, v7
; CI-NEXT: v_ldexp_f32_e32 v7, v8, v7
; CI-NEXT: v_mul_f32_e32 v8, v7, v9
@@ -12042,7 +12015,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v2|
; VI-NEXT: s_and_b64 vcc, exec, s[2:3]
; VI-NEXT: s_cbranch_vccz .LBB11_2
-; VI-NEXT: ; %bb.1: ; %frem.else
+; VI-NEXT: ; %bb.1: ; %frem.else16
; VI-NEXT: s_brev_b32 s2, -2
; VI-NEXT: v_bfi_b32 v4, s2, 0, v0
; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v2|
@@ -12051,7 +12024,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_branch .LBB11_8
; VI-NEXT: .LBB11_2:
; VI-NEXT: ; implicit-def: $vgpr4
-; VI-NEXT: .LBB11_3: ; %frem.compute
+; VI-NEXT: .LBB11_3: ; %frem.compute15
; VI-NEXT: v_frexp_mant_f32_e64 v5, |v2|
; VI-NEXT: v_ldexp_f32 v5, v5, 1
; VI-NEXT: v_div_scale_f32 v11, s[2:3], v5, v5, 1.0
@@ -12076,10 +12049,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v6
; VI-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0
; VI-NEXT: s_cbranch_vccnz .LBB11_7
-; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; VI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; VI-NEXT: v_sub_u32_e32 v6, vcc, v9, v10
; VI-NEXT: v_add_u32_e32 v6, vcc, 12, v6
-; VI-NEXT: .LBB11_5: ; %frem.loop_body
+; VI-NEXT: .LBB11_5: ; %frem.loop_body23
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: v_mov_b32_e32 v9, v7
; VI-NEXT: v_mul_f32_e32 v7, v9, v8
@@ -12094,7 +12067,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_cbranch_vccnz .LBB11_5
; VI-NEXT: ; %bb.6: ; %Flow51
; VI-NEXT: v_mov_b32_e32 v7, v9
-; VI-NEXT: .LBB11_7: ; %frem.loop_exit
+; VI-NEXT: .LBB11_7: ; %frem.loop_exit24
; VI-NEXT: v_add_u32_e32 v6, vcc, -11, v6
; VI-NEXT: v_ldexp_f32 v6, v7, v6
; VI-NEXT: v_mul_f32_e32 v7, v6, v8
@@ -12110,7 +12083,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v3|
; VI-NEXT: s_and_b64 vcc, exec, s[2:3]
; VI-NEXT: s_cbranch_vccz .LBB11_10
-; VI-NEXT: ; %bb.9: ; %frem.else16
+; VI-NEXT: ; %bb.9: ; %frem.else
; VI-NEXT: s_brev_b32 s2, -2
; VI-NEXT: v_bfi_b32 v5, s2, 0, v1
; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v3|
@@ -12119,7 +12092,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_branch .LBB11_16
; VI-NEXT: .LBB11_10:
; VI-NEXT: ; implicit-def: $vgpr5
-; VI-NEXT: .LBB11_11: ; %frem.compute15
+; VI-NEXT: .LBB11_11: ; %frem.compute
; VI-NEXT: v_frexp_mant_f32_e64 v6, |v3|
; VI-NEXT: v_ldexp_f32 v6, v6, 1
; VI-NEXT: v_div_scale_f32 v12, s[2:3], v6, v6, 1.0
@@ -12144,10 +12117,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v7
; VI-NEXT: v_div_fixup_f32 v9, v9, v6, 1.0
; VI-NEXT: s_cbranch_vccnz .LBB11_15
-; VI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; VI-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; VI-NEXT: v_sub_u32_e32 v7, vcc, v10, v11
; VI-NEXT: v_add_u32_e32 v7, vcc, 12, v7
-; VI-NEXT: .LBB11_13: ; %frem.loop_body23
+; VI-NEXT: .LBB11_13: ; %frem.loop_body
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: v_mov_b32_e32 v10, v8
; VI-NEXT: v_mul_f32_e32 v8, v10, v9
@@ -12162,7 +12135,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_cbranch_vccnz .LBB11_13
; VI-NEXT: ; %bb.14: ; %Flow
; VI-NEXT: v_mov_b32_e32 v8, v10
-; VI-NEXT: .LBB11_15: ; %frem.loop_exit24
+; VI-NEXT: .LBB11_15: ; %frem.loop_exit
; VI-NEXT: v_add_u32_e32 v7, vcc, -11, v7
; VI-NEXT: v_ldexp_f32 v7, v8, v7
; VI-NEXT: v_mul_f32_e32 v8, v7, v9
@@ -12202,7 +12175,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v2|
; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3]
; GFX9-NEXT: s_cbranch_vccz .LBB11_2
-; GFX9-NEXT: ; %bb.1: ; %frem.else
+; GFX9-NEXT: ; %bb.1: ; %frem.else16
; GFX9-NEXT: s_brev_b32 s2, -2
; GFX9-NEXT: v_bfi_b32 v4, s2, 0, v0
; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v2|
@@ -12211,7 +12184,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_branch .LBB11_8
; GFX9-NEXT: .LBB11_2:
; GFX9-NEXT: ; implicit-def: $vgpr4
-; GFX9-NEXT: .LBB11_3: ; %frem.compute
+; GFX9-NEXT: .LBB11_3: ; %frem.compute15
; GFX9-NEXT: v_frexp_mant_f32_e64 v5, |v2|
; GFX9-NEXT: v_ldexp_f32 v5, v5, 1
; GFX9-NEXT: v_div_scale_f32 v11, s[2:3], v5, v5, 1.0
@@ -12236,10 +12209,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v6
; GFX9-NEXT: v_div_fixup_f32 v8, v8, v5, 1.0
; GFX9-NEXT: s_cbranch_vccnz .LBB11_7
-; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX9-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; GFX9-NEXT: v_sub_u32_e32 v6, v9, v10
; GFX9-NEXT: v_add_u32_e32 v6, 12, v6
-; GFX9-NEXT: .LBB11_5: ; %frem.loop_body
+; GFX9-NEXT: .LBB11_5: ; %frem.loop_body23
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mov_b32_e32 v9, v7
; GFX9-NEXT: v_mul_f32_e32 v7, v9, v8
@@ -12254,7 +12227,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_cbranch_vccnz .LBB11_5
; GFX9-NEXT: ; %bb.6: ; %Flow51
; GFX9-NEXT: v_mov_b32_e32 v7, v9
-; GFX9-NEXT: .LBB11_7: ; %frem.loop_exit
+; GFX9-NEXT: .LBB11_7: ; %frem.loop_exit24
; GFX9-NEXT: v_add_u32_e32 v6, -11, v6
; GFX9-NEXT: v_ldexp_f32 v6, v7, v6
; GFX9-NEXT: v_mul_f32_e32 v7, v6, v8
@@ -12270,7 +12243,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v3|
; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3]
; GFX9-NEXT: s_cbranch_vccz .LBB11_10
-; GFX9-NEXT: ; %bb.9: ; %frem.else16
+; GFX9-NEXT: ; %bb.9: ; %frem.else
; GFX9-NEXT: s_brev_b32 s2, -2
; GFX9-NEXT: v_bfi_b32 v5, s2, 0, v1
; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v3|
@@ -12279,7 +12252,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_branch .LBB11_16
; GFX9-NEXT: .LBB11_10:
; GFX9-NEXT: ; implicit-def: $vgpr5
-; GFX9-NEXT: .LBB11_11: ; %frem.compute15
+; GFX9-NEXT: .LBB11_11: ; %frem.compute
; GFX9-NEXT: v_frexp_mant_f32_e64 v6, |v3|
; GFX9-NEXT: v_ldexp_f32 v6, v6, 1
; GFX9-NEXT: v_div_scale_f32 v12, s[2:3], v6, v6, 1.0
@@ -12304,10 +12277,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v7
; GFX9-NEXT: v_div_fixup_f32 v9, v9, v6, 1.0
; GFX9-NEXT: s_cbranch_vccnz .LBB11_15
-; GFX9-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; GFX9-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX9-NEXT: v_sub_u32_e32 v7, v10, v11
; GFX9-NEXT: v_add_u32_e32 v7, 12, v7
-; GFX9-NEXT: .LBB11_13: ; %frem.loop_body23
+; GFX9-NEXT: .LBB11_13: ; %frem.loop_body
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mov_b32_e32 v10, v8
; GFX9-NEXT: v_mul_f32_e32 v8, v10, v9
@@ -12322,7 +12295,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_cbranch_vccnz .LBB11_13
; GFX9-NEXT: ; %bb.14: ; %Flow
; GFX9-NEXT: v_mov_b32_e32 v8, v10
-; GFX9-NEXT: .LBB11_15: ; %frem.loop_exit24
+; GFX9-NEXT: .LBB11_15: ; %frem.loop_exit
; GFX9-NEXT: v_add_u32_e32 v7, -11, v7
; GFX9-NEXT: v_ldexp_f32 v7, v8, v7
; GFX9-NEXT: v_mul_f32_e32 v8, v7, v9
@@ -12363,7 +12336,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v0|, |v2|
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX10-NEXT: s_cbranch_vccz .LBB11_2
-; GFX10-NEXT: ; %bb.1: ; %frem.else
+; GFX10-NEXT: ; %bb.1: ; %frem.else16
; GFX10-NEXT: v_bfi_b32 v4, 0x7fffffff, 0, v0
; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v0|, |v2|
; GFX10-NEXT: v_cndmask_b32_e32 v4, v0, v4, vcc_lo
@@ -12371,7 +12344,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: s_branch .LBB11_8
; GFX10-NEXT: .LBB11_2:
; GFX10-NEXT: ; implicit-def: $vgpr4
-; GFX10-NEXT: .LBB11_3: ; %frem.compute
+; GFX10-NEXT: .LBB11_3: ; %frem.compute15
; GFX10-NEXT: v_frexp_mant_f32_e64 v5, |v2|
; GFX10-NEXT: v_frexp_mant_f32_e64 v4, |v0|
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v7, v0
@@ -12398,10 +12371,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v8
; GFX10-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0
; GFX10-NEXT: s_cbranch_vccnz .LBB11_7
-; GFX10-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX10-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; GFX10-NEXT: s_sub_i32 s2, s2, s3
; GFX10-NEXT: s_add_i32 s2, s2, 12
-; GFX10-NEXT: .LBB11_5: ; %frem.loop_body
+; GFX10-NEXT: .LBB11_5: ; %frem.loop_body23
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v9, v6
; GFX10-NEXT: s_add_i32 s2, s2, -12
@@ -12417,7 +12390,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: ; %bb.6: ; %Flow51
; GFX10-NEXT: v_mov_b32_e32 v8, s2
; GFX10-NEXT: v_mov_b32_e32 v6, v9
-; GFX10-NEXT: .LBB11_7: ; %frem.loop_exit
+; GFX10-NEXT: .LBB11_7: ; %frem.loop_exit24
; GFX10-NEXT: v_add_nc_u32_e32 v8, -11, v8
; GFX10-NEXT: v_ldexp_f32 v6, v6, v8
; GFX10-NEXT: v_mul_f32_e32 v7, v6, v7
@@ -12432,7 +12405,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v1|, |v3|
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX10-NEXT: s_cbranch_vccz .LBB11_10
-; GFX10-NEXT: ; %bb.9: ; %frem.else16
+; GFX10-NEXT: ; %bb.9: ; %frem.else
; GFX10-NEXT: v_bfi_b32 v5, 0x7fffffff, 0, v1
; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v1|, |v3|
; GFX10-NEXT: v_cndmask_b32_e32 v5, v1, v5, vcc_lo
@@ -12440,7 +12413,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: s_branch .LBB11_16
; GFX10-NEXT: .LBB11_10:
; GFX10-NEXT: ; implicit-def: $vgpr5
-; GFX10-NEXT: .LBB11_11: ; %frem.compute15
+; GFX10-NEXT: .LBB11_11: ; %frem.compute
; GFX10-NEXT: v_frexp_mant_f32_e64 v6, |v3|
; GFX10-NEXT: v_frexp_mant_f32_e64 v5, |v1|
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v8, v1
@@ -12467,10 +12440,10 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v9
; GFX10-NEXT: v_div_fixup_f32 v8, v8, v6, 1.0
; GFX10-NEXT: s_cbranch_vccnz .LBB11_15
-; GFX10-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; GFX10-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX10-NEXT: s_sub_i32 s2, s2, s3
; GFX10-NEXT: s_add_i32 s2, s2, 12
-; GFX10-NEXT: .LBB11_13: ; %frem.loop_body23
+; GFX10-NEXT: .LBB11_13: ; %frem.loop_body
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v10, v7
; GFX10-NEXT: s_add_i32 s2, s2, -12
@@ -12486,7 +12459,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: ; %bb.14: ; %Flow
; GFX10-NEXT: v_mov_b32_e32 v9, s2
; GFX10-NEXT: v_mov_b32_e32 v7, v10
-; GFX10-NEXT: .LBB11_15: ; %frem.loop_exit24
+; GFX10-NEXT: .LBB11_15: ; %frem.loop_exit
; GFX10-NEXT: v_add_nc_u32_e32 v9, -11, v9
; GFX10-NEXT: v_ldexp_f32 v7, v7, v9
; GFX10-NEXT: v_mul_f32_e32 v8, v7, v8
@@ -12524,7 +12497,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v0|, |v2|
; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX11-NEXT: s_cbranch_vccz .LBB11_2
-; GFX11-NEXT: ; %bb.1: ; %frem.else
+; GFX11-NEXT: ; %bb.1: ; %frem.else16
; GFX11-NEXT: v_bfi_b32 v4, 0x7fffffff, 0, v0
; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v0|, |v2|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
@@ -12533,7 +12506,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_branch .LBB11_8
; GFX11-NEXT: .LBB11_2:
; GFX11-NEXT: ; implicit-def: $vgpr4
-; GFX11-NEXT: .LBB11_3: ; %frem.compute
+; GFX11-NEXT: .LBB11_3: ; %frem.compute15
; GFX11-NEXT: v_frexp_mant_f32_e64 v5, |v2|
; GFX11-NEXT: v_frexp_mant_f32_e64 v4, |v0|
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v7, v0
@@ -12569,11 +12542,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_div_fixup_f32 v7, v7, v5, 1.0
; GFX11-NEXT: s_cbranch_vccnz .LBB11_7
-; GFX11-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX11-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; GFX11-NEXT: s_sub_i32 s2, s2, s3
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_add_i32 s2, s2, 12
-; GFX11-NEXT: .LBB11_5: ; %frem.loop_body
+; GFX11-NEXT: .LBB11_5: ; %frem.loop_body23
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v9, v6
@@ -12593,7 +12566,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: ; %bb.6: ; %Flow51
; GFX11-NEXT: v_mov_b32_e32 v8, s2
; GFX11-NEXT: v_mov_b32_e32 v6, v9
-; GFX11-NEXT: .LBB11_7: ; %frem.loop_exit
+; GFX11-NEXT: .LBB11_7: ; %frem.loop_exit24
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_add_nc_u32_e32 v8, -11, v8
; GFX11-NEXT: v_ldexp_f32 v6, v6, v8
@@ -12613,7 +12586,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v1|, |v3|
; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX11-NEXT: s_cbranch_vccz .LBB11_10
-; GFX11-NEXT: ; %bb.9: ; %frem.else16
+; GFX11-NEXT: ; %bb.9: ; %frem.else
; GFX11-NEXT: v_bfi_b32 v5, 0x7fffffff, 0, v1
; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v1|, |v3|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
@@ -12622,7 +12595,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_branch .LBB11_16
; GFX11-NEXT: .LBB11_10:
; GFX11-NEXT: ; implicit-def: $vgpr5
-; GFX11-NEXT: .LBB11_11: ; %frem.compute15
+; GFX11-NEXT: .LBB11_11: ; %frem.compute
; GFX11-NEXT: v_frexp_mant_f32_e64 v6, |v3|
; GFX11-NEXT: v_frexp_mant_f32_e64 v5, |v1|
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v8, v1
@@ -12658,11 +12631,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_div_fixup_f32 v8, v8, v6, 1.0
; GFX11-NEXT: s_cbranch_vccnz .LBB11_15
-; GFX11-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; GFX11-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX11-NEXT: s_sub_i32 s2, s2, s3
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_add_i32 s2, s2, 12
-; GFX11-NEXT: .LBB11_13: ; %frem.loop_body23
+; GFX11-NEXT: .LBB11_13: ; %frem.loop_body
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v10, v7
@@ -12682,7 +12655,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: ; %bb.14: ; %Flow
; GFX11-NEXT: v_mov_b32_e32 v9, s2
; GFX11-NEXT: v_mov_b32_e32 v7, v10
-; GFX11-NEXT: .LBB11_15: ; %frem.loop_exit24
+; GFX11-NEXT: .LBB11_15: ; %frem.loop_exit
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_add_nc_u32_e32 v9, -11, v9
; GFX11-NEXT: v_ldexp_f32 v7, v7, v9
@@ -12730,7 +12703,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-NEXT: s_cmp_ngt_f32 s3, s8
; GFX1150-NEXT: s_cbranch_scc0 .LBB11_2
-; GFX1150-NEXT: ; %bb.1: ; %frem.else
+; GFX1150-NEXT: ; %bb.1: ; %frem.else16
; GFX1150-NEXT: s_cmp_eq_f32 s3, s8
; GFX1150-NEXT: v_bfi_b32 v0, 0x7fffffff, 0, s6
; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -12740,7 +12713,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_branch .LBB11_8
; GFX1150-NEXT: .LBB11_2:
; GFX1150-NEXT: ; implicit-def: $vgpr0
-; GFX1150-NEXT: .LBB11_3: ; %frem.compute
+; GFX1150-NEXT: .LBB11_3: ; %frem.compute15
; GFX1150-NEXT: v_frexp_mant_f32_e64 v1, |s4|
; GFX1150-NEXT: v_frexp_mant_f32_e64 v0, |s6|
; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v3, s6
@@ -12775,11 +12748,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4
; GFX1150-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
; GFX1150-NEXT: s_cbranch_vccnz .LBB11_7
-; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; GFX1150-NEXT: s_sub_i32 s7, s7, s8
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-NEXT: s_add_i32 s7, s7, 12
-; GFX1150-NEXT: .LBB11_5: ; %frem.loop_body
+; GFX1150-NEXT: .LBB11_5: ; %frem.loop_body23
; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-NEXT: v_mov_b32_e32 v5, v2
@@ -12801,7 +12774,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: ; %bb.6: ; %Flow51
; GFX1150-NEXT: v_mov_b32_e32 v4, s7
; GFX1150-NEXT: v_mov_b32_e32 v2, v5
-; GFX1150-NEXT: .LBB11_7: ; %frem.loop_exit
+; GFX1150-NEXT: .LBB11_7: ; %frem.loop_exit24
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-NEXT: v_add_nc_u32_e32 v4, -11, v4
; GFX1150-NEXT: v_ldexp_f32 v2, v2, v4
@@ -12824,7 +12797,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-NEXT: s_cmp_ngt_f32 s6, s8
; GFX1150-NEXT: s_cbranch_scc0 .LBB11_10
-; GFX1150-NEXT: ; %bb.9: ; %frem.else16
+; GFX1150-NEXT: ; %bb.9: ; %frem.else
; GFX1150-NEXT: s_cmp_eq_f32 s6, s8
; GFX1150-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, s5
; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -12834,7 +12807,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_branch .LBB11_16
; GFX1150-NEXT: .LBB11_10:
; GFX1150-NEXT: ; implicit-def: $vgpr1
-; GFX1150-NEXT: .LBB11_11: ; %frem.compute15
+; GFX1150-NEXT: .LBB11_11: ; %frem.compute
; GFX1150-NEXT: v_frexp_mant_f32_e64 v2, |s2|
; GFX1150-NEXT: v_frexp_mant_f32_e64 v1, |s5|
; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v4, s5
@@ -12869,11 +12842,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v5
; GFX1150-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0
; GFX1150-NEXT: s_cbranch_vccnz .LBB11_15
-; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX1150-NEXT: s_sub_i32 s7, s7, s8
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-NEXT: s_add_i32 s7, s7, 12
-; GFX1150-NEXT: .LBB11_13: ; %frem.loop_body23
+; GFX1150-NEXT: .LBB11_13: ; %frem.loop_body
; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-NEXT: v_mov_b32_e32 v6, v3
@@ -12895,7 +12868,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: ; %bb.14: ; %Flow
; GFX1150-NEXT: v_mov_b32_e32 v5, s7
; GFX1150-NEXT: v_mov_b32_e32 v3, v6
-; GFX1150-NEXT: .LBB11_15: ; %frem.loop_exit24
+; GFX1150-NEXT: .LBB11_15: ; %frem.loop_exit
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-NEXT: v_add_nc_u32_e32 v5, -11, v5
; GFX1150-NEXT: v_ldexp_f32 v3, v3, v5
@@ -12950,7 +12923,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1200-NEXT: s_cmp_ngt_f32 s3, s8
; GFX1200-NEXT: s_cbranch_scc0 .LBB11_2
-; GFX1200-NEXT: ; %bb.1: ; %frem.else
+; GFX1200-NEXT: ; %bb.1: ; %frem.else16
; GFX1200-NEXT: s_cmp_eq_f32 s3, s8
; GFX1200-NEXT: v_bfi_b32 v0, 0x7fffffff, 0, s6
; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -12960,7 +12933,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: s_branch .LBB11_8
; GFX1200-NEXT: .LBB11_2:
; GFX1200-NEXT: ; implicit-def: $vgpr0
-; GFX1200-NEXT: .LBB11_3: ; %frem.compute
+; GFX1200-NEXT: .LBB11_3: ; %frem.compute15
; GFX1200-NEXT: v_frexp_mant_f32_e64 v1, |s4|
; GFX1200-NEXT: v_frexp_mant_f32_e64 v0, |s6|
; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v3, s6
@@ -12996,11 +12969,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4
; GFX1200-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
; GFX1200-NEXT: s_cbranch_vccnz .LBB11_7
-; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; GFX1200-NEXT: s_sub_co_i32 s7, s7, s8
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_add_co_i32 s7, s7, 12
-; GFX1200-NEXT: .LBB11_5: ; %frem.loop_body
+; GFX1200-NEXT: .LBB11_5: ; %frem.loop_body23
; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-NEXT: v_mov_b32_e32 v5, v2
@@ -13024,7 +12997,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: ; %bb.6: ; %Flow51
; GFX1200-NEXT: v_mov_b32_e32 v4, s7
; GFX1200-NEXT: v_mov_b32_e32 v2, v5
-; GFX1200-NEXT: .LBB11_7: ; %frem.loop_exit
+; GFX1200-NEXT: .LBB11_7: ; %frem.loop_exit24
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-NEXT: v_add_nc_u32_e32 v4, -11, v4
; GFX1200-NEXT: v_ldexp_f32 v2, v2, v4
@@ -13048,7 +13021,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_cmp_ngt_f32 s6, s8
; GFX1200-NEXT: s_cbranch_scc0 .LBB11_10
-; GFX1200-NEXT: ; %bb.9: ; %frem.else16
+; GFX1200-NEXT: ; %bb.9: ; %frem.else
; GFX1200-NEXT: s_cmp_eq_f32 s6, s8
; GFX1200-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, s5
; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -13059,7 +13032,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: s_branch .LBB11_16
; GFX1200-NEXT: .LBB11_10:
; GFX1200-NEXT: ; implicit-def: $vgpr1
-; GFX1200-NEXT: .LBB11_11: ; %frem.compute15
+; GFX1200-NEXT: .LBB11_11: ; %frem.compute
; GFX1200-NEXT: v_frexp_mant_f32_e64 v2, |s2|
; GFX1200-NEXT: v_frexp_mant_f32_e64 v1, |s5|
; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v4, s5
@@ -13095,11 +13068,11 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v5
; GFX1200-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0
; GFX1200-NEXT: s_cbranch_vccnz .LBB11_15
-; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX1200-NEXT: s_sub_co_i32 s7, s7, s8
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_add_co_i32 s7, s7, 12
-; GFX1200-NEXT: .LBB11_13: ; %frem.loop_body23
+; GFX1200-NEXT: .LBB11_13: ; %frem.loop_body
; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-NEXT: v_mov_b32_e32 v6, v3
@@ -13123,7 +13096,7 @@ define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: ; %bb.14: ; %Flow
; GFX1200-NEXT: v_mov_b32_e32 v5, s7
; GFX1200-NEXT: v_mov_b32_e32 v3, v6
-; GFX1200-NEXT: .LBB11_15: ; %frem.loop_exit24
+; GFX1200-NEXT: .LBB11_15: ; %frem.loop_exit
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-NEXT: v_add_nc_u32_e32 v5, -11, v5
; GFX1200-NEXT: v_ldexp_f32 v3, v3, v5
@@ -13187,7 +13160,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v4|
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
; SI-NEXT: s_cbranch_vccz .LBB12_2
-; SI-NEXT: ; %bb.1: ; %frem.else
+; SI-NEXT: ; %bb.1: ; %frem.else78
; SI-NEXT: s_brev_b32 s2, -2
; SI-NEXT: v_bfi_b32 v8, s2, 0, v0
; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v4|
@@ -13198,7 +13171,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: .LBB12_2:
; SI-NEXT: ; implicit-def: $vgpr8
; SI-NEXT: s_mov_b64 vcc, 0
-; SI-NEXT: .LBB12_3: ; %frem.compute
+; SI-NEXT: .LBB12_3: ; %frem.compute77
; SI-NEXT: s_mov_b32 s6, 0x7f800000
; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v0|, s6
; SI-NEXT: v_frexp_exp_i32_f32_e32 v8, v0
@@ -13234,10 +13207,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_div_fixup_f32 v10, v10, v8, 1.0
; SI-NEXT: s_cmp_lt_i32 s3, 13
; SI-NEXT: s_cbranch_scc1 .LBB12_7
-; SI-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; SI-NEXT: ; %bb.4: ; %frem.loop_body85.preheader
; SI-NEXT: s_sub_i32 s3, s4, s5
; SI-NEXT: s_add_i32 s3, s3, 12
-; SI-NEXT: .LBB12_5: ; %frem.loop_body
+; SI-NEXT: .LBB12_5: ; %frem.loop_body85
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_mov_b32_e32 v11, v9
; SI-NEXT: v_mul_f32_e32 v9, v11, v10
@@ -13252,7 +13225,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_cbranch_scc1 .LBB12_5
; SI-NEXT: ; %bb.6: ; %Flow125
; SI-NEXT: v_mov_b32_e32 v9, v11
-; SI-NEXT: .LBB12_7: ; %frem.loop_exit
+; SI-NEXT: .LBB12_7: ; %frem.loop_exit86
; SI-NEXT: s_add_i32 s3, s3, -11
; SI-NEXT: v_ldexp_f32_e64 v9, v9, s3
; SI-NEXT: v_mul_f32_e32 v10, v9, v10
@@ -13268,7 +13241,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v5|
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
; SI-NEXT: s_cbranch_vccz .LBB12_10
-; SI-NEXT: ; %bb.9: ; %frem.else16
+; SI-NEXT: ; %bb.9: ; %frem.else47
; SI-NEXT: s_brev_b32 s2, -2
; SI-NEXT: v_bfi_b32 v9, s2, 0, v1
; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v5|
@@ -13279,7 +13252,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: .LBB12_10:
; SI-NEXT: ; implicit-def: $vgpr9
; SI-NEXT: s_mov_b64 vcc, 0
-; SI-NEXT: .LBB12_11: ; %frem.compute15
+; SI-NEXT: .LBB12_11: ; %frem.compute46
; SI-NEXT: s_mov_b32 s6, 0x7f800000
; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v1|, s6
; SI-NEXT: v_frexp_exp_i32_f32_e32 v9, v1
@@ -13315,10 +13288,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0
; SI-NEXT: s_cmp_lt_i32 s3, 13
; SI-NEXT: s_cbranch_scc1 .LBB12_15
-; SI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; SI-NEXT: ; %bb.12: ; %frem.loop_body54.preheader
; SI-NEXT: s_sub_i32 s3, s4, s5
; SI-NEXT: s_add_i32 s3, s3, 12
-; SI-NEXT: .LBB12_13: ; %frem.loop_body23
+; SI-NEXT: .LBB12_13: ; %frem.loop_body54
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_mov_b32_e32 v12, v10
; SI-NEXT: v_mul_f32_e32 v10, v12, v11
@@ -13333,7 +13306,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_cbranch_scc1 .LBB12_13
; SI-NEXT: ; %bb.14: ; %Flow121
; SI-NEXT: v_mov_b32_e32 v10, v12
-; SI-NEXT: .LBB12_15: ; %frem.loop_exit24
+; SI-NEXT: .LBB12_15: ; %frem.loop_exit55
; SI-NEXT: s_add_i32 s3, s3, -11
; SI-NEXT: v_ldexp_f32_e64 v10, v10, s3
; SI-NEXT: v_mul_f32_e32 v11, v10, v11
@@ -13349,7 +13322,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v2|, |v6|
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
; SI-NEXT: s_cbranch_vccz .LBB12_18
-; SI-NEXT: ; %bb.17: ; %frem.else47
+; SI-NEXT: ; %bb.17: ; %frem.else16
; SI-NEXT: s_brev_b32 s2, -2
; SI-NEXT: v_bfi_b32 v10, s2, 0, v2
; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v2|, |v6|
@@ -13360,7 +13333,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: .LBB12_18:
; SI-NEXT: ; implicit-def: $vgpr10
; SI-NEXT: s_mov_b64 vcc, 0
-; SI-NEXT: .LBB12_19: ; %frem.compute46
+; SI-NEXT: .LBB12_19: ; %frem.compute15
; SI-NEXT: s_mov_b32 s6, 0x7f800000
; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v2|, s6
; SI-NEXT: v_frexp_exp_i32_f32_e32 v10, v2
@@ -13396,10 +13369,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_div_fixup_f32 v12, v12, v10, 1.0
; SI-NEXT: s_cmp_lt_i32 s3, 13
; SI-NEXT: s_cbranch_scc1 .LBB12_23
-; SI-NEXT: ; %bb.20: ; %frem.loop_body54.preheader
+; SI-NEXT: ; %bb.20: ; %frem.loop_body23.preheader
; SI-NEXT: s_sub_i32 s3, s4, s5
; SI-NEXT: s_add_i32 s3, s3, 12
-; SI-NEXT: .LBB12_21: ; %frem.loop_body54
+; SI-NEXT: .LBB12_21: ; %frem.loop_body23
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_mov_b32_e32 v13, v11
; SI-NEXT: v_mul_f32_e32 v11, v13, v12
@@ -13414,7 +13387,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_cbranch_scc1 .LBB12_21
; SI-NEXT: ; %bb.22: ; %Flow117
; SI-NEXT: v_mov_b32_e32 v11, v13
-; SI-NEXT: .LBB12_23: ; %frem.loop_exit55
+; SI-NEXT: .LBB12_23: ; %frem.loop_exit24
; SI-NEXT: s_add_i32 s3, s3, -11
; SI-NEXT: v_ldexp_f32_e64 v11, v11, s3
; SI-NEXT: v_mul_f32_e32 v12, v11, v12
@@ -13430,7 +13403,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v3|, |v7|
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
; SI-NEXT: s_cbranch_vccz .LBB12_26
-; SI-NEXT: ; %bb.25: ; %frem.else78
+; SI-NEXT: ; %bb.25: ; %frem.else
; SI-NEXT: s_brev_b32 s2, -2
; SI-NEXT: v_bfi_b32 v11, s2, 0, v3
; SI-NEXT: v_cmp_eq_f32_e64 vcc, |v3|, |v7|
@@ -13441,7 +13414,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: .LBB12_26:
; SI-NEXT: ; implicit-def: $vgpr11
; SI-NEXT: s_mov_b64 vcc, 0
-; SI-NEXT: .LBB12_27: ; %frem.compute77
+; SI-NEXT: .LBB12_27: ; %frem.compute
; SI-NEXT: s_mov_b32 s6, 0x7f800000
; SI-NEXT: v_cmp_lt_f32_e64 s[2:3], |v3|, s6
; SI-NEXT: v_frexp_exp_i32_f32_e32 v11, v3
@@ -13477,10 +13450,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_div_fixup_f32 v13, v13, v11, 1.0
; SI-NEXT: s_cmp_lt_i32 s3, 13
; SI-NEXT: s_cbranch_scc1 .LBB12_31
-; SI-NEXT: ; %bb.28: ; %frem.loop_body85.preheader
+; SI-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; SI-NEXT: s_sub_i32 s3, s4, s5
; SI-NEXT: s_add_i32 s3, s3, 12
-; SI-NEXT: .LBB12_29: ; %frem.loop_body85
+; SI-NEXT: .LBB12_29: ; %frem.loop_body
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_mov_b32_e32 v14, v12
; SI-NEXT: v_mul_f32_e32 v12, v14, v13
@@ -13495,7 +13468,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: s_cbranch_scc1 .LBB12_29
; SI-NEXT: ; %bb.30: ; %Flow
; SI-NEXT: v_mov_b32_e32 v12, v14
-; SI-NEXT: .LBB12_31: ; %frem.loop_exit86
+; SI-NEXT: .LBB12_31: ; %frem.loop_exit
; SI-NEXT: s_add_i32 s3, s3, -11
; SI-NEXT: v_ldexp_f32_e64 v12, v12, s3
; SI-NEXT: v_mul_f32_e32 v13, v12, v13
@@ -13548,7 +13521,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v4|
; CI-NEXT: s_and_b64 vcc, exec, s[2:3]
; CI-NEXT: s_cbranch_vccz .LBB12_2
-; CI-NEXT: ; %bb.1: ; %frem.else
+; CI-NEXT: ; %bb.1: ; %frem.else78
; CI-NEXT: s_brev_b32 s2, -2
; CI-NEXT: v_bfi_b32 v8, s2, 0, v0
; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v4|
@@ -13557,7 +13530,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_branch .LBB12_8
; CI-NEXT: .LBB12_2:
; CI-NEXT: ; implicit-def: $vgpr8
-; CI-NEXT: .LBB12_3: ; %frem.compute
+; CI-NEXT: .LBB12_3: ; %frem.compute77
; CI-NEXT: v_frexp_mant_f32_e64 v9, |v4|
; CI-NEXT: v_ldexp_f32_e64 v9, v9, 1
; CI-NEXT: v_div_scale_f32 v15, s[2:3], v9, v9, 1.0
@@ -13582,10 +13555,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v10
; CI-NEXT: v_div_fixup_f32 v12, v12, v9, 1.0
; CI-NEXT: s_cbranch_vccnz .LBB12_7
-; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; CI-NEXT: ; %bb.4: ; %frem.loop_body85.preheader
; CI-NEXT: v_sub_i32_e32 v10, vcc, v13, v14
; CI-NEXT: v_add_i32_e32 v10, vcc, 12, v10
-; CI-NEXT: .LBB12_5: ; %frem.loop_body
+; CI-NEXT: .LBB12_5: ; %frem.loop_body85
; CI-NEXT: ; =>This Inner Loop Header: Depth=1
; CI-NEXT: v_mov_b32_e32 v13, v11
; CI-NEXT: v_mul_f32_e32 v11, v13, v12
@@ -13600,7 +13573,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_cbranch_vccnz .LBB12_5
; CI-NEXT: ; %bb.6: ; %Flow125
; CI-NEXT: v_mov_b32_e32 v11, v13
-; CI-NEXT: .LBB12_7: ; %frem.loop_exit
+; CI-NEXT: .LBB12_7: ; %frem.loop_exit86
; CI-NEXT: v_add_i32_e32 v10, vcc, -11, v10
; CI-NEXT: v_ldexp_f32_e32 v10, v11, v10
; CI-NEXT: v_mul_f32_e32 v11, v10, v12
@@ -13616,7 +13589,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v5|
; CI-NEXT: s_and_b64 vcc, exec, s[2:3]
; CI-NEXT: s_cbranch_vccz .LBB12_10
-; CI-NEXT: ; %bb.9: ; %frem.else16
+; CI-NEXT: ; %bb.9: ; %frem.else47
; CI-NEXT: s_brev_b32 s2, -2
; CI-NEXT: v_bfi_b32 v9, s2, 0, v1
; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v5|
@@ -13625,7 +13598,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_branch .LBB12_16
; CI-NEXT: .LBB12_10:
; CI-NEXT: ; implicit-def: $vgpr9
-; CI-NEXT: .LBB12_11: ; %frem.compute15
+; CI-NEXT: .LBB12_11: ; %frem.compute46
; CI-NEXT: v_frexp_mant_f32_e64 v10, |v5|
; CI-NEXT: v_ldexp_f32_e64 v10, v10, 1
; CI-NEXT: v_div_scale_f32 v16, s[2:3], v10, v10, 1.0
@@ -13650,10 +13623,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v11
; CI-NEXT: v_div_fixup_f32 v13, v13, v10, 1.0
; CI-NEXT: s_cbranch_vccnz .LBB12_15
-; CI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; CI-NEXT: ; %bb.12: ; %frem.loop_body54.preheader
; CI-NEXT: v_sub_i32_e32 v11, vcc, v14, v15
; CI-NEXT: v_add_i32_e32 v11, vcc, 12, v11
-; CI-NEXT: .LBB12_13: ; %frem.loop_body23
+; CI-NEXT: .LBB12_13: ; %frem.loop_body54
; CI-NEXT: ; =>This Inner Loop Header: Depth=1
; CI-NEXT: v_mov_b32_e32 v14, v12
; CI-NEXT: v_mul_f32_e32 v12, v14, v13
@@ -13668,7 +13641,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_cbranch_vccnz .LBB12_13
; CI-NEXT: ; %bb.14: ; %Flow121
; CI-NEXT: v_mov_b32_e32 v12, v14
-; CI-NEXT: .LBB12_15: ; %frem.loop_exit24
+; CI-NEXT: .LBB12_15: ; %frem.loop_exit55
; CI-NEXT: v_add_i32_e32 v11, vcc, -11, v11
; CI-NEXT: v_ldexp_f32_e32 v11, v12, v11
; CI-NEXT: v_mul_f32_e32 v12, v11, v13
@@ -13684,7 +13657,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v2|, |v6|
; CI-NEXT: s_and_b64 vcc, exec, s[2:3]
; CI-NEXT: s_cbranch_vccz .LBB12_18
-; CI-NEXT: ; %bb.17: ; %frem.else47
+; CI-NEXT: ; %bb.17: ; %frem.else16
; CI-NEXT: s_brev_b32 s2, -2
; CI-NEXT: v_bfi_b32 v10, s2, 0, v2
; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v2|, |v6|
@@ -13693,7 +13666,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_branch .LBB12_24
; CI-NEXT: .LBB12_18:
; CI-NEXT: ; implicit-def: $vgpr10
-; CI-NEXT: .LBB12_19: ; %frem.compute46
+; CI-NEXT: .LBB12_19: ; %frem.compute15
; CI-NEXT: v_frexp_mant_f32_e64 v11, |v6|
; CI-NEXT: v_ldexp_f32_e64 v11, v11, 1
; CI-NEXT: v_div_scale_f32 v17, s[2:3], v11, v11, 1.0
@@ -13718,10 +13691,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v12
; CI-NEXT: v_div_fixup_f32 v14, v14, v11, 1.0
; CI-NEXT: s_cbranch_vccnz .LBB12_23
-; CI-NEXT: ; %bb.20: ; %frem.loop_body54.preheader
+; CI-NEXT: ; %bb.20: ; %frem.loop_body23.preheader
; CI-NEXT: v_sub_i32_e32 v12, vcc, v15, v16
; CI-NEXT: v_add_i32_e32 v12, vcc, 12, v12
-; CI-NEXT: .LBB12_21: ; %frem.loop_body54
+; CI-NEXT: .LBB12_21: ; %frem.loop_body23
; CI-NEXT: ; =>This Inner Loop Header: Depth=1
; CI-NEXT: v_mov_b32_e32 v15, v13
; CI-NEXT: v_mul_f32_e32 v13, v15, v14
@@ -13736,7 +13709,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_cbranch_vccnz .LBB12_21
; CI-NEXT: ; %bb.22: ; %Flow117
; CI-NEXT: v_mov_b32_e32 v13, v15
-; CI-NEXT: .LBB12_23: ; %frem.loop_exit55
+; CI-NEXT: .LBB12_23: ; %frem.loop_exit24
; CI-NEXT: v_add_i32_e32 v12, vcc, -11, v12
; CI-NEXT: v_ldexp_f32_e32 v12, v13, v12
; CI-NEXT: v_mul_f32_e32 v13, v12, v14
@@ -13752,7 +13725,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v3|, |v7|
; CI-NEXT: s_and_b64 vcc, exec, s[2:3]
; CI-NEXT: s_cbranch_vccz .LBB12_26
-; CI-NEXT: ; %bb.25: ; %frem.else78
+; CI-NEXT: ; %bb.25: ; %frem.else
; CI-NEXT: s_brev_b32 s2, -2
; CI-NEXT: v_bfi_b32 v11, s2, 0, v3
; CI-NEXT: v_cmp_eq_f32_e64 vcc, |v3|, |v7|
@@ -13761,7 +13734,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_branch .LBB12_32
; CI-NEXT: .LBB12_26:
; CI-NEXT: ; implicit-def: $vgpr11
-; CI-NEXT: .LBB12_27: ; %frem.compute77
+; CI-NEXT: .LBB12_27: ; %frem.compute
; CI-NEXT: v_frexp_mant_f32_e64 v12, |v7|
; CI-NEXT: v_ldexp_f32_e64 v12, v12, 1
; CI-NEXT: v_div_scale_f32 v18, s[2:3], v12, v12, 1.0
@@ -13786,10 +13759,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v13
; CI-NEXT: v_div_fixup_f32 v15, v15, v12, 1.0
; CI-NEXT: s_cbranch_vccnz .LBB12_31
-; CI-NEXT: ; %bb.28: ; %frem.loop_body85.preheader
+; CI-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; CI-NEXT: v_sub_i32_e32 v13, vcc, v16, v17
; CI-NEXT: v_add_i32_e32 v13, vcc, 12, v13
-; CI-NEXT: .LBB12_29: ; %frem.loop_body85
+; CI-NEXT: .LBB12_29: ; %frem.loop_body
; CI-NEXT: ; =>This Inner Loop Header: Depth=1
; CI-NEXT: v_mov_b32_e32 v16, v14
; CI-NEXT: v_mul_f32_e32 v14, v16, v15
@@ -13804,7 +13777,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_cbranch_vccnz .LBB12_29
; CI-NEXT: ; %bb.30: ; %Flow
; CI-NEXT: v_mov_b32_e32 v14, v16
-; CI-NEXT: .LBB12_31: ; %frem.loop_exit86
+; CI-NEXT: .LBB12_31: ; %frem.loop_exit
; CI-NEXT: v_add_i32_e32 v13, vcc, -11, v13
; CI-NEXT: v_ldexp_f32_e32 v13, v14, v13
; CI-NEXT: v_mul_f32_e32 v14, v13, v15
@@ -13857,7 +13830,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v4|
; VI-NEXT: s_and_b64 vcc, exec, s[2:3]
; VI-NEXT: s_cbranch_vccz .LBB12_2
-; VI-NEXT: ; %bb.1: ; %frem.else
+; VI-NEXT: ; %bb.1: ; %frem.else78
; VI-NEXT: s_brev_b32 s2, -2
; VI-NEXT: v_bfi_b32 v8, s2, 0, v0
; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v4|
@@ -13866,7 +13839,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_branch .LBB12_8
; VI-NEXT: .LBB12_2:
; VI-NEXT: ; implicit-def: $vgpr8
-; VI-NEXT: .LBB12_3: ; %frem.compute
+; VI-NEXT: .LBB12_3: ; %frem.compute77
; VI-NEXT: v_frexp_mant_f32_e64 v9, |v4|
; VI-NEXT: v_ldexp_f32 v9, v9, 1
; VI-NEXT: v_div_scale_f32 v15, s[2:3], v9, v9, 1.0
@@ -13891,10 +13864,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v10
; VI-NEXT: v_div_fixup_f32 v12, v12, v9, 1.0
; VI-NEXT: s_cbranch_vccnz .LBB12_7
-; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; VI-NEXT: ; %bb.4: ; %frem.loop_body85.preheader
; VI-NEXT: v_sub_u32_e32 v10, vcc, v13, v14
; VI-NEXT: v_add_u32_e32 v10, vcc, 12, v10
-; VI-NEXT: .LBB12_5: ; %frem.loop_body
+; VI-NEXT: .LBB12_5: ; %frem.loop_body85
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: v_mov_b32_e32 v13, v11
; VI-NEXT: v_mul_f32_e32 v11, v13, v12
@@ -13909,7 +13882,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_cbranch_vccnz .LBB12_5
; VI-NEXT: ; %bb.6: ; %Flow125
; VI-NEXT: v_mov_b32_e32 v11, v13
-; VI-NEXT: .LBB12_7: ; %frem.loop_exit
+; VI-NEXT: .LBB12_7: ; %frem.loop_exit86
; VI-NEXT: v_add_u32_e32 v10, vcc, -11, v10
; VI-NEXT: v_ldexp_f32 v10, v11, v10
; VI-NEXT: v_mul_f32_e32 v11, v10, v12
@@ -13925,7 +13898,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v5|
; VI-NEXT: s_and_b64 vcc, exec, s[2:3]
; VI-NEXT: s_cbranch_vccz .LBB12_10
-; VI-NEXT: ; %bb.9: ; %frem.else16
+; VI-NEXT: ; %bb.9: ; %frem.else47
; VI-NEXT: s_brev_b32 s2, -2
; VI-NEXT: v_bfi_b32 v9, s2, 0, v1
; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v5|
@@ -13934,7 +13907,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_branch .LBB12_16
; VI-NEXT: .LBB12_10:
; VI-NEXT: ; implicit-def: $vgpr9
-; VI-NEXT: .LBB12_11: ; %frem.compute15
+; VI-NEXT: .LBB12_11: ; %frem.compute46
; VI-NEXT: v_frexp_mant_f32_e64 v10, |v5|
; VI-NEXT: v_ldexp_f32 v10, v10, 1
; VI-NEXT: v_div_scale_f32 v16, s[2:3], v10, v10, 1.0
@@ -13959,10 +13932,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v11
; VI-NEXT: v_div_fixup_f32 v13, v13, v10, 1.0
; VI-NEXT: s_cbranch_vccnz .LBB12_15
-; VI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; VI-NEXT: ; %bb.12: ; %frem.loop_body54.preheader
; VI-NEXT: v_sub_u32_e32 v11, vcc, v14, v15
; VI-NEXT: v_add_u32_e32 v11, vcc, 12, v11
-; VI-NEXT: .LBB12_13: ; %frem.loop_body23
+; VI-NEXT: .LBB12_13: ; %frem.loop_body54
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: v_mov_b32_e32 v14, v12
; VI-NEXT: v_mul_f32_e32 v12, v14, v13
@@ -13977,7 +13950,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_cbranch_vccnz .LBB12_13
; VI-NEXT: ; %bb.14: ; %Flow121
; VI-NEXT: v_mov_b32_e32 v12, v14
-; VI-NEXT: .LBB12_15: ; %frem.loop_exit24
+; VI-NEXT: .LBB12_15: ; %frem.loop_exit55
; VI-NEXT: v_add_u32_e32 v11, vcc, -11, v11
; VI-NEXT: v_ldexp_f32 v11, v12, v11
; VI-NEXT: v_mul_f32_e32 v12, v11, v13
@@ -13993,7 +13966,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v2|, |v6|
; VI-NEXT: s_and_b64 vcc, exec, s[2:3]
; VI-NEXT: s_cbranch_vccz .LBB12_18
-; VI-NEXT: ; %bb.17: ; %frem.else47
+; VI-NEXT: ; %bb.17: ; %frem.else16
; VI-NEXT: s_brev_b32 s2, -2
; VI-NEXT: v_bfi_b32 v10, s2, 0, v2
; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v2|, |v6|
@@ -14002,7 +13975,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_branch .LBB12_24
; VI-NEXT: .LBB12_18:
; VI-NEXT: ; implicit-def: $vgpr10
-; VI-NEXT: .LBB12_19: ; %frem.compute46
+; VI-NEXT: .LBB12_19: ; %frem.compute15
; VI-NEXT: v_frexp_mant_f32_e64 v11, |v6|
; VI-NEXT: v_ldexp_f32 v11, v11, 1
; VI-NEXT: v_div_scale_f32 v17, s[2:3], v11, v11, 1.0
@@ -14027,10 +14000,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v12
; VI-NEXT: v_div_fixup_f32 v14, v14, v11, 1.0
; VI-NEXT: s_cbranch_vccnz .LBB12_23
-; VI-NEXT: ; %bb.20: ; %frem.loop_body54.preheader
+; VI-NEXT: ; %bb.20: ; %frem.loop_body23.preheader
; VI-NEXT: v_sub_u32_e32 v12, vcc, v15, v16
; VI-NEXT: v_add_u32_e32 v12, vcc, 12, v12
-; VI-NEXT: .LBB12_21: ; %frem.loop_body54
+; VI-NEXT: .LBB12_21: ; %frem.loop_body23
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: v_mov_b32_e32 v15, v13
; VI-NEXT: v_mul_f32_e32 v13, v15, v14
@@ -14045,7 +14018,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_cbranch_vccnz .LBB12_21
; VI-NEXT: ; %bb.22: ; %Flow117
; VI-NEXT: v_mov_b32_e32 v13, v15
-; VI-NEXT: .LBB12_23: ; %frem.loop_exit55
+; VI-NEXT: .LBB12_23: ; %frem.loop_exit24
; VI-NEXT: v_add_u32_e32 v12, vcc, -11, v12
; VI-NEXT: v_ldexp_f32 v12, v13, v12
; VI-NEXT: v_mul_f32_e32 v13, v12, v14
@@ -14061,7 +14034,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v3|, |v7|
; VI-NEXT: s_and_b64 vcc, exec, s[2:3]
; VI-NEXT: s_cbranch_vccz .LBB12_26
-; VI-NEXT: ; %bb.25: ; %frem.else78
+; VI-NEXT: ; %bb.25: ; %frem.else
; VI-NEXT: s_brev_b32 s2, -2
; VI-NEXT: v_bfi_b32 v11, s2, 0, v3
; VI-NEXT: v_cmp_eq_f32_e64 vcc, |v3|, |v7|
@@ -14070,7 +14043,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_branch .LBB12_32
; VI-NEXT: .LBB12_26:
; VI-NEXT: ; implicit-def: $vgpr11
-; VI-NEXT: .LBB12_27: ; %frem.compute77
+; VI-NEXT: .LBB12_27: ; %frem.compute
; VI-NEXT: v_frexp_mant_f32_e64 v12, |v7|
; VI-NEXT: v_ldexp_f32 v12, v12, 1
; VI-NEXT: v_div_scale_f32 v18, s[2:3], v12, v12, 1.0
@@ -14095,10 +14068,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_gt_i32_e32 vcc, 13, v13
; VI-NEXT: v_div_fixup_f32 v15, v15, v12, 1.0
; VI-NEXT: s_cbranch_vccnz .LBB12_31
-; VI-NEXT: ; %bb.28: ; %frem.loop_body85.preheader
+; VI-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; VI-NEXT: v_sub_u32_e32 v13, vcc, v16, v17
; VI-NEXT: v_add_u32_e32 v13, vcc, 12, v13
-; VI-NEXT: .LBB12_29: ; %frem.loop_body85
+; VI-NEXT: .LBB12_29: ; %frem.loop_body
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: v_mov_b32_e32 v16, v14
; VI-NEXT: v_mul_f32_e32 v14, v16, v15
@@ -14113,7 +14086,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_cbranch_vccnz .LBB12_29
; VI-NEXT: ; %bb.30: ; %Flow
; VI-NEXT: v_mov_b32_e32 v14, v16
-; VI-NEXT: .LBB12_31: ; %frem.loop_exit86
+; VI-NEXT: .LBB12_31: ; %frem.loop_exit
; VI-NEXT: v_add_u32_e32 v13, vcc, -11, v13
; VI-NEXT: v_ldexp_f32 v13, v14, v13
; VI-NEXT: v_mul_f32_e32 v14, v13, v15
@@ -14161,7 +14134,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v0|, |v4|
; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3]
; GFX9-NEXT: s_cbranch_vccz .LBB12_2
-; GFX9-NEXT: ; %bb.1: ; %frem.else
+; GFX9-NEXT: ; %bb.1: ; %frem.else78
; GFX9-NEXT: s_brev_b32 s2, -2
; GFX9-NEXT: v_bfi_b32 v8, s2, 0, v0
; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v0|, |v4|
@@ -14170,7 +14143,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_branch .LBB12_8
; GFX9-NEXT: .LBB12_2:
; GFX9-NEXT: ; implicit-def: $vgpr8
-; GFX9-NEXT: .LBB12_3: ; %frem.compute
+; GFX9-NEXT: .LBB12_3: ; %frem.compute77
; GFX9-NEXT: v_frexp_mant_f32_e64 v9, |v4|
; GFX9-NEXT: v_ldexp_f32 v9, v9, 1
; GFX9-NEXT: v_div_scale_f32 v15, s[2:3], v9, v9, 1.0
@@ -14195,10 +14168,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v10
; GFX9-NEXT: v_div_fixup_f32 v12, v12, v9, 1.0
; GFX9-NEXT: s_cbranch_vccnz .LBB12_7
-; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX9-NEXT: ; %bb.4: ; %frem.loop_body85.preheader
; GFX9-NEXT: v_sub_u32_e32 v10, v13, v14
; GFX9-NEXT: v_add_u32_e32 v10, 12, v10
-; GFX9-NEXT: .LBB12_5: ; %frem.loop_body
+; GFX9-NEXT: .LBB12_5: ; %frem.loop_body85
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mov_b32_e32 v13, v11
; GFX9-NEXT: v_mul_f32_e32 v11, v13, v12
@@ -14213,7 +14186,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_cbranch_vccnz .LBB12_5
; GFX9-NEXT: ; %bb.6: ; %Flow125
; GFX9-NEXT: v_mov_b32_e32 v11, v13
-; GFX9-NEXT: .LBB12_7: ; %frem.loop_exit
+; GFX9-NEXT: .LBB12_7: ; %frem.loop_exit86
; GFX9-NEXT: v_add_u32_e32 v10, -11, v10
; GFX9-NEXT: v_ldexp_f32 v10, v11, v10
; GFX9-NEXT: v_mul_f32_e32 v11, v10, v12
@@ -14229,7 +14202,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v1|, |v5|
; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3]
; GFX9-NEXT: s_cbranch_vccz .LBB12_10
-; GFX9-NEXT: ; %bb.9: ; %frem.else16
+; GFX9-NEXT: ; %bb.9: ; %frem.else47
; GFX9-NEXT: s_brev_b32 s2, -2
; GFX9-NEXT: v_bfi_b32 v9, s2, 0, v1
; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v1|, |v5|
@@ -14238,7 +14211,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_branch .LBB12_16
; GFX9-NEXT: .LBB12_10:
; GFX9-NEXT: ; implicit-def: $vgpr9
-; GFX9-NEXT: .LBB12_11: ; %frem.compute15
+; GFX9-NEXT: .LBB12_11: ; %frem.compute46
; GFX9-NEXT: v_frexp_mant_f32_e64 v10, |v5|
; GFX9-NEXT: v_ldexp_f32 v10, v10, 1
; GFX9-NEXT: v_div_scale_f32 v16, s[2:3], v10, v10, 1.0
@@ -14263,10 +14236,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v11
; GFX9-NEXT: v_div_fixup_f32 v13, v13, v10, 1.0
; GFX9-NEXT: s_cbranch_vccnz .LBB12_15
-; GFX9-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; GFX9-NEXT: ; %bb.12: ; %frem.loop_body54.preheader
; GFX9-NEXT: v_sub_u32_e32 v11, v14, v15
; GFX9-NEXT: v_add_u32_e32 v11, 12, v11
-; GFX9-NEXT: .LBB12_13: ; %frem.loop_body23
+; GFX9-NEXT: .LBB12_13: ; %frem.loop_body54
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mov_b32_e32 v14, v12
; GFX9-NEXT: v_mul_f32_e32 v12, v14, v13
@@ -14281,7 +14254,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_cbranch_vccnz .LBB12_13
; GFX9-NEXT: ; %bb.14: ; %Flow121
; GFX9-NEXT: v_mov_b32_e32 v12, v14
-; GFX9-NEXT: .LBB12_15: ; %frem.loop_exit24
+; GFX9-NEXT: .LBB12_15: ; %frem.loop_exit55
; GFX9-NEXT: v_add_u32_e32 v11, -11, v11
; GFX9-NEXT: v_ldexp_f32 v11, v12, v11
; GFX9-NEXT: v_mul_f32_e32 v12, v11, v13
@@ -14297,7 +14270,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v2|, |v6|
; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3]
; GFX9-NEXT: s_cbranch_vccz .LBB12_18
-; GFX9-NEXT: ; %bb.17: ; %frem.else47
+; GFX9-NEXT: ; %bb.17: ; %frem.else16
; GFX9-NEXT: s_brev_b32 s2, -2
; GFX9-NEXT: v_bfi_b32 v10, s2, 0, v2
; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v2|, |v6|
@@ -14306,7 +14279,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_branch .LBB12_24
; GFX9-NEXT: .LBB12_18:
; GFX9-NEXT: ; implicit-def: $vgpr10
-; GFX9-NEXT: .LBB12_19: ; %frem.compute46
+; GFX9-NEXT: .LBB12_19: ; %frem.compute15
; GFX9-NEXT: v_frexp_mant_f32_e64 v11, |v6|
; GFX9-NEXT: v_ldexp_f32 v11, v11, 1
; GFX9-NEXT: v_div_scale_f32 v17, s[2:3], v11, v11, 1.0
@@ -14331,10 +14304,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v12
; GFX9-NEXT: v_div_fixup_f32 v14, v14, v11, 1.0
; GFX9-NEXT: s_cbranch_vccnz .LBB12_23
-; GFX9-NEXT: ; %bb.20: ; %frem.loop_body54.preheader
+; GFX9-NEXT: ; %bb.20: ; %frem.loop_body23.preheader
; GFX9-NEXT: v_sub_u32_e32 v12, v15, v16
; GFX9-NEXT: v_add_u32_e32 v12, 12, v12
-; GFX9-NEXT: .LBB12_21: ; %frem.loop_body54
+; GFX9-NEXT: .LBB12_21: ; %frem.loop_body23
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mov_b32_e32 v15, v13
; GFX9-NEXT: v_mul_f32_e32 v13, v15, v14
@@ -14349,7 +14322,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_cbranch_vccnz .LBB12_21
; GFX9-NEXT: ; %bb.22: ; %Flow117
; GFX9-NEXT: v_mov_b32_e32 v13, v15
-; GFX9-NEXT: .LBB12_23: ; %frem.loop_exit55
+; GFX9-NEXT: .LBB12_23: ; %frem.loop_exit24
; GFX9-NEXT: v_add_u32_e32 v12, -11, v12
; GFX9-NEXT: v_ldexp_f32 v12, v13, v12
; GFX9-NEXT: v_mul_f32_e32 v13, v12, v14
@@ -14365,7 +14338,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], |v3|, |v7|
; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3]
; GFX9-NEXT: s_cbranch_vccz .LBB12_26
-; GFX9-NEXT: ; %bb.25: ; %frem.else78
+; GFX9-NEXT: ; %bb.25: ; %frem.else
; GFX9-NEXT: s_brev_b32 s2, -2
; GFX9-NEXT: v_bfi_b32 v11, s2, 0, v3
; GFX9-NEXT: v_cmp_eq_f32_e64 vcc, |v3|, |v7|
@@ -14374,7 +14347,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_branch .LBB12_32
; GFX9-NEXT: .LBB12_26:
; GFX9-NEXT: ; implicit-def: $vgpr11
-; GFX9-NEXT: .LBB12_27: ; %frem.compute77
+; GFX9-NEXT: .LBB12_27: ; %frem.compute
; GFX9-NEXT: v_frexp_mant_f32_e64 v12, |v7|
; GFX9-NEXT: v_ldexp_f32 v12, v12, 1
; GFX9-NEXT: v_div_scale_f32 v18, s[2:3], v12, v12, 1.0
@@ -14399,10 +14372,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 13, v13
; GFX9-NEXT: v_div_fixup_f32 v15, v15, v12, 1.0
; GFX9-NEXT: s_cbranch_vccnz .LBB12_31
-; GFX9-NEXT: ; %bb.28: ; %frem.loop_body85.preheader
+; GFX9-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; GFX9-NEXT: v_sub_u32_e32 v13, v16, v17
; GFX9-NEXT: v_add_u32_e32 v13, 12, v13
-; GFX9-NEXT: .LBB12_29: ; %frem.loop_body85
+; GFX9-NEXT: .LBB12_29: ; %frem.loop_body
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mov_b32_e32 v16, v14
; GFX9-NEXT: v_mul_f32_e32 v14, v16, v15
@@ -14417,7 +14390,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_cbranch_vccnz .LBB12_29
; GFX9-NEXT: ; %bb.30: ; %Flow
; GFX9-NEXT: v_mov_b32_e32 v14, v16
-; GFX9-NEXT: .LBB12_31: ; %frem.loop_exit86
+; GFX9-NEXT: .LBB12_31: ; %frem.loop_exit
; GFX9-NEXT: v_add_u32_e32 v13, -11, v13
; GFX9-NEXT: v_ldexp_f32 v13, v14, v13
; GFX9-NEXT: v_mul_f32_e32 v14, v13, v15
@@ -14466,7 +14439,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v0|, |v4|
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX10-NEXT: s_cbranch_vccz .LBB12_2
-; GFX10-NEXT: ; %bb.1: ; %frem.else
+; GFX10-NEXT: ; %bb.1: ; %frem.else78
; GFX10-NEXT: v_bfi_b32 v8, 0x7fffffff, 0, v0
; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v0|, |v4|
; GFX10-NEXT: v_cndmask_b32_e32 v8, v0, v8, vcc_lo
@@ -14474,7 +14447,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: s_branch .LBB12_8
; GFX10-NEXT: .LBB12_2:
; GFX10-NEXT: ; implicit-def: $vgpr8
-; GFX10-NEXT: .LBB12_3: ; %frem.compute
+; GFX10-NEXT: .LBB12_3: ; %frem.compute77
; GFX10-NEXT: v_frexp_mant_f32_e64 v9, |v4|
; GFX10-NEXT: v_frexp_mant_f32_e64 v8, |v0|
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v11, v0
@@ -14501,10 +14474,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v12
; GFX10-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0
; GFX10-NEXT: s_cbranch_vccnz .LBB12_7
-; GFX10-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX10-NEXT: ; %bb.4: ; %frem.loop_body85.preheader
; GFX10-NEXT: s_sub_i32 s2, s2, s3
; GFX10-NEXT: s_add_i32 s2, s2, 12
-; GFX10-NEXT: .LBB12_5: ; %frem.loop_body
+; GFX10-NEXT: .LBB12_5: ; %frem.loop_body85
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v13, v10
; GFX10-NEXT: s_add_i32 s2, s2, -12
@@ -14520,7 +14493,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: ; %bb.6: ; %Flow125
; GFX10-NEXT: v_mov_b32_e32 v12, s2
; GFX10-NEXT: v_mov_b32_e32 v10, v13
-; GFX10-NEXT: .LBB12_7: ; %frem.loop_exit
+; GFX10-NEXT: .LBB12_7: ; %frem.loop_exit86
; GFX10-NEXT: v_add_nc_u32_e32 v12, -11, v12
; GFX10-NEXT: v_ldexp_f32 v10, v10, v12
; GFX10-NEXT: v_mul_f32_e32 v11, v10, v11
@@ -14535,7 +14508,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v1|, |v5|
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX10-NEXT: s_cbranch_vccz .LBB12_10
-; GFX10-NEXT: ; %bb.9: ; %frem.else16
+; GFX10-NEXT: ; %bb.9: ; %frem.else47
; GFX10-NEXT: v_bfi_b32 v9, 0x7fffffff, 0, v1
; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v1|, |v5|
; GFX10-NEXT: v_cndmask_b32_e32 v9, v1, v9, vcc_lo
@@ -14543,7 +14516,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: s_branch .LBB12_16
; GFX10-NEXT: .LBB12_10:
; GFX10-NEXT: ; implicit-def: $vgpr9
-; GFX10-NEXT: .LBB12_11: ; %frem.compute15
+; GFX10-NEXT: .LBB12_11: ; %frem.compute46
; GFX10-NEXT: v_frexp_mant_f32_e64 v10, |v5|
; GFX10-NEXT: v_frexp_mant_f32_e64 v9, |v1|
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v12, v1
@@ -14570,10 +14543,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v13
; GFX10-NEXT: v_div_fixup_f32 v12, v12, v10, 1.0
; GFX10-NEXT: s_cbranch_vccnz .LBB12_15
-; GFX10-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; GFX10-NEXT: ; %bb.12: ; %frem.loop_body54.preheader
; GFX10-NEXT: s_sub_i32 s2, s2, s3
; GFX10-NEXT: s_add_i32 s2, s2, 12
-; GFX10-NEXT: .LBB12_13: ; %frem.loop_body23
+; GFX10-NEXT: .LBB12_13: ; %frem.loop_body54
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v14, v11
; GFX10-NEXT: s_add_i32 s2, s2, -12
@@ -14589,7 +14562,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: ; %bb.14: ; %Flow121
; GFX10-NEXT: v_mov_b32_e32 v13, s2
; GFX10-NEXT: v_mov_b32_e32 v11, v14
-; GFX10-NEXT: .LBB12_15: ; %frem.loop_exit24
+; GFX10-NEXT: .LBB12_15: ; %frem.loop_exit55
; GFX10-NEXT: v_add_nc_u32_e32 v13, -11, v13
; GFX10-NEXT: v_ldexp_f32 v11, v11, v13
; GFX10-NEXT: v_mul_f32_e32 v12, v11, v12
@@ -14604,7 +14577,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v2|, |v6|
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX10-NEXT: s_cbranch_vccz .LBB12_18
-; GFX10-NEXT: ; %bb.17: ; %frem.else47
+; GFX10-NEXT: ; %bb.17: ; %frem.else16
; GFX10-NEXT: v_bfi_b32 v10, 0x7fffffff, 0, v2
; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v2|, |v6|
; GFX10-NEXT: v_cndmask_b32_e32 v10, v2, v10, vcc_lo
@@ -14612,7 +14585,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: s_branch .LBB12_24
; GFX10-NEXT: .LBB12_18:
; GFX10-NEXT: ; implicit-def: $vgpr10
-; GFX10-NEXT: .LBB12_19: ; %frem.compute46
+; GFX10-NEXT: .LBB12_19: ; %frem.compute15
; GFX10-NEXT: v_frexp_mant_f32_e64 v11, |v6|
; GFX10-NEXT: v_frexp_mant_f32_e64 v10, |v2|
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v13, v2
@@ -14639,10 +14612,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v14
; GFX10-NEXT: v_div_fixup_f32 v13, v13, v11, 1.0
; GFX10-NEXT: s_cbranch_vccnz .LBB12_23
-; GFX10-NEXT: ; %bb.20: ; %frem.loop_body54.preheader
+; GFX10-NEXT: ; %bb.20: ; %frem.loop_body23.preheader
; GFX10-NEXT: s_sub_i32 s2, s2, s3
; GFX10-NEXT: s_add_i32 s2, s2, 12
-; GFX10-NEXT: .LBB12_21: ; %frem.loop_body54
+; GFX10-NEXT: .LBB12_21: ; %frem.loop_body23
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v15, v12
; GFX10-NEXT: s_add_i32 s2, s2, -12
@@ -14658,7 +14631,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: ; %bb.22: ; %Flow117
; GFX10-NEXT: v_mov_b32_e32 v14, s2
; GFX10-NEXT: v_mov_b32_e32 v12, v15
-; GFX10-NEXT: .LBB12_23: ; %frem.loop_exit55
+; GFX10-NEXT: .LBB12_23: ; %frem.loop_exit24
; GFX10-NEXT: v_add_nc_u32_e32 v14, -11, v14
; GFX10-NEXT: v_ldexp_f32 v12, v12, v14
; GFX10-NEXT: v_mul_f32_e32 v13, v12, v13
@@ -14673,7 +14646,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_ngt_f32_e64 s2, |v3|, |v7|
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX10-NEXT: s_cbranch_vccz .LBB12_26
-; GFX10-NEXT: ; %bb.25: ; %frem.else78
+; GFX10-NEXT: ; %bb.25: ; %frem.else
; GFX10-NEXT: v_bfi_b32 v11, 0x7fffffff, 0, v3
; GFX10-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v3|, |v7|
; GFX10-NEXT: v_cndmask_b32_e32 v11, v3, v11, vcc_lo
@@ -14681,7 +14654,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: s_branch .LBB12_32
; GFX10-NEXT: .LBB12_26:
; GFX10-NEXT: ; implicit-def: $vgpr11
-; GFX10-NEXT: .LBB12_27: ; %frem.compute77
+; GFX10-NEXT: .LBB12_27: ; %frem.compute
; GFX10-NEXT: v_frexp_mant_f32_e64 v12, |v7|
; GFX10-NEXT: v_frexp_mant_f32_e64 v11, |v3|
; GFX10-NEXT: v_frexp_exp_i32_f32_e32 v14, v3
@@ -14708,10 +14681,10 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v15
; GFX10-NEXT: v_div_fixup_f32 v14, v14, v12, 1.0
; GFX10-NEXT: s_cbranch_vccnz .LBB12_31
-; GFX10-NEXT: ; %bb.28: ; %frem.loop_body85.preheader
+; GFX10-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; GFX10-NEXT: s_sub_i32 s2, s2, s3
; GFX10-NEXT: s_add_i32 s2, s2, 12
-; GFX10-NEXT: .LBB12_29: ; %frem.loop_body85
+; GFX10-NEXT: .LBB12_29: ; %frem.loop_body
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v16, v13
; GFX10-NEXT: s_add_i32 s2, s2, -12
@@ -14727,7 +14700,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: ; %bb.30: ; %Flow
; GFX10-NEXT: v_mov_b32_e32 v15, s2
; GFX10-NEXT: v_mov_b32_e32 v13, v16
-; GFX10-NEXT: .LBB12_31: ; %frem.loop_exit86
+; GFX10-NEXT: .LBB12_31: ; %frem.loop_exit
; GFX10-NEXT: v_add_nc_u32_e32 v15, -11, v15
; GFX10-NEXT: v_ldexp_f32 v13, v13, v15
; GFX10-NEXT: v_mul_f32_e32 v14, v13, v14
@@ -14773,7 +14746,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v0|, |v4|
; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX11-NEXT: s_cbranch_vccz .LBB12_2
-; GFX11-NEXT: ; %bb.1: ; %frem.else
+; GFX11-NEXT: ; %bb.1: ; %frem.else78
; GFX11-NEXT: v_bfi_b32 v8, 0x7fffffff, 0, v0
; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v0|, |v4|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
@@ -14782,7 +14755,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_branch .LBB12_8
; GFX11-NEXT: .LBB12_2:
; GFX11-NEXT: ; implicit-def: $vgpr8
-; GFX11-NEXT: .LBB12_3: ; %frem.compute
+; GFX11-NEXT: .LBB12_3: ; %frem.compute77
; GFX11-NEXT: v_frexp_mant_f32_e64 v9, |v4|
; GFX11-NEXT: v_frexp_mant_f32_e64 v8, |v0|
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v11, v0
@@ -14818,11 +14791,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_div_fixup_f32 v11, v11, v9, 1.0
; GFX11-NEXT: s_cbranch_vccnz .LBB12_7
-; GFX11-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX11-NEXT: ; %bb.4: ; %frem.loop_body85.preheader
; GFX11-NEXT: s_sub_i32 s2, s2, s3
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_add_i32 s2, s2, 12
-; GFX11-NEXT: .LBB12_5: ; %frem.loop_body
+; GFX11-NEXT: .LBB12_5: ; %frem.loop_body85
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v13, v10
@@ -14842,7 +14815,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: ; %bb.6: ; %Flow125
; GFX11-NEXT: v_mov_b32_e32 v12, s2
; GFX11-NEXT: v_mov_b32_e32 v10, v13
-; GFX11-NEXT: .LBB12_7: ; %frem.loop_exit
+; GFX11-NEXT: .LBB12_7: ; %frem.loop_exit86
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_add_nc_u32_e32 v12, -11, v12
; GFX11-NEXT: v_ldexp_f32 v10, v10, v12
@@ -14862,7 +14835,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v1|, |v5|
; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX11-NEXT: s_cbranch_vccz .LBB12_10
-; GFX11-NEXT: ; %bb.9: ; %frem.else16
+; GFX11-NEXT: ; %bb.9: ; %frem.else47
; GFX11-NEXT: v_bfi_b32 v9, 0x7fffffff, 0, v1
; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v1|, |v5|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
@@ -14871,7 +14844,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_branch .LBB12_16
; GFX11-NEXT: .LBB12_10:
; GFX11-NEXT: ; implicit-def: $vgpr9
-; GFX11-NEXT: .LBB12_11: ; %frem.compute15
+; GFX11-NEXT: .LBB12_11: ; %frem.compute46
; GFX11-NEXT: v_frexp_mant_f32_e64 v10, |v5|
; GFX11-NEXT: v_frexp_mant_f32_e64 v9, |v1|
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v12, v1
@@ -14907,11 +14880,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_div_fixup_f32 v12, v12, v10, 1.0
; GFX11-NEXT: s_cbranch_vccnz .LBB12_15
-; GFX11-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; GFX11-NEXT: ; %bb.12: ; %frem.loop_body54.preheader
; GFX11-NEXT: s_sub_i32 s2, s2, s3
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_add_i32 s2, s2, 12
-; GFX11-NEXT: .LBB12_13: ; %frem.loop_body23
+; GFX11-NEXT: .LBB12_13: ; %frem.loop_body54
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v14, v11
@@ -14931,7 +14904,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: ; %bb.14: ; %Flow121
; GFX11-NEXT: v_mov_b32_e32 v13, s2
; GFX11-NEXT: v_mov_b32_e32 v11, v14
-; GFX11-NEXT: .LBB12_15: ; %frem.loop_exit24
+; GFX11-NEXT: .LBB12_15: ; %frem.loop_exit55
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_add_nc_u32_e32 v13, -11, v13
; GFX11-NEXT: v_ldexp_f32 v11, v11, v13
@@ -14951,7 +14924,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v2|, |v6|
; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX11-NEXT: s_cbranch_vccz .LBB12_18
-; GFX11-NEXT: ; %bb.17: ; %frem.else47
+; GFX11-NEXT: ; %bb.17: ; %frem.else16
; GFX11-NEXT: v_bfi_b32 v10, 0x7fffffff, 0, v2
; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v2|, |v6|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
@@ -14960,7 +14933,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_branch .LBB12_24
; GFX11-NEXT: .LBB12_18:
; GFX11-NEXT: ; implicit-def: $vgpr10
-; GFX11-NEXT: .LBB12_19: ; %frem.compute46
+; GFX11-NEXT: .LBB12_19: ; %frem.compute15
; GFX11-NEXT: v_frexp_mant_f32_e64 v11, |v6|
; GFX11-NEXT: v_frexp_mant_f32_e64 v10, |v2|
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v13, v2
@@ -14996,11 +14969,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_div_fixup_f32 v13, v13, v11, 1.0
; GFX11-NEXT: s_cbranch_vccnz .LBB12_23
-; GFX11-NEXT: ; %bb.20: ; %frem.loop_body54.preheader
+; GFX11-NEXT: ; %bb.20: ; %frem.loop_body23.preheader
; GFX11-NEXT: s_sub_i32 s2, s2, s3
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_add_i32 s2, s2, 12
-; GFX11-NEXT: .LBB12_21: ; %frem.loop_body54
+; GFX11-NEXT: .LBB12_21: ; %frem.loop_body23
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v15, v12
@@ -15020,7 +14993,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: ; %bb.22: ; %Flow117
; GFX11-NEXT: v_mov_b32_e32 v14, s2
; GFX11-NEXT: v_mov_b32_e32 v12, v15
-; GFX11-NEXT: .LBB12_23: ; %frem.loop_exit55
+; GFX11-NEXT: .LBB12_23: ; %frem.loop_exit24
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_add_nc_u32_e32 v14, -11, v14
; GFX11-NEXT: v_ldexp_f32 v12, v12, v14
@@ -15040,7 +15013,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: v_cmp_ngt_f32_e64 s2, |v3|, |v7|
; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX11-NEXT: s_cbranch_vccz .LBB12_26
-; GFX11-NEXT: ; %bb.25: ; %frem.else78
+; GFX11-NEXT: ; %bb.25: ; %frem.else
; GFX11-NEXT: v_bfi_b32 v11, 0x7fffffff, 0, v3
; GFX11-NEXT: v_cmp_eq_f32_e64 vcc_lo, |v3|, |v7|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
@@ -15049,7 +15022,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_branch .LBB12_32
; GFX11-NEXT: .LBB12_26:
; GFX11-NEXT: ; implicit-def: $vgpr11
-; GFX11-NEXT: .LBB12_27: ; %frem.compute77
+; GFX11-NEXT: .LBB12_27: ; %frem.compute
; GFX11-NEXT: v_frexp_mant_f32_e64 v12, |v7|
; GFX11-NEXT: v_frexp_mant_f32_e64 v11, |v3|
; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v14, v3
@@ -15085,11 +15058,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_div_fixup_f32 v14, v14, v12, 1.0
; GFX11-NEXT: s_cbranch_vccnz .LBB12_31
-; GFX11-NEXT: ; %bb.28: ; %frem.loop_body85.preheader
+; GFX11-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; GFX11-NEXT: s_sub_i32 s2, s2, s3
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_add_i32 s2, s2, 12
-; GFX11-NEXT: .LBB12_29: ; %frem.loop_body85
+; GFX11-NEXT: .LBB12_29: ; %frem.loop_body
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v16, v13
@@ -15109,7 +15082,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: ; %bb.30: ; %Flow
; GFX11-NEXT: v_mov_b32_e32 v15, s2
; GFX11-NEXT: v_mov_b32_e32 v13, v16
-; GFX11-NEXT: .LBB12_31: ; %frem.loop_exit86
+; GFX11-NEXT: .LBB12_31: ; %frem.loop_exit
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_add_nc_u32_e32 v15, -11, v15
; GFX11-NEXT: v_ldexp_f32 v13, v13, v15
@@ -15170,7 +15143,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-NEXT: s_cmp_ngt_f32 s5, s12
; GFX1150-NEXT: s_cbranch_scc0 .LBB12_2
-; GFX1150-NEXT: ; %bb.1: ; %frem.else
+; GFX1150-NEXT: ; %bb.1: ; %frem.else78
; GFX1150-NEXT: s_cmp_eq_f32 s5, s12
; GFX1150-NEXT: v_bfi_b32 v0, 0x7fffffff, 0, s8
; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -15180,7 +15153,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_branch .LBB12_8
; GFX1150-NEXT: .LBB12_2:
; GFX1150-NEXT: ; implicit-def: $vgpr0
-; GFX1150-NEXT: .LBB12_3: ; %frem.compute
+; GFX1150-NEXT: .LBB12_3: ; %frem.compute77
; GFX1150-NEXT: v_frexp_mant_f32_e64 v1, |s6|
; GFX1150-NEXT: v_frexp_mant_f32_e64 v0, |s8|
; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v3, s8
@@ -15215,11 +15188,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4
; GFX1150-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
; GFX1150-NEXT: s_cbranch_vccnz .LBB12_7
-; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body85.preheader
; GFX1150-NEXT: s_sub_i32 s11, s11, s12
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-NEXT: s_add_i32 s11, s11, 12
-; GFX1150-NEXT: .LBB12_5: ; %frem.loop_body
+; GFX1150-NEXT: .LBB12_5: ; %frem.loop_body85
; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-NEXT: v_mov_b32_e32 v5, v2
@@ -15241,7 +15214,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: ; %bb.6: ; %Flow125
; GFX1150-NEXT: v_mov_b32_e32 v4, s11
; GFX1150-NEXT: v_mov_b32_e32 v2, v5
-; GFX1150-NEXT: .LBB12_7: ; %frem.loop_exit
+; GFX1150-NEXT: .LBB12_7: ; %frem.loop_exit86
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-NEXT: v_add_nc_u32_e32 v4, -11, v4
; GFX1150-NEXT: v_ldexp_f32 v2, v2, v4
@@ -15264,7 +15237,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-NEXT: s_cmp_ngt_f32 s8, s12
; GFX1150-NEXT: s_cbranch_scc0 .LBB12_10
-; GFX1150-NEXT: ; %bb.9: ; %frem.else16
+; GFX1150-NEXT: ; %bb.9: ; %frem.else47
; GFX1150-NEXT: s_cmp_eq_f32 s8, s12
; GFX1150-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, s10
; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -15274,7 +15247,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_branch .LBB12_16
; GFX1150-NEXT: .LBB12_10:
; GFX1150-NEXT: ; implicit-def: $vgpr1
-; GFX1150-NEXT: .LBB12_11: ; %frem.compute15
+; GFX1150-NEXT: .LBB12_11: ; %frem.compute46
; GFX1150-NEXT: v_frexp_mant_f32_e64 v2, |s4|
; GFX1150-NEXT: v_frexp_mant_f32_e64 v1, |s10|
; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v4, s10
@@ -15309,11 +15282,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v5
; GFX1150-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0
; GFX1150-NEXT: s_cbranch_vccnz .LBB12_15
-; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body54.preheader
; GFX1150-NEXT: s_sub_i32 s11, s11, s12
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-NEXT: s_add_i32 s11, s11, 12
-; GFX1150-NEXT: .LBB12_13: ; %frem.loop_body23
+; GFX1150-NEXT: .LBB12_13: ; %frem.loop_body54
; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-NEXT: v_mov_b32_e32 v6, v3
@@ -15335,7 +15308,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: ; %bb.14: ; %Flow121
; GFX1150-NEXT: v_mov_b32_e32 v5, s11
; GFX1150-NEXT: v_mov_b32_e32 v3, v6
-; GFX1150-NEXT: .LBB12_15: ; %frem.loop_exit24
+; GFX1150-NEXT: .LBB12_15: ; %frem.loop_exit55
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-NEXT: v_add_nc_u32_e32 v5, -11, v5
; GFX1150-NEXT: v_ldexp_f32 v3, v3, v5
@@ -15358,7 +15331,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-NEXT: s_cmp_ngt_f32 s10, s12
; GFX1150-NEXT: s_cbranch_scc0 .LBB12_18
-; GFX1150-NEXT: ; %bb.17: ; %frem.else47
+; GFX1150-NEXT: ; %bb.17: ; %frem.else16
; GFX1150-NEXT: s_cmp_eq_f32 s10, s12
; GFX1150-NEXT: v_bfi_b32 v2, 0x7fffffff, 0, s9
; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -15368,7 +15341,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_branch .LBB12_24
; GFX1150-NEXT: .LBB12_18:
; GFX1150-NEXT: ; implicit-def: $vgpr2
-; GFX1150-NEXT: .LBB12_19: ; %frem.compute46
+; GFX1150-NEXT: .LBB12_19: ; %frem.compute15
; GFX1150-NEXT: v_frexp_mant_f32_e64 v3, |s3|
; GFX1150-NEXT: v_frexp_mant_f32_e64 v2, |s9|
; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v5, s9
@@ -15403,11 +15376,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v6
; GFX1150-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0
; GFX1150-NEXT: s_cbranch_vccnz .LBB12_23
-; GFX1150-NEXT: ; %bb.20: ; %frem.loop_body54.preheader
+; GFX1150-NEXT: ; %bb.20: ; %frem.loop_body23.preheader
; GFX1150-NEXT: s_sub_i32 s11, s11, s12
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-NEXT: s_add_i32 s11, s11, 12
-; GFX1150-NEXT: .LBB12_21: ; %frem.loop_body54
+; GFX1150-NEXT: .LBB12_21: ; %frem.loop_body23
; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-NEXT: v_mov_b32_e32 v7, v4
@@ -15429,7 +15402,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: ; %bb.22: ; %Flow117
; GFX1150-NEXT: v_mov_b32_e32 v6, s11
; GFX1150-NEXT: v_mov_b32_e32 v4, v7
-; GFX1150-NEXT: .LBB12_23: ; %frem.loop_exit55
+; GFX1150-NEXT: .LBB12_23: ; %frem.loop_exit24
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-NEXT: v_add_nc_u32_e32 v6, -11, v6
; GFX1150-NEXT: v_ldexp_f32 v4, v4, v6
@@ -15452,7 +15425,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-NEXT: s_cmp_ngt_f32 s9, s12
; GFX1150-NEXT: s_cbranch_scc0 .LBB12_26
-; GFX1150-NEXT: ; %bb.25: ; %frem.else78
+; GFX1150-NEXT: ; %bb.25: ; %frem.else
; GFX1150-NEXT: s_cmp_eq_f32 s9, s12
; GFX1150-NEXT: v_bfi_b32 v3, 0x7fffffff, 0, s7
; GFX1150-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -15462,7 +15435,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_branch .LBB12_32
; GFX1150-NEXT: .LBB12_26:
; GFX1150-NEXT: ; implicit-def: $vgpr3
-; GFX1150-NEXT: .LBB12_27: ; %frem.compute77
+; GFX1150-NEXT: .LBB12_27: ; %frem.compute
; GFX1150-NEXT: v_frexp_mant_f32_e64 v4, |s2|
; GFX1150-NEXT: v_frexp_mant_f32_e64 v3, |s7|
; GFX1150-NEXT: v_frexp_exp_i32_f32_e32 v6, s7
@@ -15497,11 +15470,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v7
; GFX1150-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0
; GFX1150-NEXT: s_cbranch_vccnz .LBB12_31
-; GFX1150-NEXT: ; %bb.28: ; %frem.loop_body85.preheader
+; GFX1150-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; GFX1150-NEXT: s_sub_i32 s11, s11, s12
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-NEXT: s_add_i32 s11, s11, 12
-; GFX1150-NEXT: .LBB12_29: ; %frem.loop_body85
+; GFX1150-NEXT: .LBB12_29: ; %frem.loop_body
; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-NEXT: v_mov_b32_e32 v8, v5
@@ -15523,7 +15496,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: ; %bb.30: ; %Flow
; GFX1150-NEXT: v_mov_b32_e32 v7, s11
; GFX1150-NEXT: v_mov_b32_e32 v5, v8
-; GFX1150-NEXT: .LBB12_31: ; %frem.loop_exit86
+; GFX1150-NEXT: .LBB12_31: ; %frem.loop_exit
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-NEXT: v_add_nc_u32_e32 v7, -11, v7
; GFX1150-NEXT: v_ldexp_f32 v5, v5, v7
@@ -15597,7 +15570,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1200-NEXT: s_cmp_ngt_f32 s5, s12
; GFX1200-NEXT: s_cbranch_scc0 .LBB12_2
-; GFX1200-NEXT: ; %bb.1: ; %frem.else
+; GFX1200-NEXT: ; %bb.1: ; %frem.else78
; GFX1200-NEXT: s_cmp_eq_f32 s5, s12
; GFX1200-NEXT: v_bfi_b32 v0, 0x7fffffff, 0, s8
; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -15607,7 +15580,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: s_branch .LBB12_8
; GFX1200-NEXT: .LBB12_2:
; GFX1200-NEXT: ; implicit-def: $vgpr0
-; GFX1200-NEXT: .LBB12_3: ; %frem.compute
+; GFX1200-NEXT: .LBB12_3: ; %frem.compute77
; GFX1200-NEXT: v_frexp_mant_f32_e64 v1, |s6|
; GFX1200-NEXT: v_frexp_mant_f32_e64 v0, |s8|
; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v3, s8
@@ -15643,11 +15616,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v4
; GFX1200-NEXT: v_div_fixup_f32 v3, v3, v1, 1.0
; GFX1200-NEXT: s_cbranch_vccnz .LBB12_7
-; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body85.preheader
; GFX1200-NEXT: s_sub_co_i32 s11, s11, s12
; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1200-NEXT: s_add_co_i32 s11, s11, 12
-; GFX1200-NEXT: .LBB12_5: ; %frem.loop_body
+; GFX1200-NEXT: .LBB12_5: ; %frem.loop_body85
; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1200-NEXT: v_mov_b32_e32 v5, v2
@@ -15670,7 +15643,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: ; %bb.6: ; %Flow125
; GFX1200-NEXT: v_mov_b32_e32 v4, s11
; GFX1200-NEXT: v_mov_b32_e32 v2, v5
-; GFX1200-NEXT: .LBB12_7: ; %frem.loop_exit
+; GFX1200-NEXT: .LBB12_7: ; %frem.loop_exit86
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-NEXT: v_add_nc_u32_e32 v4, -11, v4
; GFX1200-NEXT: v_ldexp_f32 v2, v2, v4
@@ -15694,7 +15667,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_cmp_ngt_f32 s8, s12
; GFX1200-NEXT: s_cbranch_scc0 .LBB12_10
-; GFX1200-NEXT: ; %bb.9: ; %frem.else16
+; GFX1200-NEXT: ; %bb.9: ; %frem.else47
; GFX1200-NEXT: s_cmp_eq_f32 s8, s12
; GFX1200-NEXT: v_bfi_b32 v1, 0x7fffffff, 0, s10
; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -15705,7 +15678,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: s_branch .LBB12_16
; GFX1200-NEXT: .LBB12_10:
; GFX1200-NEXT: ; implicit-def: $vgpr1
-; GFX1200-NEXT: .LBB12_11: ; %frem.compute15
+; GFX1200-NEXT: .LBB12_11: ; %frem.compute46
; GFX1200-NEXT: v_frexp_mant_f32_e64 v2, |s4|
; GFX1200-NEXT: v_frexp_mant_f32_e64 v1, |s10|
; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v4, s10
@@ -15741,11 +15714,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v5
; GFX1200-NEXT: v_div_fixup_f32 v4, v4, v2, 1.0
; GFX1200-NEXT: s_cbranch_vccnz .LBB12_15
-; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body54.preheader
; GFX1200-NEXT: s_sub_co_i32 s11, s11, s12
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_add_co_i32 s11, s11, 12
-; GFX1200-NEXT: .LBB12_13: ; %frem.loop_body23
+; GFX1200-NEXT: .LBB12_13: ; %frem.loop_body54
; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-NEXT: v_mov_b32_e32 v6, v3
@@ -15769,7 +15742,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: ; %bb.14: ; %Flow121
; GFX1200-NEXT: v_mov_b32_e32 v5, s11
; GFX1200-NEXT: v_mov_b32_e32 v3, v6
-; GFX1200-NEXT: .LBB12_15: ; %frem.loop_exit24
+; GFX1200-NEXT: .LBB12_15: ; %frem.loop_exit55
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-NEXT: v_add_nc_u32_e32 v5, -11, v5
; GFX1200-NEXT: v_ldexp_f32 v3, v3, v5
@@ -15793,7 +15766,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_cmp_ngt_f32 s10, s12
; GFX1200-NEXT: s_cbranch_scc0 .LBB12_18
-; GFX1200-NEXT: ; %bb.17: ; %frem.else47
+; GFX1200-NEXT: ; %bb.17: ; %frem.else16
; GFX1200-NEXT: s_cmp_eq_f32 s10, s12
; GFX1200-NEXT: v_bfi_b32 v2, 0x7fffffff, 0, s9
; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -15804,7 +15777,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: s_branch .LBB12_24
; GFX1200-NEXT: .LBB12_18:
; GFX1200-NEXT: ; implicit-def: $vgpr2
-; GFX1200-NEXT: .LBB12_19: ; %frem.compute46
+; GFX1200-NEXT: .LBB12_19: ; %frem.compute15
; GFX1200-NEXT: v_frexp_mant_f32_e64 v3, |s3|
; GFX1200-NEXT: v_frexp_mant_f32_e64 v2, |s9|
; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v5, s9
@@ -15840,11 +15813,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v6
; GFX1200-NEXT: v_div_fixup_f32 v5, v5, v3, 1.0
; GFX1200-NEXT: s_cbranch_vccnz .LBB12_23
-; GFX1200-NEXT: ; %bb.20: ; %frem.loop_body54.preheader
+; GFX1200-NEXT: ; %bb.20: ; %frem.loop_body23.preheader
; GFX1200-NEXT: s_sub_co_i32 s11, s11, s12
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_add_co_i32 s11, s11, 12
-; GFX1200-NEXT: .LBB12_21: ; %frem.loop_body54
+; GFX1200-NEXT: .LBB12_21: ; %frem.loop_body23
; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-NEXT: v_mov_b32_e32 v7, v4
@@ -15868,7 +15841,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: ; %bb.22: ; %Flow117
; GFX1200-NEXT: v_mov_b32_e32 v6, s11
; GFX1200-NEXT: v_mov_b32_e32 v4, v7
-; GFX1200-NEXT: .LBB12_23: ; %frem.loop_exit55
+; GFX1200-NEXT: .LBB12_23: ; %frem.loop_exit24
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-NEXT: v_add_nc_u32_e32 v6, -11, v6
; GFX1200-NEXT: v_ldexp_f32 v4, v4, v6
@@ -15892,7 +15865,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_cmp_ngt_f32 s9, s12
; GFX1200-NEXT: s_cbranch_scc0 .LBB12_26
-; GFX1200-NEXT: ; %bb.25: ; %frem.else78
+; GFX1200-NEXT: ; %bb.25: ; %frem.else
; GFX1200-NEXT: s_cmp_eq_f32 s9, s12
; GFX1200-NEXT: v_bfi_b32 v3, 0x7fffffff, 0, s7
; GFX1200-NEXT: s_cselect_b32 vcc_lo, -1, 0
@@ -15903,7 +15876,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: s_branch .LBB12_32
; GFX1200-NEXT: .LBB12_26:
; GFX1200-NEXT: ; implicit-def: $vgpr3
-; GFX1200-NEXT: .LBB12_27: ; %frem.compute77
+; GFX1200-NEXT: .LBB12_27: ; %frem.compute
; GFX1200-NEXT: v_frexp_mant_f32_e64 v4, |s2|
; GFX1200-NEXT: v_frexp_mant_f32_e64 v3, |s7|
; GFX1200-NEXT: v_frexp_exp_i32_f32_e32 v6, s7
@@ -15939,11 +15912,11 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 13, v7
; GFX1200-NEXT: v_div_fixup_f32 v6, v6, v4, 1.0
; GFX1200-NEXT: s_cbranch_vccnz .LBB12_31
-; GFX1200-NEXT: ; %bb.28: ; %frem.loop_body85.preheader
+; GFX1200-NEXT: ; %bb.28: ; %frem.loop_body.preheader
; GFX1200-NEXT: s_sub_co_i32 s11, s11, s12
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_add_co_i32 s11, s11, 12
-; GFX1200-NEXT: .LBB12_29: ; %frem.loop_body85
+; GFX1200-NEXT: .LBB12_29: ; %frem.loop_body
; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1200-NEXT: v_mov_b32_e32 v8, v5
@@ -15967,7 +15940,7 @@ define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: ; %bb.30: ; %Flow
; GFX1200-NEXT: v_mov_b32_e32 v7, s11
; GFX1200-NEXT: v_mov_b32_e32 v5, v8
-; GFX1200-NEXT: .LBB12_31: ; %frem.loop_exit86
+; GFX1200-NEXT: .LBB12_31: ; %frem.loop_exit
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-NEXT: v_add_nc_u32_e32 v7, -11, v7
; GFX1200-NEXT: v_ldexp_f32 v5, v5, v7
@@ -16048,7 +16021,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_cmp_ngt_f64_e64 s[0:1], |v[0:1]|, |v[4:5]|
; SI-NEXT: s_and_b64 vcc, exec, s[0:1]
; SI-NEXT: s_cbranch_vccz .LBB13_2
-; SI-NEXT: ; %bb.1: ; %frem.else
+; SI-NEXT: ; %bb.1: ; %frem.else16
; SI-NEXT: v_and_b32_e32 v8, 0x80000000, v1
; SI-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, |v[4:5]|
; SI-NEXT: v_cndmask_b32_e32 v9, v1, v8, vcc
@@ -16059,7 +16032,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: .LBB13_2:
; SI-NEXT: ; implicit-def: $vgpr8_vgpr9
; SI-NEXT: s_mov_b64 vcc, 0
-; SI-NEXT: .LBB13_3: ; %frem.compute
+; SI-NEXT: .LBB13_3: ; %frem.compute15
; SI-NEXT: s_brev_b32 s5, -2
; SI-NEXT: v_and_b32_e32 v10, 0x7fffffff, v1
; SI-NEXT: s_mov_b32 s0, 0
@@ -16105,13 +16078,13 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0
; SI-NEXT: s_cmp_lt_i32 s6, 27
; SI-NEXT: s_cbranch_scc1 .LBB13_7
-; SI-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; SI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; SI-NEXT: s_sub_i32 s0, s3, s7
; SI-NEXT: s_add_i32 s6, s0, 26
; SI-NEXT: s_mov_b32 s3, 0x432fffff
; SI-NEXT: v_mov_b32_e32 v18, 0x43300000
; SI-NEXT: v_mov_b32_e32 v14, 0
-; SI-NEXT: .LBB13_5: ; %frem.loop_body
+; SI-NEXT: .LBB13_5: ; %frem.loop_body23
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_mov_b32_e32 v17, v11
; SI-NEXT: v_mov_b32_e32 v16, v10
@@ -16134,7 +16107,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: ; %bb.6: ; %Flow51
; SI-NEXT: v_mov_b32_e32 v10, v16
; SI-NEXT: v_mov_b32_e32 v11, v17
-; SI-NEXT: .LBB13_7: ; %frem.loop_exit
+; SI-NEXT: .LBB13_7: ; %frem.loop_exit24
; SI-NEXT: s_sub_i32 s0, s6, 25
; SI-NEXT: v_ldexp_f64 v[10:11], v[10:11], s0
; SI-NEXT: v_mul_f64 v[12:13], v[10:11], v[12:13]
@@ -16160,7 +16133,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_cmp_ngt_f64_e64 s[0:1], |v[2:3]|, |v[6:7]|
; SI-NEXT: s_and_b64 vcc, exec, s[0:1]
; SI-NEXT: s_cbranch_vccz .LBB13_10
-; SI-NEXT: ; %bb.9: ; %frem.else16
+; SI-NEXT: ; %bb.9: ; %frem.else
; SI-NEXT: v_and_b32_e32 v10, 0x80000000, v3
; SI-NEXT: v_cmp_eq_f64_e64 vcc, |v[2:3]|, |v[6:7]|
; SI-NEXT: v_cndmask_b32_e32 v11, v3, v10, vcc
@@ -16171,7 +16144,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: .LBB13_10:
; SI-NEXT: ; implicit-def: $vgpr10_vgpr11
; SI-NEXT: s_mov_b64 vcc, 0
-; SI-NEXT: .LBB13_11: ; %frem.compute15
+; SI-NEXT: .LBB13_11: ; %frem.compute
; SI-NEXT: s_brev_b32 s5, -2
; SI-NEXT: v_and_b32_e32 v12, 0x7fffffff, v3
; SI-NEXT: s_mov_b32 s0, 0
@@ -16217,13 +16190,13 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0
; SI-NEXT: s_cmp_lt_i32 s6, 27
; SI-NEXT: s_cbranch_scc1 .LBB13_15
-; SI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; SI-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; SI-NEXT: s_sub_i32 s0, s3, s7
; SI-NEXT: s_add_i32 s6, s0, 26
; SI-NEXT: s_mov_b32 s3, 0x432fffff
; SI-NEXT: v_mov_b32_e32 v20, 0x43300000
; SI-NEXT: v_mov_b32_e32 v16, 0
-; SI-NEXT: .LBB13_13: ; %frem.loop_body23
+; SI-NEXT: .LBB13_13: ; %frem.loop_body
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_mov_b32_e32 v19, v13
; SI-NEXT: v_mov_b32_e32 v18, v12
@@ -16246,7 +16219,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; SI-NEXT: ; %bb.14: ; %Flow
; SI-NEXT: v_mov_b32_e32 v12, v18
; SI-NEXT: v_mov_b32_e32 v13, v19
-; SI-NEXT: .LBB13_15: ; %frem.loop_exit24
+; SI-NEXT: .LBB13_15: ; %frem.loop_exit
; SI-NEXT: s_sub_i32 s0, s6, 25
; SI-NEXT: v_ldexp_f64 v[12:13], v[12:13], s0
; SI-NEXT: v_mul_f64 v[14:15], v[12:13], v[14:15]
@@ -16304,7 +16277,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[0:1]|, |v[4:5]|
; CI-NEXT: s_and_b64 vcc, exec, s[2:3]
; CI-NEXT: s_cbranch_vccz .LBB13_2
-; CI-NEXT: ; %bb.1: ; %frem.else
+; CI-NEXT: ; %bb.1: ; %frem.else16
; CI-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, |v[4:5]|
; CI-NEXT: v_and_b32_e32 v8, 0x80000000, v1
; CI-NEXT: v_cndmask_b32_e32 v9, v1, v8, vcc
@@ -16313,7 +16286,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_branch .LBB13_8
; CI-NEXT: .LBB13_2:
; CI-NEXT: ; implicit-def: $vgpr8_vgpr9
-; CI-NEXT: .LBB13_3: ; %frem.compute
+; CI-NEXT: .LBB13_3: ; %frem.compute15
; CI-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]|
; CI-NEXT: v_frexp_exp_i32_f64_e32 v15, v[4:5]
; CI-NEXT: v_frexp_exp_i32_f64_e32 v14, v[0:1]
@@ -16337,10 +16310,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_gt_i32_e32 vcc, 27, v17
; CI-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0
; CI-NEXT: s_cbranch_vccnz .LBB13_7
-; CI-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; CI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; CI-NEXT: v_sub_i32_e32 v14, vcc, v14, v15
; CI-NEXT: v_add_i32_e32 v17, vcc, 26, v14
-; CI-NEXT: .LBB13_5: ; %frem.loop_body
+; CI-NEXT: .LBB13_5: ; %frem.loop_body23
; CI-NEXT: ; =>This Inner Loop Header: Depth=1
; CI-NEXT: v_mov_b32_e32 v15, v11
; CI-NEXT: v_mov_b32_e32 v14, v10
@@ -16358,7 +16331,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: ; %bb.6: ; %Flow51
; CI-NEXT: v_mov_b32_e32 v10, v14
; CI-NEXT: v_mov_b32_e32 v11, v15
-; CI-NEXT: .LBB13_7: ; %frem.loop_exit
+; CI-NEXT: .LBB13_7: ; %frem.loop_exit24
; CI-NEXT: v_subrev_i32_e32 v14, vcc, 25, v17
; CI-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14
; CI-NEXT: s_brev_b32 s2, -2
@@ -16375,7 +16348,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[2:3]|, |v[6:7]|
; CI-NEXT: s_and_b64 vcc, exec, s[2:3]
; CI-NEXT: s_cbranch_vccz .LBB13_10
-; CI-NEXT: ; %bb.9: ; %frem.else16
+; CI-NEXT: ; %bb.9: ; %frem.else
; CI-NEXT: v_cmp_eq_f64_e64 vcc, |v[2:3]|, |v[6:7]|
; CI-NEXT: v_and_b32_e32 v10, 0x80000000, v3
; CI-NEXT: v_cndmask_b32_e32 v11, v3, v10, vcc
@@ -16384,7 +16357,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: s_branch .LBB13_16
; CI-NEXT: .LBB13_10:
; CI-NEXT: ; implicit-def: $vgpr10_vgpr11
-; CI-NEXT: .LBB13_11: ; %frem.compute15
+; CI-NEXT: .LBB13_11: ; %frem.compute
; CI-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]|
; CI-NEXT: v_frexp_exp_i32_f64_e32 v17, v[6:7]
; CI-NEXT: v_frexp_exp_i32_f64_e32 v16, v[2:3]
@@ -16408,10 +16381,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: v_cmp_gt_i32_e32 vcc, 27, v19
; CI-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0
; CI-NEXT: s_cbranch_vccnz .LBB13_15
-; CI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; CI-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; CI-NEXT: v_sub_i32_e32 v16, vcc, v16, v17
; CI-NEXT: v_add_i32_e32 v19, vcc, 26, v16
-; CI-NEXT: .LBB13_13: ; %frem.loop_body23
+; CI-NEXT: .LBB13_13: ; %frem.loop_body
; CI-NEXT: ; =>This Inner Loop Header: Depth=1
; CI-NEXT: v_mov_b32_e32 v17, v13
; CI-NEXT: v_mov_b32_e32 v16, v12
@@ -16429,7 +16402,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; CI-NEXT: ; %bb.14: ; %Flow
; CI-NEXT: v_mov_b32_e32 v12, v16
; CI-NEXT: v_mov_b32_e32 v13, v17
-; CI-NEXT: .LBB13_15: ; %frem.loop_exit24
+; CI-NEXT: .LBB13_15: ; %frem.loop_exit
; CI-NEXT: v_subrev_i32_e32 v16, vcc, 25, v19
; CI-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16
; CI-NEXT: s_brev_b32 s2, -2
@@ -16478,7 +16451,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[0:1]|, |v[4:5]|
; VI-NEXT: s_and_b64 vcc, exec, s[2:3]
; VI-NEXT: s_cbranch_vccz .LBB13_2
-; VI-NEXT: ; %bb.1: ; %frem.else
+; VI-NEXT: ; %bb.1: ; %frem.else16
; VI-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, |v[4:5]|
; VI-NEXT: v_and_b32_e32 v8, 0x80000000, v1
; VI-NEXT: v_cndmask_b32_e32 v9, v1, v8, vcc
@@ -16487,7 +16460,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_branch .LBB13_8
; VI-NEXT: .LBB13_2:
; VI-NEXT: ; implicit-def: $vgpr8_vgpr9
-; VI-NEXT: .LBB13_3: ; %frem.compute
+; VI-NEXT: .LBB13_3: ; %frem.compute15
; VI-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]|
; VI-NEXT: v_frexp_exp_i32_f64_e32 v15, v[4:5]
; VI-NEXT: v_frexp_exp_i32_f64_e32 v14, v[0:1]
@@ -16511,10 +16484,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_gt_i32_e32 vcc, 27, v17
; VI-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0
; VI-NEXT: s_cbranch_vccnz .LBB13_7
-; VI-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; VI-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; VI-NEXT: v_sub_u32_e32 v14, vcc, v14, v15
; VI-NEXT: v_add_u32_e32 v17, vcc, 26, v14
-; VI-NEXT: .LBB13_5: ; %frem.loop_body
+; VI-NEXT: .LBB13_5: ; %frem.loop_body23
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: v_mov_b32_e32 v15, v11
; VI-NEXT: v_mov_b32_e32 v14, v10
@@ -16532,7 +16505,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: ; %bb.6: ; %Flow51
; VI-NEXT: v_mov_b32_e32 v10, v14
; VI-NEXT: v_mov_b32_e32 v11, v15
-; VI-NEXT: .LBB13_7: ; %frem.loop_exit
+; VI-NEXT: .LBB13_7: ; %frem.loop_exit24
; VI-NEXT: v_subrev_u32_e32 v14, vcc, 25, v17
; VI-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14
; VI-NEXT: s_brev_b32 s2, -2
@@ -16549,7 +16522,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[2:3]|, |v[6:7]|
; VI-NEXT: s_and_b64 vcc, exec, s[2:3]
; VI-NEXT: s_cbranch_vccz .LBB13_10
-; VI-NEXT: ; %bb.9: ; %frem.else16
+; VI-NEXT: ; %bb.9: ; %frem.else
; VI-NEXT: v_cmp_eq_f64_e64 vcc, |v[2:3]|, |v[6:7]|
; VI-NEXT: v_and_b32_e32 v10, 0x80000000, v3
; VI-NEXT: v_cndmask_b32_e32 v11, v3, v10, vcc
@@ -16558,7 +16531,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: s_branch .LBB13_16
; VI-NEXT: .LBB13_10:
; VI-NEXT: ; implicit-def: $vgpr10_vgpr11
-; VI-NEXT: .LBB13_11: ; %frem.compute15
+; VI-NEXT: .LBB13_11: ; %frem.compute
; VI-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]|
; VI-NEXT: v_frexp_exp_i32_f64_e32 v17, v[6:7]
; VI-NEXT: v_frexp_exp_i32_f64_e32 v16, v[2:3]
@@ -16582,10 +16555,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: v_cmp_gt_i32_e32 vcc, 27, v19
; VI-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0
; VI-NEXT: s_cbranch_vccnz .LBB13_15
-; VI-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; VI-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; VI-NEXT: v_sub_u32_e32 v16, vcc, v16, v17
; VI-NEXT: v_add_u32_e32 v19, vcc, 26, v16
-; VI-NEXT: .LBB13_13: ; %frem.loop_body23
+; VI-NEXT: .LBB13_13: ; %frem.loop_body
; VI-NEXT: ; =>This Inner Loop Header: Depth=1
; VI-NEXT: v_mov_b32_e32 v17, v13
; VI-NEXT: v_mov_b32_e32 v16, v12
@@ -16603,7 +16576,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; VI-NEXT: ; %bb.14: ; %Flow
; VI-NEXT: v_mov_b32_e32 v12, v16
; VI-NEXT: v_mov_b32_e32 v13, v17
-; VI-NEXT: .LBB13_15: ; %frem.loop_exit24
+; VI-NEXT: .LBB13_15: ; %frem.loop_exit
; VI-NEXT: v_subrev_u32_e32 v16, vcc, 25, v19
; VI-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16
; VI-NEXT: s_brev_b32 s2, -2
@@ -16647,7 +16620,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[0:1]|, |v[4:5]|
; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3]
; GFX9-NEXT: s_cbranch_vccz .LBB13_2
-; GFX9-NEXT: ; %bb.1: ; %frem.else
+; GFX9-NEXT: ; %bb.1: ; %frem.else16
; GFX9-NEXT: v_cmp_eq_f64_e64 vcc, |v[0:1]|, |v[4:5]|
; GFX9-NEXT: v_and_b32_e32 v8, 0x80000000, v1
; GFX9-NEXT: v_cndmask_b32_e32 v9, v1, v8, vcc
@@ -16656,7 +16629,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_branch .LBB13_8
; GFX9-NEXT: .LBB13_2:
; GFX9-NEXT: ; implicit-def: $vgpr8_vgpr9
-; GFX9-NEXT: .LBB13_3: ; %frem.compute
+; GFX9-NEXT: .LBB13_3: ; %frem.compute15
; GFX9-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]|
; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v15, v[4:5]
; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v14, v[0:1]
@@ -16680,10 +16653,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 27, v17
; GFX9-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0
; GFX9-NEXT: s_cbranch_vccnz .LBB13_7
-; GFX9-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX9-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; GFX9-NEXT: v_sub_u32_e32 v14, v14, v15
; GFX9-NEXT: v_add_u32_e32 v17, 26, v14
-; GFX9-NEXT: .LBB13_5: ; %frem.loop_body
+; GFX9-NEXT: .LBB13_5: ; %frem.loop_body23
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mov_b32_e32 v15, v11
; GFX9-NEXT: v_mov_b32_e32 v14, v10
@@ -16701,7 +16674,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: ; %bb.6: ; %Flow51
; GFX9-NEXT: v_mov_b32_e32 v10, v14
; GFX9-NEXT: v_mov_b32_e32 v11, v15
-; GFX9-NEXT: .LBB13_7: ; %frem.loop_exit
+; GFX9-NEXT: .LBB13_7: ; %frem.loop_exit24
; GFX9-NEXT: v_subrev_u32_e32 v14, 25, v17
; GFX9-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14
; GFX9-NEXT: s_brev_b32 s2, -2
@@ -16718,7 +16691,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_ngt_f64_e64 s[2:3], |v[2:3]|, |v[6:7]|
; GFX9-NEXT: s_and_b64 vcc, exec, s[2:3]
; GFX9-NEXT: s_cbranch_vccz .LBB13_10
-; GFX9-NEXT: ; %bb.9: ; %frem.else16
+; GFX9-NEXT: ; %bb.9: ; %frem.else
; GFX9-NEXT: v_cmp_eq_f64_e64 vcc, |v[2:3]|, |v[6:7]|
; GFX9-NEXT: v_and_b32_e32 v10, 0x80000000, v3
; GFX9-NEXT: v_cndmask_b32_e32 v11, v3, v10, vcc
@@ -16727,7 +16700,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: s_branch .LBB13_16
; GFX9-NEXT: .LBB13_10:
; GFX9-NEXT: ; implicit-def: $vgpr10_vgpr11
-; GFX9-NEXT: .LBB13_11: ; %frem.compute15
+; GFX9-NEXT: .LBB13_11: ; %frem.compute
; GFX9-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]|
; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v17, v[6:7]
; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v16, v[2:3]
@@ -16751,10 +16724,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: v_cmp_gt_i32_e32 vcc, 27, v19
; GFX9-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0
; GFX9-NEXT: s_cbranch_vccnz .LBB13_15
-; GFX9-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; GFX9-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX9-NEXT: v_sub_u32_e32 v16, v16, v17
; GFX9-NEXT: v_add_u32_e32 v19, 26, v16
-; GFX9-NEXT: .LBB13_13: ; %frem.loop_body23
+; GFX9-NEXT: .LBB13_13: ; %frem.loop_body
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mov_b32_e32 v17, v13
; GFX9-NEXT: v_mov_b32_e32 v16, v12
@@ -16772,7 +16745,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX9-NEXT: ; %bb.14: ; %Flow
; GFX9-NEXT: v_mov_b32_e32 v12, v16
; GFX9-NEXT: v_mov_b32_e32 v13, v17
-; GFX9-NEXT: .LBB13_15: ; %frem.loop_exit24
+; GFX9-NEXT: .LBB13_15: ; %frem.loop_exit
; GFX9-NEXT: v_subrev_u32_e32 v16, 25, v19
; GFX9-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16
; GFX9-NEXT: s_brev_b32 s2, -2
@@ -16817,7 +16790,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_ngt_f64_e64 s2, |v[0:1]|, |v[4:5]|
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX10-NEXT: s_cbranch_vccz .LBB13_2
-; GFX10-NEXT: ; %bb.1: ; %frem.else
+; GFX10-NEXT: ; %bb.1: ; %frem.else16
; GFX10-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[0:1]|, |v[4:5]|
; GFX10-NEXT: v_and_b32_e32 v8, 0x80000000, v1
; GFX10-NEXT: v_cndmask_b32_e32 v9, v1, v8, vcc_lo
@@ -16826,7 +16799,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: s_branch .LBB13_8
; GFX10-NEXT: .LBB13_2:
; GFX10-NEXT: ; implicit-def: $vgpr8_vgpr9
-; GFX10-NEXT: .LBB13_3: ; %frem.compute
+; GFX10-NEXT: .LBB13_3: ; %frem.compute15
; GFX10-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]|
; GFX10-NEXT: v_frexp_exp_i32_f64_e32 v13, v[4:5]
; GFX10-NEXT: v_frexp_exp_i32_f64_e32 v12, v[0:1]
@@ -16851,10 +16824,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v17
; GFX10-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0
; GFX10-NEXT: s_cbranch_vccnz .LBB13_7
-; GFX10-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX10-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; GFX10-NEXT: s_sub_i32 s2, s2, s3
; GFX10-NEXT: s_add_i32 s2, s2, 26
-; GFX10-NEXT: .LBB13_5: ; %frem.loop_body
+; GFX10-NEXT: .LBB13_5: ; %frem.loop_body23
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v15, v11
; GFX10-NEXT: v_mov_b32_e32 v14, v10
@@ -16873,7 +16846,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_mov_b32_e32 v10, v14
; GFX10-NEXT: v_mov_b32_e32 v17, s2
; GFX10-NEXT: v_mov_b32_e32 v11, v15
-; GFX10-NEXT: .LBB13_7: ; %frem.loop_exit
+; GFX10-NEXT: .LBB13_7: ; %frem.loop_exit24
; GFX10-NEXT: v_subrev_nc_u32_e32 v14, 25, v17
; GFX10-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14
; GFX10-NEXT: v_mul_f64 v[12:13], v[10:11], v[12:13]
@@ -16889,7 +16862,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_ngt_f64_e64 s2, |v[2:3]|, |v[6:7]|
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX10-NEXT: s_cbranch_vccz .LBB13_10
-; GFX10-NEXT: ; %bb.9: ; %frem.else16
+; GFX10-NEXT: ; %bb.9: ; %frem.else
; GFX10-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[2:3]|, |v[6:7]|
; GFX10-NEXT: v_and_b32_e32 v10, 0x80000000, v3
; GFX10-NEXT: v_cndmask_b32_e32 v11, v3, v10, vcc_lo
@@ -16898,7 +16871,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: s_branch .LBB13_16
; GFX10-NEXT: .LBB13_10:
; GFX10-NEXT: ; implicit-def: $vgpr10_vgpr11
-; GFX10-NEXT: .LBB13_11: ; %frem.compute15
+; GFX10-NEXT: .LBB13_11: ; %frem.compute
; GFX10-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]|
; GFX10-NEXT: v_frexp_exp_i32_f64_e32 v15, v[6:7]
; GFX10-NEXT: v_frexp_exp_i32_f64_e32 v14, v[2:3]
@@ -16923,10 +16896,10 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v19
; GFX10-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0
; GFX10-NEXT: s_cbranch_vccnz .LBB13_15
-; GFX10-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; GFX10-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX10-NEXT: s_sub_i32 s2, s2, s3
; GFX10-NEXT: s_add_i32 s2, s2, 26
-; GFX10-NEXT: .LBB13_13: ; %frem.loop_body23
+; GFX10-NEXT: .LBB13_13: ; %frem.loop_body
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mov_b32_e32 v17, v13
; GFX10-NEXT: v_mov_b32_e32 v16, v12
@@ -16945,7 +16918,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX10-NEXT: v_mov_b32_e32 v12, v16
; GFX10-NEXT: v_mov_b32_e32 v19, s2
; GFX10-NEXT: v_mov_b32_e32 v13, v17
-; GFX10-NEXT: .LBB13_15: ; %frem.loop_exit24
+; GFX10-NEXT: .LBB13_15: ; %frem.loop_exit
; GFX10-NEXT: v_subrev_nc_u32_e32 v16, 25, v19
; GFX10-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16
; GFX10-NEXT: v_mul_f64 v[14:15], v[12:13], v[14:15]
@@ -16986,7 +16959,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: v_cmp_ngt_f64_e64 s2, |v[0:1]|, |v[4:5]|
; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX11-NEXT: s_cbranch_vccz .LBB13_2
-; GFX11-NEXT: ; %bb.1: ; %frem.else
+; GFX11-NEXT: ; %bb.1: ; %frem.else16
; GFX11-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[0:1]|, |v[4:5]|
; GFX11-NEXT: v_and_b32_e32 v8, 0x80000000, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
@@ -16996,7 +16969,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_branch .LBB13_8
; GFX11-NEXT: .LBB13_2:
; GFX11-NEXT: ; implicit-def: $vgpr8_vgpr9
-; GFX11-NEXT: .LBB13_3: ; %frem.compute
+; GFX11-NEXT: .LBB13_3: ; %frem.compute15
; GFX11-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]|
; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v13, v[4:5]
; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v12, v[0:1]
@@ -17029,12 +17002,12 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0
; GFX11-NEXT: s_cbranch_vccnz .LBB13_7
-; GFX11-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX11-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; GFX11-NEXT: s_sub_i32 s2, s2, s3
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_add_i32 s2, s2, 26
; GFX11-NEXT: .p2align 6
-; GFX11-NEXT: .LBB13_5: ; %frem.loop_body
+; GFX11-NEXT: .LBB13_5: ; %frem.loop_body23
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v15, v11 :: v_dual_mov_b32 v14, v10
@@ -17054,7 +17027,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: ; %bb.6: ; %Flow51
; GFX11-NEXT: v_dual_mov_b32 v17, s2 :: v_dual_mov_b32 v10, v14
; GFX11-NEXT: v_mov_b32_e32 v11, v15
-; GFX11-NEXT: .LBB13_7: ; %frem.loop_exit
+; GFX11-NEXT: .LBB13_7: ; %frem.loop_exit24
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_subrev_nc_u32_e32 v14, 25, v17
; GFX11-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14
@@ -17074,7 +17047,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: v_cmp_ngt_f64_e64 s2, |v[2:3]|, |v[6:7]|
; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX11-NEXT: s_cbranch_vccz .LBB13_10
-; GFX11-NEXT: ; %bb.9: ; %frem.else16
+; GFX11-NEXT: ; %bb.9: ; %frem.else
; GFX11-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[2:3]|, |v[6:7]|
; GFX11-NEXT: v_and_b32_e32 v10, 0x80000000, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
@@ -17084,7 +17057,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_branch .LBB13_16
; GFX11-NEXT: .LBB13_10:
; GFX11-NEXT: ; implicit-def: $vgpr10_vgpr11
-; GFX11-NEXT: .LBB13_11: ; %frem.compute15
+; GFX11-NEXT: .LBB13_11: ; %frem.compute
; GFX11-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]|
; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v15, v[6:7]
; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v14, v[2:3]
@@ -17117,12 +17090,12 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0
; GFX11-NEXT: s_cbranch_vccnz .LBB13_15
-; GFX11-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; GFX11-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX11-NEXT: s_sub_i32 s2, s2, s3
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: s_add_i32 s2, s2, 26
; GFX11-NEXT: .p2align 6
-; GFX11-NEXT: .LBB13_13: ; %frem.loop_body23
+; GFX11-NEXT: .LBB13_13: ; %frem.loop_body
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_dual_mov_b32 v17, v13 :: v_dual_mov_b32 v16, v12
@@ -17142,7 +17115,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-NEXT: ; %bb.14: ; %Flow
; GFX11-NEXT: v_dual_mov_b32 v19, s2 :: v_dual_mov_b32 v12, v16
; GFX11-NEXT: v_mov_b32_e32 v13, v17
-; GFX11-NEXT: .LBB13_15: ; %frem.loop_exit24
+; GFX11-NEXT: .LBB13_15: ; %frem.loop_exit
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_subrev_nc_u32_e32 v16, 25, v19
; GFX11-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16
@@ -17187,7 +17160,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: v_cmp_ngt_f64_e64 s2, |v[0:1]|, |v[4:5]|
; GFX1150-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX1150-NEXT: s_cbranch_vccz .LBB13_2
-; GFX1150-NEXT: ; %bb.1: ; %frem.else
+; GFX1150-NEXT: ; %bb.1: ; %frem.else16
; GFX1150-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[0:1]|, |v[4:5]|
; GFX1150-NEXT: v_and_b32_e32 v8, 0x80000000, v1
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
@@ -17197,7 +17170,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_branch .LBB13_8
; GFX1150-NEXT: .LBB13_2:
; GFX1150-NEXT: ; implicit-def: $vgpr8_vgpr9
-; GFX1150-NEXT: .LBB13_3: ; %frem.compute
+; GFX1150-NEXT: .LBB13_3: ; %frem.compute15
; GFX1150-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]|
; GFX1150-NEXT: v_frexp_exp_i32_f64_e32 v13, v[4:5]
; GFX1150-NEXT: v_frexp_exp_i32_f64_e32 v12, v[0:1]
@@ -17229,12 +17202,12 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v17
; GFX1150-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0
; GFX1150-NEXT: s_cbranch_vccnz .LBB13_7
-; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX1150-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; GFX1150-NEXT: s_sub_i32 s2, s2, s3
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-NEXT: s_add_i32 s2, s2, 26
; GFX1150-NEXT: .p2align 6
-; GFX1150-NEXT: .LBB13_5: ; %frem.loop_body
+; GFX1150-NEXT: .LBB13_5: ; %frem.loop_body23
; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-NEXT: v_dual_mov_b32 v15, v11 :: v_dual_mov_b32 v14, v10
@@ -17254,7 +17227,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: ; %bb.6: ; %Flow51
; GFX1150-NEXT: v_dual_mov_b32 v17, s2 :: v_dual_mov_b32 v10, v14
; GFX1150-NEXT: v_mov_b32_e32 v11, v15
-; GFX1150-NEXT: .LBB13_7: ; %frem.loop_exit
+; GFX1150-NEXT: .LBB13_7: ; %frem.loop_exit24
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-NEXT: v_subrev_nc_u32_e32 v14, 25, v17
; GFX1150-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14
@@ -17274,7 +17247,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: v_cmp_ngt_f64_e64 s2, |v[2:3]|, |v[6:7]|
; GFX1150-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX1150-NEXT: s_cbranch_vccz .LBB13_10
-; GFX1150-NEXT: ; %bb.9: ; %frem.else16
+; GFX1150-NEXT: ; %bb.9: ; %frem.else
; GFX1150-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[2:3]|, |v[6:7]|
; GFX1150-NEXT: v_and_b32_e32 v10, 0x80000000, v3
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
@@ -17284,7 +17257,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: s_branch .LBB13_16
; GFX1150-NEXT: .LBB13_10:
; GFX1150-NEXT: ; implicit-def: $vgpr10_vgpr11
-; GFX1150-NEXT: .LBB13_11: ; %frem.compute15
+; GFX1150-NEXT: .LBB13_11: ; %frem.compute
; GFX1150-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]|
; GFX1150-NEXT: v_frexp_exp_i32_f64_e32 v15, v[6:7]
; GFX1150-NEXT: v_frexp_exp_i32_f64_e32 v14, v[2:3]
@@ -17316,12 +17289,12 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v19
; GFX1150-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0
; GFX1150-NEXT: s_cbranch_vccnz .LBB13_15
-; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; GFX1150-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX1150-NEXT: s_sub_i32 s2, s2, s3
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-NEXT: s_add_i32 s2, s2, 26
; GFX1150-NEXT: .p2align 6
-; GFX1150-NEXT: .LBB13_13: ; %frem.loop_body23
+; GFX1150-NEXT: .LBB13_13: ; %frem.loop_body
; GFX1150-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1150-NEXT: v_dual_mov_b32 v17, v13 :: v_dual_mov_b32 v16, v12
@@ -17341,7 +17314,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-NEXT: ; %bb.14: ; %Flow
; GFX1150-NEXT: v_dual_mov_b32 v19, s2 :: v_dual_mov_b32 v12, v16
; GFX1150-NEXT: v_mov_b32_e32 v13, v17
-; GFX1150-NEXT: .LBB13_15: ; %frem.loop_exit24
+; GFX1150-NEXT: .LBB13_15: ; %frem.loop_exit
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1150-NEXT: v_subrev_nc_u32_e32 v16, 25, v19
; GFX1150-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16
@@ -17386,7 +17359,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: v_cmp_ngt_f64_e64 s2, |v[0:1]|, |v[4:5]|
; GFX1200-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX1200-NEXT: s_cbranch_vccz .LBB13_2
-; GFX1200-NEXT: ; %bb.1: ; %frem.else
+; GFX1200-NEXT: ; %bb.1: ; %frem.else16
; GFX1200-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[0:1]|, |v[4:5]|
; GFX1200-NEXT: v_and_b32_e32 v8, 0x80000000, v1
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
@@ -17396,7 +17369,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: s_branch .LBB13_8
; GFX1200-NEXT: .LBB13_2:
; GFX1200-NEXT: ; implicit-def: $vgpr8_vgpr9
-; GFX1200-NEXT: .LBB13_3: ; %frem.compute
+; GFX1200-NEXT: .LBB13_3: ; %frem.compute15
; GFX1200-NEXT: v_frexp_mant_f64_e64 v[8:9], |v[0:1]|
; GFX1200-NEXT: v_frexp_exp_i32_f64_e32 v13, v[4:5]
; GFX1200-NEXT: v_frexp_exp_i32_f64_e32 v12, v[0:1]
@@ -17429,11 +17402,11 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v17
; GFX1200-NEXT: v_div_fixup_f64 v[12:13], v[12:13], v[8:9], 1.0
; GFX1200-NEXT: s_cbranch_vccnz .LBB13_7
-; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body.preheader
+; GFX1200-NEXT: ; %bb.4: ; %frem.loop_body23.preheader
; GFX1200-NEXT: s_sub_co_i32 s2, s2, s3
; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1200-NEXT: s_add_co_i32 s2, s2, 26
-; GFX1200-NEXT: .LBB13_5: ; %frem.loop_body
+; GFX1200-NEXT: .LBB13_5: ; %frem.loop_body23
; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX1200-NEXT: v_dual_mov_b32 v15, v11 :: v_dual_mov_b32 v14, v10
@@ -17454,7 +17427,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: ; %bb.6: ; %Flow51
; GFX1200-NEXT: v_dual_mov_b32 v17, s2 :: v_dual_mov_b32 v10, v14
; GFX1200-NEXT: v_mov_b32_e32 v11, v15
-; GFX1200-NEXT: .LBB13_7: ; %frem.loop_exit
+; GFX1200-NEXT: .LBB13_7: ; %frem.loop_exit24
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-NEXT: v_subrev_nc_u32_e32 v14, 25, v17
; GFX1200-NEXT: v_ldexp_f64 v[10:11], v[10:11], v14
@@ -17476,7 +17449,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: s_and_b32 vcc_lo, exec_lo, s2
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_cbranch_vccz .LBB13_10
-; GFX1200-NEXT: ; %bb.9: ; %frem.else16
+; GFX1200-NEXT: ; %bb.9: ; %frem.else
; GFX1200-NEXT: v_cmp_eq_f64_e64 vcc_lo, |v[2:3]|, |v[6:7]|
; GFX1200-NEXT: v_and_b32_e32 v10, 0x80000000, v3
; GFX1200-NEXT: s_wait_alu 0xfffd
@@ -17487,7 +17460,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: s_branch .LBB13_16
; GFX1200-NEXT: .LBB13_10:
; GFX1200-NEXT: ; implicit-def: $vgpr10_vgpr11
-; GFX1200-NEXT: .LBB13_11: ; %frem.compute15
+; GFX1200-NEXT: .LBB13_11: ; %frem.compute
; GFX1200-NEXT: v_frexp_mant_f64_e64 v[10:11], |v[2:3]|
; GFX1200-NEXT: v_frexp_exp_i32_f64_e32 v15, v[6:7]
; GFX1200-NEXT: v_frexp_exp_i32_f64_e32 v14, v[2:3]
@@ -17520,11 +17493,11 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: v_cmp_gt_i32_e32 vcc_lo, 27, v19
; GFX1200-NEXT: v_div_fixup_f64 v[14:15], v[14:15], v[10:11], 1.0
; GFX1200-NEXT: s_cbranch_vccnz .LBB13_15
-; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body23.preheader
+; GFX1200-NEXT: ; %bb.12: ; %frem.loop_body.preheader
; GFX1200-NEXT: s_sub_co_i32 s2, s2, s3
; GFX1200-NEXT: s_wait_alu 0xfffe
; GFX1200-NEXT: s_add_co_i32 s2, s2, 26
-; GFX1200-NEXT: .LBB13_13: ; %frem.loop_body23
+; GFX1200-NEXT: .LBB13_13: ; %frem.loop_body
; GFX1200-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_2)
; GFX1200-NEXT: v_dual_mov_b32 v17, v13 :: v_dual_mov_b32 v16, v12
@@ -17547,7 +17520,7 @@ define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-NEXT: ; %bb.14: ; %Flow
; GFX1200-NEXT: v_dual_mov_b32 v19, s2 :: v_dual_mov_b32 v12, v16
; GFX1200-NEXT: v_mov_b32_e32 v13, v17
-; GFX1200-NEXT: .LBB13_15: ; %frem.loop_exit24
+; GFX1200-NEXT: .LBB13_15: ; %frem.loop_exit
; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1200-NEXT: v_subrev_nc_u32_e32 v16, 25, v19
; GFX1200-NEXT: v_ldexp_f64 v[12:13], v[12:13], v16