diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll | 128 |
1 files changed, 40 insertions, 88 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll index b0dd187..c28b25c7 100644 --- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll @@ -599,10 +599,8 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; SI-GISEL-NEXT: s_addk_i32 s3, 0xfc10 ; SI-GISEL-NEXT: s_and_b32 s6, s6, 0xffe ; SI-GISEL-NEXT: s_or_b32 s4, s7, s4 -; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0 ; SI-GISEL-NEXT: s_cselect_b32 s4, 1, 0 ; SI-GISEL-NEXT: s_or_b32 s4, s6, s4 -; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0 ; SI-GISEL-NEXT: s_cselect_b32 s6, 1, 0 ; SI-GISEL-NEXT: s_lshl_b32 s6, s6, 9 ; SI-GISEL-NEXT: s_lshl_b32 s7, s3, 12 @@ -711,10 +709,8 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; VI-GISEL-NEXT: s_addk_i32 s4, 0xfc10 ; VI-GISEL-NEXT: s_and_b32 s5, s5, 0xffe ; VI-GISEL-NEXT: s_or_b32 s2, s6, s2 -; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0 ; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0 ; VI-GISEL-NEXT: s_or_b32 s2, s5, s2 -; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0 ; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0 ; VI-GISEL-NEXT: s_sub_i32 s7, 1, s4 ; VI-GISEL-NEXT: s_lshl_b32 s6, s4, 12 @@ -824,10 +820,8 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX9-GISEL-NEXT: s_addk_i32 s4, 0xfc10 ; GFX9-GISEL-NEXT: s_and_b32 s5, s5, 0xffe ; GFX9-GISEL-NEXT: s_or_b32 s2, s6, s2 -; GFX9-GISEL-NEXT: s_cmp_lg_u32 s2, 0 ; GFX9-GISEL-NEXT: s_cselect_b32 s2, 1, 0 ; GFX9-GISEL-NEXT: s_or_b32 s2, s5, s2 -; GFX9-GISEL-NEXT: s_cmp_lg_u32 s2, 0 ; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0 ; GFX9-GISEL-NEXT: s_sub_i32 s7, 1, s4 ; GFX9-GISEL-NEXT: s_lshl_b32 s6, s4, 12 @@ -937,10 +931,8 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX950-GISEL-NEXT: s_addk_i32 s4, 0xfc10 ; GFX950-GISEL-NEXT: s_and_b32 s5, s5, 0xffe ; GFX950-GISEL-NEXT: s_or_b32 s2, s6, s2 -; GFX950-GISEL-NEXT: s_cmp_lg_u32 s2, 0 ; GFX950-GISEL-NEXT: s_cselect_b32 s2, 1, 0 ; GFX950-GISEL-NEXT: s_or_b32 s2, s5, s2 -; GFX950-GISEL-NEXT: s_cmp_lg_u32 s2, 0 ; GFX950-GISEL-NEXT: s_cselect_b32 s5, 1, 0 ; GFX950-GISEL-NEXT: s_sub_i32 s7, 1, s4 ; GFX950-GISEL-NEXT: s_lshl_b32 s6, s4, 12 @@ -1118,17 +1110,15 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 0x1ff ; GFX11-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s3, 0xb0014 ; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s3, 8 -; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s6, s2 +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 0x1ff ; GFX11-GISEL-TRUE16-NEXT: s_addk_i32 s4, 0xfc10 ; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe -; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s6, s2 ; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, 1, 0 -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s5, s2 -; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0 ; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0 ; GFX11-GISEL-TRUE16-NEXT: s_sub_i32 s6, 1, s4 ; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s8, s2, 0x1000 @@ -1175,17 +1165,15 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-FAKE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 ; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s6, s3, 0x1ff ; GFX11-GISEL-FAKE16-NEXT: s_bfe_u32 s4, s3, 0xb0014 ; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s5, s3, 8 -; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s6, s2 +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s6, s3, 0x1ff ; GFX11-GISEL-FAKE16-NEXT: s_addk_i32 s4, 0xfc10 ; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe -; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s6, s2 ; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, 1, 0 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s5, s2 -; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 ; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s5, 1, 0 ; GFX11-GISEL-FAKE16-NEXT: s_sub_i32 s6, 1, s4 ; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s8, s2, 0x1000 @@ -1366,17 +1354,15 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX1250-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 ; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 0x1ff ; GFX1250-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s3, 0xb0014 ; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s3, 8 -; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s2, s6, s2 +; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 0x1ff ; GFX1250-GISEL-TRUE16-NEXT: s_addk_co_i32 s4, 0xfc10 ; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe -; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s2, s6, s2 ; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s2, 1, 0 -; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s2, s5, s2 -; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0 ; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0 ; GFX1250-GISEL-TRUE16-NEXT: s_sub_co_i32 s6, 1, s4 ; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s8, s2, 0x1000 @@ -1423,17 +1409,15 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX1250-GISEL-FAKE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 ; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s6, s3, 0x1ff ; GFX1250-GISEL-FAKE16-NEXT: s_bfe_u32 s4, s3, 0xb0014 ; GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s5, s3, 8 -; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s2, s6, s2 +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s6, s3, 0x1ff ; GFX1250-GISEL-FAKE16-NEXT: s_addk_co_i32 s4, 0xfc10 ; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe -; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s2, s6, s2 ; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s2, 1, 0 -; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s2, s5, s2 -; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 ; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s5, 1, 0 ; GFX1250-GISEL-FAKE16-NEXT: s_sub_co_i32 s6, 1, s4 ; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s8, s2, 0x1000 @@ -2154,10 +2138,8 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; SI-GISEL-NEXT: s_addk_i32 s3, 0xfc10 ; SI-GISEL-NEXT: s_and_b32 s8, s8, 0xffe ; SI-GISEL-NEXT: s_or_b32 s4, s9, s4 -; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0 ; SI-GISEL-NEXT: s_cselect_b32 s4, 1, 0 ; SI-GISEL-NEXT: s_or_b32 s4, s8, s4 -; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0 ; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0 ; SI-GISEL-NEXT: s_lshl_b32 s8, s8, 9 ; SI-GISEL-NEXT: s_lshl_b32 s9, s3, 12 @@ -2193,12 +2175,10 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; SI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000 ; SI-GISEL-NEXT: s_addk_i32 s5, 0xfc10 ; SI-GISEL-NEXT: s_and_b32 s8, s8, 0xffe -; SI-GISEL-NEXT: s_or_b32 s6, s9, s6 ; SI-GISEL-NEXT: s_or_b32 s3, s4, s3 -; SI-GISEL-NEXT: s_cmp_lg_u32 s6, 0 +; SI-GISEL-NEXT: s_or_b32 s4, s9, s6 ; SI-GISEL-NEXT: s_cselect_b32 s4, 1, 0 ; SI-GISEL-NEXT: s_or_b32 s4, s8, s4 -; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0 ; SI-GISEL-NEXT: s_cselect_b32 s6, 1, 0 ; SI-GISEL-NEXT: s_lshl_b32 s6, s6, 9 ; SI-GISEL-NEXT: s_lshl_b32 s8, s5, 12 @@ -2355,10 +2335,8 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; VI-GISEL-NEXT: s_addk_i32 s2, 0xfc10 ; VI-GISEL-NEXT: s_and_b32 s3, s3, 0xffe ; VI-GISEL-NEXT: s_or_b32 s4, s8, s4 -; VI-GISEL-NEXT: s_cmp_lg_u32 s4, 0 ; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0 ; VI-GISEL-NEXT: s_or_b32 s3, s3, s4 -; VI-GISEL-NEXT: s_cmp_lg_u32 s3, 0 ; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0 ; VI-GISEL-NEXT: s_sub_i32 s9, 1, s2 ; VI-GISEL-NEXT: s_lshl_b32 s8, s2, 12 @@ -2392,14 +2370,12 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; VI-GISEL-NEXT: s_or_b32 s2, s3, s2 ; VI-GISEL-NEXT: s_bfe_u32 s3, s7, 0xb0014 ; VI-GISEL-NEXT: s_lshr_b32 s4, s7, 8 -; VI-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff ; VI-GISEL-NEXT: s_addk_i32 s3, 0xfc10 ; VI-GISEL-NEXT: s_and_b32 s4, s4, 0xffe +; VI-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff ; VI-GISEL-NEXT: s_or_b32 s5, s5, s6 -; VI-GISEL-NEXT: s_cmp_lg_u32 s5, 0 ; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0 ; VI-GISEL-NEXT: s_or_b32 s4, s4, s5 -; VI-GISEL-NEXT: s_cmp_lg_u32 s4, 0 ; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0 ; VI-GISEL-NEXT: s_sub_i32 s8, 1, s3 ; VI-GISEL-NEXT: s_lshl_b32 s6, s3, 12 @@ -2555,10 +2531,8 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX9-GISEL-NEXT: s_addk_i32 s2, 0xfc10 ; GFX9-GISEL-NEXT: s_and_b32 s3, s3, 0xffe ; GFX9-GISEL-NEXT: s_or_b32 s4, s8, s4 -; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0 ; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0 ; GFX9-GISEL-NEXT: s_or_b32 s3, s3, s4 -; GFX9-GISEL-NEXT: s_cmp_lg_u32 s3, 0 ; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0 ; GFX9-GISEL-NEXT: s_sub_i32 s9, 1, s2 ; GFX9-GISEL-NEXT: s_lshl_b32 s8, s2, 12 @@ -2592,14 +2566,12 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX9-GISEL-NEXT: s_or_b32 s2, s3, s2 ; GFX9-GISEL-NEXT: s_bfe_u32 s3, s7, 0xb0014 ; GFX9-GISEL-NEXT: s_lshr_b32 s4, s7, 8 -; GFX9-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff ; GFX9-GISEL-NEXT: s_addk_i32 s3, 0xfc10 ; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 0xffe +; GFX9-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff ; GFX9-GISEL-NEXT: s_or_b32 s5, s5, s6 -; GFX9-GISEL-NEXT: s_cmp_lg_u32 s5, 0 ; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0 ; GFX9-GISEL-NEXT: s_or_b32 s4, s4, s5 -; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0 ; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0 ; GFX9-GISEL-NEXT: s_sub_i32 s8, 1, s3 ; GFX9-GISEL-NEXT: s_lshl_b32 s6, s3, 12 @@ -2752,10 +2724,8 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX950-GISEL-NEXT: s_addk_i32 s2, 0xfc10 ; GFX950-GISEL-NEXT: s_and_b32 s3, s3, 0xffe ; GFX950-GISEL-NEXT: s_or_b32 s4, s8, s4 -; GFX950-GISEL-NEXT: s_cmp_lg_u32 s4, 0 ; GFX950-GISEL-NEXT: s_cselect_b32 s4, 1, 0 ; GFX950-GISEL-NEXT: s_or_b32 s3, s3, s4 -; GFX950-GISEL-NEXT: s_cmp_lg_u32 s3, 0 ; GFX950-GISEL-NEXT: s_cselect_b32 s4, 1, 0 ; GFX950-GISEL-NEXT: s_sub_i32 s9, 1, s2 ; GFX950-GISEL-NEXT: s_lshl_b32 s8, s2, 12 @@ -2789,14 +2759,12 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX950-GISEL-NEXT: s_or_b32 s2, s3, s2 ; GFX950-GISEL-NEXT: s_bfe_u32 s3, s7, 0xb0014 ; GFX950-GISEL-NEXT: s_lshr_b32 s4, s7, 8 -; GFX950-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff ; GFX950-GISEL-NEXT: s_addk_i32 s3, 0xfc10 ; GFX950-GISEL-NEXT: s_and_b32 s4, s4, 0xffe +; GFX950-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff ; GFX950-GISEL-NEXT: s_or_b32 s5, s5, s6 -; GFX950-GISEL-NEXT: s_cmp_lg_u32 s5, 0 ; GFX950-GISEL-NEXT: s_cselect_b32 s5, 1, 0 ; GFX950-GISEL-NEXT: s_or_b32 s4, s4, s5 -; GFX950-GISEL-NEXT: s_cmp_lg_u32 s4, 0 ; GFX950-GISEL-NEXT: s_cselect_b32 s5, 1, 0 ; GFX950-GISEL-NEXT: s_sub_i32 s8, 1, s3 ; GFX950-GISEL-NEXT: s_lshl_b32 s6, s3, 12 @@ -3073,17 +3041,15 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s5, 0x1ff ; GFX11-GISEL-TRUE16-NEXT: s_bfe_u32 s2, s5, 0xb0014 ; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 8 -; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s4, s8, s4 +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s5, 0x1ff ; GFX11-GISEL-TRUE16-NEXT: s_addk_i32 s2, 0xfc10 ; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0xffe -; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s4, s8, s4 ; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0 -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0 ; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0 ; GFX11-GISEL-TRUE16-NEXT: s_sub_i32 s8, 1, s2 ; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s10, s3, 0x1000 @@ -3115,19 +3081,17 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX11-GISEL-TRUE16-NEXT: s_cmpk_eq_i32 s2, 0x40f ; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s4, s3 ; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 16 -; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s7, 0x1ff ; GFX11-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s7, 0xb0014 ; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s7, 8 ; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0x8000 -; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s8, s6 +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s7, 0x1ff ; GFX11-GISEL-TRUE16-NEXT: s_addk_i32 s4, 0xfc10 ; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe ; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, 0 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s8, s6 ; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 1, 0 -; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s5, s3 -; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0 ; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0 ; GFX11-GISEL-TRUE16-NEXT: s_sub_i32 s6, 1, s4 ; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s9, s3, 0x1000 @@ -3176,17 +3140,15 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 ; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s5, 0x1ff ; GFX11-GISEL-FAKE16-NEXT: s_bfe_u32 s2, s5, 0xb0014 ; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s5, 8 -; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s4, s8, s4 +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s5, 0x1ff ; GFX11-GISEL-FAKE16-NEXT: s_addk_i32 s2, 0xfc10 ; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0xffe -; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s4, s8, s4 ; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s4, 1, 0 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s4 -; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s3, 0 ; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s4, 1, 0 ; GFX11-GISEL-FAKE16-NEXT: s_sub_i32 s8, 1, s2 ; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s10, s3, 0x1000 @@ -3218,19 +3180,17 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX11-GISEL-FAKE16-NEXT: s_cmpk_eq_i32 s2, 0x40f ; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s4, s3 ; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s5, 16 -; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s7, 0x1ff ; GFX11-GISEL-FAKE16-NEXT: s_bfe_u32 s4, s7, 0xb0014 ; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s5, s7, 8 ; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0x8000 -; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s8, s6 +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s7, 0x1ff ; GFX11-GISEL-FAKE16-NEXT: s_addk_i32 s4, 0xfc10 ; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe ; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s3, s2 -; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, 0 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s8, s6 ; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 1, 0 -; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s5, s3 -; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s3, 0 ; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s5, 1, 0 ; GFX11-GISEL-FAKE16-NEXT: s_sub_i32 s6, 1, s4 ; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s9, s3, 0x1000 @@ -3511,17 +3471,15 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 ; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s8, s5, 0x1ff ; GFX1250-GISEL-TRUE16-NEXT: s_bfe_u32 s2, s5, 0xb0014 ; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 8 -; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s4, s8, s4 +; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s8, s5, 0x1ff ; GFX1250-GISEL-TRUE16-NEXT: s_addk_co_i32 s2, 0xfc10 ; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0xffe -; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s4, 0 +; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s4, s8, s4 ; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0 -; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s4 -; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0 ; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0 ; GFX1250-GISEL-TRUE16-NEXT: s_sub_co_i32 s8, 1, s2 ; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s10, s3, 0x1000 @@ -3553,19 +3511,17 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX1250-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s2, 0x40f ; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s4, s3 ; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 16 -; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s8, s7, 0x1ff ; GFX1250-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s7, 0xb0014 ; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s7, 8 ; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0x8000 -; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s6, s8, s6 +; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s8, s7, 0x1ff ; GFX1250-GISEL-TRUE16-NEXT: s_addk_co_i32 s4, 0xfc10 ; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe ; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s2, s3, s2 -; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, 0 +; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s3, s8, s6 ; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 1, 0 -; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s3, s5, s3 -; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0 ; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0 ; GFX1250-GISEL-TRUE16-NEXT: s_sub_co_i32 s6, 1, s4 ; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s9, s3, 0x1000 @@ -3614,17 +3570,15 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 ; GFX1250-GISEL-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s8, s5, 0x1ff ; GFX1250-GISEL-FAKE16-NEXT: s_bfe_u32 s2, s5, 0xb0014 ; GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s5, 8 -; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s4, s8, s4 +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s8, s5, 0x1ff ; GFX1250-GISEL-FAKE16-NEXT: s_addk_co_i32 s2, 0xfc10 ; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0xffe -; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s4, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s4, s8, s4 ; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s4, 1, 0 -; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s4 -; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s3, 0 ; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s4, 1, 0 ; GFX1250-GISEL-FAKE16-NEXT: s_sub_co_i32 s8, 1, s2 ; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s10, s3, 0x1000 @@ -3656,19 +3610,17 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX1250-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s2, 0x40f ; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s4, s3 ; GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s5, 16 -; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s8, s7, 0x1ff ; GFX1250-GISEL-FAKE16-NEXT: s_bfe_u32 s4, s7, 0xb0014 ; GFX1250-GISEL-FAKE16-NEXT: s_lshr_b32 s5, s7, 8 ; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0x8000 -; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s6, s8, s6 +; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s8, s7, 0x1ff ; GFX1250-GISEL-FAKE16-NEXT: s_addk_co_i32 s4, 0xfc10 ; GFX1250-GISEL-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe ; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s2, s3, s2 -; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, 0 +; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s3, s8, s6 ; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 1, 0 -; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s3, s5, s3 -; GFX1250-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s3, 0 ; GFX1250-GISEL-FAKE16-NEXT: s_cselect_b32 s5, 1, 0 ; GFX1250-GISEL-FAKE16-NEXT: s_sub_co_i32 s6, 1, s4 ; GFX1250-GISEL-FAKE16-NEXT: s_or_b32 s9, s3, 0x1000 |