diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/frem.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/frem.ll | 65 |
1 files changed, 19 insertions, 46 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll index 415828f..35d178c 100644 --- a/llvm/test/CodeGen/AMDGPU/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/frem.ll @@ -5972,16 +5972,14 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: .LBB9_16: ; %Flow54 ; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v1.l ; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v0.l| +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo ; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v4.l ; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v2.l, s2 ; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v3.l| -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0 ; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo -; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, 0x7e00, v7.l, s2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v7.l, s2 ; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; @@ -6422,19 +6420,16 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0 ; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s2, 0x7c00 ; GFX1150-TRUE16-NEXT: s_cselect_b32 s2, -1, 0 -; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) +; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_and_b32 s2, s2, s3 ; GFX1150-TRUE16-NEXT: s_cmp_lg_f16 s5, 0 ; GFX1150-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.l, s2 ; GFX1150-TRUE16-NEXT: s_cselect_b32 s2, -1, 0 ; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s4, 0x7c00 -; GFX1150-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_and_b32 s2, s3, s2 -; GFX1150-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x7e00, v1.l, s2 -; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX1150-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v1.l, s2 ; GFX1150-TRUE16-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX1150-TRUE16-NEXT: s_endpgm ; @@ -6902,20 +6897,17 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: s_cselect_b32 s3, -1, 0 ; GFX1200-TRUE16-NEXT: s_cmp_nge_f16 s2, 0x7c00 ; GFX1200-TRUE16-NEXT: s_cselect_b32 s2, -1, 0 -; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) +; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1200-TRUE16-NEXT: s_and_b32 s2, s2, s3 ; GFX1200-TRUE16-NEXT: s_cmp_lg_f16 s5, 0 ; GFX1200-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.l, s2 ; GFX1200-TRUE16-NEXT: s_cselect_b32 s2, -1, 0 ; GFX1200-TRUE16-NEXT: s_cmp_nge_f16 s4, 0x7c00 -; GFX1200-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX1200-TRUE16-NEXT: s_cselect_b32 s3, -1, 0 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_and_b32 s2, s3, s2 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe -; GFX1200-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x7e00, v1.l, s2 -; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1200-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0 +; GFX1200-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v1.l, s2 ; GFX1200-TRUE16-NEXT: global_store_b32 v2, v0, s[0:1] ; GFX1200-TRUE16-NEXT: s_endpgm ; @@ -9346,29 +9338,23 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX11-TRUE16-NEXT: .LBB10_32: ; %Flow124 ; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v2.l ; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v0.l| +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0 ; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo ; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v6.l ; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v4.l, s2 ; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v5.l| -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo ; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v3.l -; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, 0x7e00, v7.l, s2 +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v7.l, s2 ; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v1.l| -; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, 0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0 ; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo ; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v10.l ; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x7e00, v8.l, s2 ; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v9.l| -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.l, 0x7e00, v11.l, s2 -; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v4, 16, v1 -; GFX11-TRUE16-NEXT: global_store_b64 v3, v[0:1], s[0:1] +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x7e00, v11.l, s2 +; GFX11-TRUE16-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-TRUE16-NEXT: s_endpgm ; ; GFX11-FAKE16-LABEL: frem_v4f16: @@ -10209,21 +10195,19 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: s_cselect_b32 s4, -1, 0 ; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s3, 0x7c00 ; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0 -; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) +; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_and_b32 s3, s3, s4 ; GFX1150-TRUE16-NEXT: s_cmp_lg_f16 s6, 0 ; GFX1150-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.l, s3 ; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0 ; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s5, 0x7c00 -; GFX1150-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX1150-TRUE16-NEXT: s_cselect_b32 s4, -1, 0 -; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) +; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_and_b32 s3, s4, s3 ; GFX1150-TRUE16-NEXT: s_cmp_lg_f16 s2, 0 -; GFX1150-TRUE16-NEXT: v_cndmask_b16 v4.l, 0x7e00, v1.l, s3 +; GFX1150-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v1.l, s3 ; GFX1150-TRUE16-NEXT: s_cselect_b32 s2, -1, 0 ; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s8, 0x7c00 -; GFX1150-TRUE16-NEXT: v_lshl_or_b32 v0, v4, 16, v0 ; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0 ; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_and_b32 s2, s3, s2 @@ -10232,13 +10216,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v2, 0 ; GFX1150-TRUE16-NEXT: s_cselect_b32 s2, -1, 0 ; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s7, 0x7c00 -; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX1150-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0 +; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1150-TRUE16-NEXT: s_and_b32 s2, s3, s2 -; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1150-TRUE16-NEXT: v_cndmask_b16 v3.l, 0x7e00, v3.l, s2 -; GFX1150-TRUE16-NEXT: v_lshl_or_b32 v1, v3, 16, v1 +; GFX1150-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x7e00, v3.l, s2 ; GFX1150-TRUE16-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1150-TRUE16-NEXT: s_endpgm ; @@ -11147,18 +11128,14 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.l, s3 ; GFX1200-TRUE16-NEXT: s_cselect_b32 s3, -1, 0 ; GFX1200-TRUE16-NEXT: s_cmp_nge_f16 s5, 0x7c00 -; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1200-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX1200-TRUE16-NEXT: s_cselect_b32 s4, -1, 0 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_and_b32 s3, s4, s3 ; GFX1200-TRUE16-NEXT: s_cmp_lg_f16 s2, 0 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe -; GFX1200-TRUE16-NEXT: v_cndmask_b16 v4.l, 0x7e00, v1.l, s3 +; GFX1200-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v1.l, s3 ; GFX1200-TRUE16-NEXT: s_cselect_b32 s2, -1, 0 ; GFX1200-TRUE16-NEXT: s_cmp_nge_f16 s8, 0x7c00 -; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1200-TRUE16-NEXT: v_lshl_or_b32 v0, v4, 16, v0 ; GFX1200-TRUE16-NEXT: s_cselect_b32 s3, -1, 0 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_and_b32 s2, s3, s2 @@ -11168,15 +11145,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i ; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v2, 0 ; GFX1200-TRUE16-NEXT: s_cselect_b32 s2, -1, 0 ; GFX1200-TRUE16-NEXT: s_cmp_nge_f16 s7, 0x7c00 -; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX1200-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX1200-TRUE16-NEXT: s_cselect_b32 s3, -1, 0 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe ; GFX1200-TRUE16-NEXT: s_and_b32 s2, s3, s2 ; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe -; GFX1200-TRUE16-NEXT: v_cndmask_b16 v3.l, 0x7e00, v3.l, s2 -; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1200-TRUE16-NEXT: v_lshl_or_b32 v1, v3, 16, v1 +; GFX1200-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x7e00, v3.l, s2 ; GFX1200-TRUE16-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX1200-TRUE16-NEXT: s_endpgm ; |