aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/frem.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/frem.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/frem.ll65
1 files changed, 19 insertions, 46 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/frem.ll b/llvm/test/CodeGen/AMDGPU/frem.ll
index 415828f..35d178c 100644
--- a/llvm/test/CodeGen/AMDGPU/frem.ll
+++ b/llvm/test/CodeGen/AMDGPU/frem.ll
@@ -5972,16 +5972,14 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: .LBB9_16: ; %Flow54
; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v1.l
; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v0.l|
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v4.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v2.l, s2
; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v3.l|
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX11-TRUE16-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0xffff, v0
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo
-; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, 0x7e00, v7.l, s2
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v7.l, s2
; GFX11-TRUE16-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
@@ -6422,19 +6420,16 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s2, 0x7c00
; GFX1150-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
-; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_and_b32 s2, s2, s3
; GFX1150-TRUE16-NEXT: s_cmp_lg_f16 s5, 0
; GFX1150-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.l, s2
; GFX1150-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s4, 0x7c00
-; GFX1150-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_and_b32 s2, s3, s2
-; GFX1150-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x7e00, v1.l, s2
-; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1150-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX1150-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v1.l, s2
; GFX1150-TRUE16-NEXT: global_store_b32 v2, v0, s[0:1]
; GFX1150-TRUE16-NEXT: s_endpgm
;
@@ -6902,20 +6897,17 @@ define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1200-TRUE16-NEXT: s_cmp_nge_f16 s2, 0x7c00
; GFX1200-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
-; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX1200-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1200-TRUE16-NEXT: s_and_b32 s2, s2, s3
; GFX1200-TRUE16-NEXT: s_cmp_lg_f16 s5, 0
; GFX1200-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.l, s2
; GFX1200-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX1200-TRUE16-NEXT: s_cmp_nge_f16 s4, 0x7c00
-; GFX1200-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX1200-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
; GFX1200-TRUE16-NEXT: s_and_b32 s2, s3, s2
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
-; GFX1200-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x7e00, v1.l, s2
-; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-TRUE16-NEXT: v_lshl_or_b32 v0, v1, 16, v0
+; GFX1200-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v1.l, s2
; GFX1200-TRUE16-NEXT: global_store_b32 v2, v0, s[0:1]
; GFX1200-TRUE16-NEXT: s_endpgm
;
@@ -9346,29 +9338,23 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX11-TRUE16-NEXT: .LBB10_32: ; %Flow124
; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v2.l
; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v0.l|
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v6.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v4.l, s2
; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v5.l|
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v3.l
-; GFX11-TRUE16-NEXT: v_cndmask_b16 v2.l, 0x7e00, v7.l, s2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v7.l, s2
; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v1.l|
-; GFX11-TRUE16-NEXT: v_mov_b32_e32 v3, 0
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_4) | instid1(VALU_DEP_2)
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v0, v2, 16, v0
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo
; GFX11-TRUE16-NEXT: v_cmp_lg_f16_e32 vcc_lo, 0, v10.l
; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, 0x7e00, v8.l, s2
; GFX11-TRUE16-NEXT: v_cmp_nle_f16_e64 s2, 0x7c00, |v9.l|
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX11-TRUE16-NEXT: s_and_b32 s2, s2, vcc_lo
-; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT: v_cndmask_b16 v4.l, 0x7e00, v11.l, s2
-; GFX11-TRUE16-NEXT: v_lshl_or_b32 v1, v4, 16, v1
-; GFX11-TRUE16-NEXT: global_store_b64 v3, v[0:1], s[0:1]
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x7e00, v11.l, s2
+; GFX11-TRUE16-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-TRUE16-NEXT: s_endpgm
;
; GFX11-FAKE16-LABEL: frem_v4f16:
@@ -10209,21 +10195,19 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: s_cselect_b32 s4, -1, 0
; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s3, 0x7c00
; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
-; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_and_b32 s3, s3, s4
; GFX1150-TRUE16-NEXT: s_cmp_lg_f16 s6, 0
; GFX1150-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.l, s3
; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s5, 0x7c00
-; GFX1150-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX1150-TRUE16-NEXT: s_cselect_b32 s4, -1, 0
-; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
+; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_and_b32 s3, s4, s3
; GFX1150-TRUE16-NEXT: s_cmp_lg_f16 s2, 0
-; GFX1150-TRUE16-NEXT: v_cndmask_b16 v4.l, 0x7e00, v1.l, s3
+; GFX1150-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v1.l, s3
; GFX1150-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s8, 0x7c00
-; GFX1150-TRUE16-NEXT: v_lshl_or_b32 v0, v4, 16, v0
; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_and_b32 s2, s3, s2
@@ -10232,13 +10216,10 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1150-TRUE16-NEXT: v_mov_b32_e32 v2, 0
; GFX1150-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX1150-TRUE16-NEXT: s_cmp_nge_f16 s7, 0x7c00
-; GFX1150-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
-; GFX1150-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX1150-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
+; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX1150-TRUE16-NEXT: s_and_b32 s2, s3, s2
-; GFX1150-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1150-TRUE16-NEXT: v_cndmask_b16 v3.l, 0x7e00, v3.l, s2
-; GFX1150-TRUE16-NEXT: v_lshl_or_b32 v1, v3, 16, v1
+; GFX1150-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x7e00, v3.l, s2
; GFX1150-TRUE16-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1150-TRUE16-NEXT: s_endpgm
;
@@ -11147,18 +11128,14 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x7e00, v0.l, s3
; GFX1200-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1200-TRUE16-NEXT: s_cmp_nge_f16 s5, 0x7c00
-; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX1200-TRUE16-NEXT: s_cselect_b32 s4, -1, 0
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
; GFX1200-TRUE16-NEXT: s_and_b32 s3, s4, s3
; GFX1200-TRUE16-NEXT: s_cmp_lg_f16 s2, 0
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
-; GFX1200-TRUE16-NEXT: v_cndmask_b16 v4.l, 0x7e00, v1.l, s3
+; GFX1200-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x7e00, v1.l, s3
; GFX1200-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX1200-TRUE16-NEXT: s_cmp_nge_f16 s8, 0x7c00
-; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-TRUE16-NEXT: v_lshl_or_b32 v0, v4, 16, v0
; GFX1200-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
; GFX1200-TRUE16-NEXT: s_and_b32 s2, s3, s2
@@ -11168,15 +11145,11 @@ define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %i
; GFX1200-TRUE16-NEXT: v_mov_b32_e32 v2, 0
; GFX1200-TRUE16-NEXT: s_cselect_b32 s2, -1, 0
; GFX1200-TRUE16-NEXT: s_cmp_nge_f16 s7, 0x7c00
-; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1200-TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX1200-TRUE16-NEXT: s_cselect_b32 s3, -1, 0
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
; GFX1200-TRUE16-NEXT: s_and_b32 s2, s3, s2
; GFX1200-TRUE16-NEXT: s_wait_alu 0xfffe
-; GFX1200-TRUE16-NEXT: v_cndmask_b16 v3.l, 0x7e00, v3.l, s2
-; GFX1200-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1200-TRUE16-NEXT: v_lshl_or_b32 v1, v3, 16, v1
+; GFX1200-TRUE16-NEXT: v_cndmask_b16 v1.h, 0x7e00, v3.l, s2
; GFX1200-TRUE16-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX1200-TRUE16-NEXT: s_endpgm
;