; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1250 < %s | FileCheck %s

; Codegen test for 64-bit integer min/max/abs intrinsics when compiled with
; GlobalISel for gfx1250. Three groups of functions are covered:
;   * default calling convention, scalar i64 operands (arguments arrive in VGPRs)
;   * default calling convention, <4 x i64> operands
;   * amdgpu_ps with inreg operands (arguments arrive in SGPRs)
; The assertion lines inside each function are generated output; regenerate
; them with the script named on the first line rather than editing by hand.

; ---------------------------------------------------------------------------
; Intrinsic declarations
; ---------------------------------------------------------------------------

declare i64 @llvm.umin.i64(i64, i64)
declare i64 @llvm.umax.i64(i64, i64)
declare i64 @llvm.smin.i64(i64, i64)
declare i64 @llvm.smax.i64(i64, i64)
declare i64 @llvm.abs.i64(i64, i1)

declare <4 x i64> @llvm.umin.v4i64(<4 x i64>, <4 x i64>)
declare <4 x i64> @llvm.umax.v4i64(<4 x i64>, <4 x i64>)
declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
declare <4 x i64> @llvm.smax.v4i64(<4 x i64>, <4 x i64>)

; ---------------------------------------------------------------------------
; Scalar i64 min/max, operands in VGPRs
; Each function is expected to select one 64-bit VALU min/max instruction on
; v[0:1] and v[2:3].
; ---------------------------------------------------------------------------

; Unsigned minimum -> v_min_u64.
define i64 @test_umin_i64(i64 %a, i64 %b) {
; CHECK-LABEL: test_umin_i64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT:    s_wait_kmcnt 0x0
; CHECK-NEXT:    v_min_u64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT:    s_set_pc_i64 s[30:31]
  %res = call i64 @llvm.umin.i64(i64 %a, i64 %b)
  ret i64 %res
}

; Unsigned maximum -> v_max_u64.
define i64 @test_umax_i64(i64 %a, i64 %b) {
; CHECK-LABEL: test_umax_i64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT:    s_wait_kmcnt 0x0
; CHECK-NEXT:    v_max_u64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT:    s_set_pc_i64 s[30:31]
  %res = call i64 @llvm.umax.i64(i64 %a, i64 %b)
  ret i64 %res
}

; Signed minimum -> v_min_i64.
define i64 @test_smin_i64(i64 %a, i64 %b) {
; CHECK-LABEL: test_smin_i64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT:    s_wait_kmcnt 0x0
; CHECK-NEXT:    v_min_i64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT:    s_set_pc_i64 s[30:31]
  %res = call i64 @llvm.smin.i64(i64 %a, i64 %b)
  ret i64 %res
}

; Signed maximum -> v_max_i64.
define i64 @test_smax_i64(i64 %a, i64 %b) {
; CHECK-LABEL: test_smax_i64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT:    s_wait_kmcnt 0x0
; CHECK-NEXT:    v_max_i64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT:    s_set_pc_i64 s[30:31]
  %res = call i64 @llvm.smax.i64(i64 %a, i64 %b)
  ret i64 %res
}

; ---------------------------------------------------------------------------
; <4 x i64> min/max
; The expected output has four 64-bit min/max instructions, one per element,
; pairing v[0:7] (first operand) with v[8:15] (second operand).
; ---------------------------------------------------------------------------

; Unsigned vector minimum -> 4 x v_min_u64.
define <4 x i64> @test_umin_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_umin_v4i64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT:    s_wait_kmcnt 0x0
; CHECK-NEXT:    v_min_u64 v[0:1], v[0:1], v[8:9]
; CHECK-NEXT:    v_min_u64 v[2:3], v[2:3], v[10:11]
; CHECK-NEXT:    v_min_u64 v[4:5], v[4:5], v[12:13]
; CHECK-NEXT:    v_min_u64 v[6:7], v[6:7], v[14:15]
; CHECK-NEXT:    s_set_pc_i64 s[30:31]
  %res = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %a, <4 x i64> %b)
  ret <4 x i64> %res
}

; Unsigned vector maximum -> 4 x v_max_u64.
define <4 x i64> @test_umax_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_umax_v4i64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT:    s_wait_kmcnt 0x0
; CHECK-NEXT:    v_max_u64 v[0:1], v[0:1], v[8:9]
; CHECK-NEXT:    v_max_u64 v[2:3], v[2:3], v[10:11]
; CHECK-NEXT:    v_max_u64 v[4:5], v[4:5], v[12:13]
; CHECK-NEXT:    v_max_u64 v[6:7], v[6:7], v[14:15]
; CHECK-NEXT:    s_set_pc_i64 s[30:31]
  %res = call <4 x i64> @llvm.umax.v4i64(<4 x i64> %a, <4 x i64> %b)
  ret <4 x i64> %res
}

; Signed vector minimum -> 4 x v_min_i64.
define <4 x i64> @test_smin_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_smin_v4i64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT:    s_wait_kmcnt 0x0
; CHECK-NEXT:    v_min_i64 v[0:1], v[0:1], v[8:9]
; CHECK-NEXT:    v_min_i64 v[2:3], v[2:3], v[10:11]
; CHECK-NEXT:    v_min_i64 v[4:5], v[4:5], v[12:13]
; CHECK-NEXT:    v_min_i64 v[6:7], v[6:7], v[14:15]
; CHECK-NEXT:    s_set_pc_i64 s[30:31]
  %res = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %a, <4 x i64> %b)
  ret <4 x i64> %res
}

; Signed vector maximum -> 4 x v_max_i64.
define <4 x i64> @test_smax_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_smax_v4i64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT:    s_wait_kmcnt 0x0
; CHECK-NEXT:    v_max_i64 v[0:1], v[0:1], v[8:9]
; CHECK-NEXT:    v_max_i64 v[2:3], v[2:3], v[10:11]
; CHECK-NEXT:    v_max_i64 v[4:5], v[4:5], v[12:13]
; CHECK-NEXT:    v_max_i64 v[6:7], v[6:7], v[14:15]
; CHECK-NEXT:    s_set_pc_i64 s[30:31]
  %res = call <4 x i64> @llvm.smax.v4i64(<4 x i64> %a, <4 x i64> %b)
  ret <4 x i64> %res
}

; ---------------------------------------------------------------------------
; i64 abs, operand in VGPRs
; ---------------------------------------------------------------------------

; No single abs instruction is expected here. The generated sequence derives a
; sign mask from the high dword (arithmetic shift right by 31), duplicates it
; into a 64-bit register pair, adds it to the input with a 64-bit add, and then
; XORs each 32-bit half with the mask.
; The i1 operand of llvm.abs is false (the is_int_min_poison flag in the
; LangRef).
define i64 @test_abs_i64(i64 %a) {
; CHECK-LABEL: test_abs_i64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT:    s_wait_kmcnt 0x0
; CHECK-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; CHECK-NEXT:    v_mov_b32_e32 v3, v2
; CHECK-NEXT:    v_add_nc_u64_e32 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; CHECK-NEXT:    v_xor_b32_e32 v0, v0, v2
; CHECK-NEXT:    v_xor_b32_e32 v1, v1, v2
; CHECK-NEXT:    s_set_pc_i64 s[30:31]
  %res = call i64 @llvm.abs.i64(i64 %a, i1 false)
  ret i64 %res
}

; ---------------------------------------------------------------------------
; amdgpu_ps variants with inreg (SGPR) operands
; For min/max the expected code still uses the VALU 64-bit instruction, reading
; s[0:1] and s[2:3] directly, and then moves the result back to s0/s1 with
; v_readfirstlane_b32.
; ---------------------------------------------------------------------------

; Unsigned minimum of SGPR inputs -> v_min_u64 + readfirstlane pair.
define amdgpu_ps i64 @test_umin_i64_s(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: test_umin_i64_s:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    v_min_u64 v[0:1], s[0:1], s[2:3]
; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
; CHECK-NEXT:    v_readfirstlane_b32 s1, v1
; CHECK-NEXT:    ; return to shader part epilog
  %res = call i64 @llvm.umin.i64(i64 %a, i64 %b)
  ret i64 %res
}

; Unsigned maximum of SGPR inputs -> v_max_u64 + readfirstlane pair.
define amdgpu_ps i64 @test_umax_i64_s(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: test_umax_i64_s:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    v_max_u64 v[0:1], s[0:1], s[2:3]
; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
; CHECK-NEXT:    v_readfirstlane_b32 s1, v1
; CHECK-NEXT:    ; return to shader part epilog
  %res = call i64 @llvm.umax.i64(i64 %a, i64 %b)
  ret i64 %res
}

; Signed minimum of SGPR inputs -> v_min_i64 + readfirstlane pair.
define amdgpu_ps i64 @test_smin_i64_s(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: test_smin_i64_s:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    v_min_i64 v[0:1], s[0:1], s[2:3]
; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
; CHECK-NEXT:    v_readfirstlane_b32 s1, v1
; CHECK-NEXT:    ; return to shader part epilog
  %res = call i64 @llvm.smin.i64(i64 %a, i64 %b)
  ret i64 %res
}

; Signed maximum of SGPR inputs -> v_max_i64 + readfirstlane pair.
define amdgpu_ps i64 @test_smax_i64_s(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: test_smax_i64_s:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    v_max_i64 v[0:1], s[0:1], s[2:3]
; CHECK-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; CHECK-NEXT:    v_readfirstlane_b32 s0, v0
; CHECK-NEXT:    v_readfirstlane_b32 s1, v1
; CHECK-NEXT:    ; return to shader part epilog
  %res = call i64 @llvm.smax.i64(i64 %a, i64 %b)
  ret i64 %res
}

; abs of an SGPR input stays entirely on the scalar unit in the expected
; output: s_ashr_i32 builds the sign mask from the high dword, s_mov_b32
; widens it to a 64-bit pair, then s_add_nc_u64 and s_xor_b64 apply it.
define amdgpu_ps i64 @test_abs_i64_s(i64 inreg %a) {
; CHECK-LABEL: test_abs_i64_s:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_ashr_i32 s2, s1, 31
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; CHECK-NEXT:    s_mov_b32 s3, s2
; CHECK-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; CHECK-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; CHECK-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
; CHECK-NEXT:    ; return to shader part epilog
  %res = call i64 @llvm.abs.i64(i64 %a, i1 false)
  ret i64 %res
}