diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/max.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/max.ll | 324 |
1 files changed, 322 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/max.ll b/llvm/test/CodeGen/AMDGPU/max.ll index 3d8d849..fef9a9a 100644 --- a/llvm/test/CodeGen/AMDGPU/max.ll +++ b/llvm/test/CodeGen/AMDGPU/max.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -mtriple=amdgcn -mcpu=pitcairn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s -; RUN: llc -mtriple=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG %s +; RUN: llc -mtriple=amdgcn -mcpu=pitcairn < %s | FileCheck -enable-var-scope -check-prefix=SI %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 --amdgpu-enable-vopd=0 < %s | FileCheck -enable-var-scope -check-prefix=GFX1250 %s +; RUN: llc -mtriple=r600 -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG %s define amdgpu_kernel void @v_test_imax_sge_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind { ; SI-LABEL: v_test_imax_sge_i32: @@ -24,6 +25,23 @@ define amdgpu_kernel void @v_test_imax_sge_i32(ptr addrspace(1) %out, ptr addrsp ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: v_test_imax_sge_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x34 +; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1250-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_load_b32 v0, v0, s[0:1] scale_offset +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_max_i32_e32 v0, s2, v0 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_test_imax_sge_i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 3, @10, KC0[CB0:0-32], KC1[] @@ -80,6 +98,26 @@ define amdgpu_kernel void @v_test_imax_sge_v4i32(ptr addrspace(1) %out, ptr addr ; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: v_test_imax_sge_v4i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x34 +; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1250-NEXT: v_mov_b32_e32 v4, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_load_b128 v[0:3], v0, s[0:1] scale_offset +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_max_i32_e32 v3, s7, v3 +; GFX1250-NEXT: v_max_i32_e32 v2, s6, v2 +; GFX1250-NEXT: v_max_i32_e32 v1, s5, v1 +; GFX1250-NEXT: v_max_i32_e32 v0, s4, v0 +; GFX1250-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_test_imax_sge_v4i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 3, @10, KC0[CB0:0-32], KC1[] @@ -127,6 +165,17 @@ define amdgpu_kernel void @s_test_imax_sge_i32(ptr addrspace(1) %out, i32 %a, i3 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: s_test_imax_sge_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_max_i32 s2, s2, s3 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: s_test_imax_sge_i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -156,6 +205,17 @@ define amdgpu_kernel void @s_test_imax_sge_imm_i32(ptr addrspace(1) %out, i32 %a ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: s_test_imax_sge_imm_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_max_i32 s2, s2, 9 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: s_test_imax_sge_imm_i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -195,6 +255,23 @@ define amdgpu_kernel void @v_test_imax_sge_i8(ptr addrspace(1) %out, ptr addrspa ; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: v_test_imax_sge_i8: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_load_i8 s2, s[2:3], 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_i8 s3, s[4:5], 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_max_i32 s2, s2, s3 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: global_store_b8 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_test_imax_sge_i8: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] @@ -250,6 +327,17 @@ define amdgpu_kernel void @s_test_imax_sgt_imm_i32(ptr addrspace(1) %out, i32 %a ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: s_test_imax_sgt_imm_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_max_i32 s2, s2, 9 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: s_test_imax_sgt_imm_i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -282,6 +370,18 @@ define amdgpu_kernel void @s_test_imax_sgt_imm_v2i32(ptr addrspace(1) %out, <2 x ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: s_test_imax_sgt_imm_v2i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_max_i32 s2, s2, 9 +; GFX1250-NEXT: s_max_i32 s3, s3, 9 +; GFX1250-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-NEXT: v_mov_b32_e32 v1, s3 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: s_test_imax_sgt_imm_v2i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[] @@ -322,6 +422,23 @@ define amdgpu_kernel void @v_test_imax_sgt_i32(ptr addrspace(1) %out, ptr addrsp ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: v_test_imax_sgt_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x34 +; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1250-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_load_b32 v0, v0, s[0:1] scale_offset +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_max_i32_e32 v0, s2, v0 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_test_imax_sgt_i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 3, @10, KC0[CB0:0-32], KC1[] @@ -366,6 +483,17 @@ define amdgpu_kernel void @s_test_imax_sgt_i32(ptr addrspace(1) %out, i32 %a, i3 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: s_test_imax_sgt_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_max_i32 s2, s2, s3 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: s_test_imax_sgt_i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -404,6 +532,23 @@ define amdgpu_kernel void @v_test_umax_uge_i32(ptr addrspace(1) %out, ptr addrsp ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: v_test_umax_uge_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x34 +; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1250-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_load_b32 v0, v0, s[0:1] scale_offset +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_max_u32_e32 v0, s2, v0 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_test_umax_uge_i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 3, @10, KC0[CB0:0-32], KC1[] @@ -448,6 +593,17 @@ define amdgpu_kernel void @s_test_umax_uge_i32(ptr addrspace(1) %out, i32 %a, i3 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: s_test_umax_uge_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_max_u32 s2, s2, s3 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: s_test_umax_uge_i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -483,6 +639,22 @@ define amdgpu_kernel void @s_test_umax_uge_v3i32(ptr addrspace(1) %out, <3 x i32 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: s_test_umax_uge_v3i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_clause 0x1 +; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x34 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b32_e32 v3, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_max_u32 s2, s10, s14 +; GFX1250-NEXT: s_max_u32 s3, s8, s12 +; GFX1250-NEXT: s_max_u32 s4, s9, s13 +; GFX1250-NEXT: v_mov_b32_e32 v0, s3 +; GFX1250-NEXT: v_mov_b32_e32 v1, s4 +; GFX1250-NEXT: v_mov_b32_e32 v2, s2 +; GFX1250-NEXT: global_store_b96 v3, v[0:2], s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: s_test_umax_uge_v3i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] @@ -527,6 +699,23 @@ define amdgpu_kernel void @v_test_umax_uge_i8(ptr addrspace(1) %out, ptr addrspa ; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: v_test_umax_uge_i8: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_load_u8 s2, s[2:3], 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_u8 s3, s[4:5], 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_max_u32 s2, s2, s3 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: global_store_b8 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_test_umax_uge_i8: ; EG: ; %bb.0: ; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[] @@ -581,6 +770,22 @@ define amdgpu_kernel void @v_test_umax_ugt_i32(ptr addrspace(1) %out, ptr addrsp ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: v_test_umax_ugt_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x34 +; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1250-NEXT: v_mov_b32_e32 v1, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: global_load_b32 v0, v0, s[0:1] scale_offset +; GFX1250-NEXT: s_load_b32 s2, s[0:1], 0x0 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_loadcnt 0x0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_max_u32_e32 v0, s2, v0 +; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: v_test_umax_ugt_i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 3, @10, KC0[CB0:0-32], KC1[] @@ -625,6 +830,17 @@ define amdgpu_kernel void @s_test_umax_ugt_i32(ptr addrspace(1) %out, i32 %a, i3 ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: s_test_umax_ugt_i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_max_u32 s2, s2, s3 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: s_test_umax_ugt_i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -657,6 +873,18 @@ define amdgpu_kernel void @s_test_umax_ugt_imm_v2i32(ptr addrspace(1) %out, <2 x ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: s_test_umax_ugt_imm_v2i32: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_max_u32 s2, s2, 15 +; GFX1250-NEXT: s_max_u32 s3, s3, 23 +; GFX1250-NEXT: v_mov_b32_e32 v0, s2 +; GFX1250-NEXT: v_mov_b32_e32 v1, s3 +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: s_test_umax_ugt_imm_v2i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[] @@ -693,6 +921,22 @@ define amdgpu_kernel void @simplify_demanded_bits_test_umax_ugt_i16(ptr addrspac ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: simplify_demanded_bits_test_umax_ugt_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_clause 0x2 +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4c +; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_and_b32 s2, s2, 0xffff +; GFX1250-NEXT: s_and_b32 s3, s3, 0xffff +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: s_max_u32 s2, s2, s3 +; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: simplify_demanded_bits_test_umax_ugt_i16: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @10, KC0[], KC1[] @@ -740,6 +984,22 @@ define amdgpu_kernel void @simplify_demanded_bits_test_max_slt_i16(ptr addrspace ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: simplify_demanded_bits_test_max_slt_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_clause 0x2 +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4c +; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x70 +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_sext_i32_i16 s2, s2 +; GFX1250-NEXT: s_sext_i32_i16 s3, s3 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: s_max_i32 s2, s2, s3 +; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: simplify_demanded_bits_test_max_slt_i16: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @10, KC0[], KC1[] @@ -786,6 +1046,22 @@ define amdgpu_kernel void @s_test_imax_sge_i16(ptr addrspace(1) %out, [8 x i32], ; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: s_test_imax_sge_i16: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_clause 0x2 +; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x70 +; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x4c +; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX1250-NEXT: v_mov_b32_e32 v0, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: s_sext_i32_i16 s2, s2 +; GFX1250-NEXT: s_sext_i32_i16 s3, s3 +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1250-NEXT: s_max_i32 s2, s3, s2 +; GFX1250-NEXT: v_mov_b32_e32 v1, s2 +; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: s_test_imax_sge_i16: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @10, KC0[], KC1[] @@ -844,6 +1120,17 @@ define amdgpu_kernel void @test_umax_ugt_i64(ptr addrspace(1) %out, i64 %a, i64 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: test_umax_ugt_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_max_u64 v[0:1], s[2:3], s[4:5] +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: test_umax_ugt_i64: ; EG: ; %bb.0: ; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] @@ -886,6 +1173,17 @@ define amdgpu_kernel void @test_umax_uge_i64(ptr addrspace(1) %out, i64 %a, i64 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: test_umax_uge_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_max_u64 v[0:1], s[2:3], s[4:5] +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: test_umax_uge_i64: ; EG: ; %bb.0: ; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] @@ -928,6 +1226,17 @@ define amdgpu_kernel void @test_imax_sgt_i64(ptr addrspace(1) %out, i64 %a, i64 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: test_imax_sgt_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_max_i64 v[0:1], s[2:3], s[4:5] +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: test_imax_sgt_i64: ; EG: ; %bb.0: ; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] @@ -970,6 +1279,17 @@ define amdgpu_kernel void @test_imax_sge_i64(ptr addrspace(1) %out, i64 %a, i64 ; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; SI-NEXT: s_endpgm ; +; GFX1250-LABEL: test_imax_sge_i64: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34 +; GFX1250-NEXT: v_mov_b32_e32 v2, 0 +; GFX1250-NEXT: s_wait_kmcnt 0x0 +; GFX1250-NEXT: v_max_i64 v[0:1], s[2:3], s[4:5] +; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1] +; GFX1250-NEXT: s_endpgm +; ; EG-LABEL: test_imax_sge_i64: ; EG: ; %bb.0: ; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] |