aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/max.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/max.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/max.ll324
1 files changed, 322 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/max.ll b/llvm/test/CodeGen/AMDGPU/max.ll
index 3d8d849..fef9a9a 100644
--- a/llvm/test/CodeGen/AMDGPU/max.ll
+++ b/llvm/test/CodeGen/AMDGPU/max.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -mtriple=amdgcn -mcpu=pitcairn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s
-; RUN: llc -mtriple=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG %s
+; RUN: llc -mtriple=amdgcn -mcpu=pitcairn < %s | FileCheck -enable-var-scope -check-prefix=SI %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 --amdgpu-enable-vopd=0 < %s | FileCheck -enable-var-scope -check-prefix=GFX1250 %s
+; RUN: llc -mtriple=r600 -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG %s
define amdgpu_kernel void @v_test_imax_sge_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind {
; SI-LABEL: v_test_imax_sge_i32:
@@ -24,6 +25,23 @@ define amdgpu_kernel void @v_test_imax_sge_i32(ptr addrspace(1) %out, ptr addrsp
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: v_test_imax_sge_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x34
+; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v0, v0, s[0:1] scale_offset
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_max_i32_e32 v0, s2, v0
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: v_test_imax_sge_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 3, @10, KC0[CB0:0-32], KC1[]
@@ -80,6 +98,26 @@ define amdgpu_kernel void @v_test_imax_sge_v4i32(ptr addrspace(1) %out, ptr addr
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: v_test_imax_sge_v4i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x34
+; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1250-NEXT: v_mov_b32_e32 v4, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b128 v[0:3], v0, s[0:1] scale_offset
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_max_i32_e32 v3, s7, v3
+; GFX1250-NEXT: v_max_i32_e32 v2, s6, v2
+; GFX1250-NEXT: v_max_i32_e32 v1, s5, v1
+; GFX1250-NEXT: v_max_i32_e32 v0, s4, v0
+; GFX1250-NEXT: global_store_b128 v4, v[0:3], s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: v_test_imax_sge_v4i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 3, @10, KC0[CB0:0-32], KC1[]
@@ -127,6 +165,17 @@ define amdgpu_kernel void @s_test_imax_sge_i32(ptr addrspace(1) %out, i32 %a, i3
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: s_test_imax_sge_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_max_i32 s2, s2, s3
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: s_test_imax_sge_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -156,6 +205,17 @@ define amdgpu_kernel void @s_test_imax_sge_imm_i32(ptr addrspace(1) %out, i32 %a
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: s_test_imax_sge_imm_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_max_i32 s2, s2, 9
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: s_test_imax_sge_imm_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -195,6 +255,23 @@ define amdgpu_kernel void @v_test_imax_sge_i8(ptr addrspace(1) %out, ptr addrspa
; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: v_test_imax_sge_i8:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_i8 s2, s[2:3], 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_i8 s3, s[4:5], 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_max_i32 s2, s2, s3
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b8 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: v_test_imax_sge_i8:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[]
@@ -250,6 +327,17 @@ define amdgpu_kernel void @s_test_imax_sgt_imm_i32(ptr addrspace(1) %out, i32 %a
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: s_test_imax_sgt_imm_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b96 s[0:2], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_max_i32 s2, s2, 9
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: s_test_imax_sgt_imm_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -282,6 +370,18 @@ define amdgpu_kernel void @s_test_imax_sgt_imm_v2i32(ptr addrspace(1) %out, <2 x
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: s_test_imax_sgt_imm_v2i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_max_i32 s2, s2, 9
+; GFX1250-NEXT: s_max_i32 s3, s3, 9
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s3
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: s_test_imax_sgt_imm_v2i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[]
@@ -322,6 +422,23 @@ define amdgpu_kernel void @v_test_imax_sgt_i32(ptr addrspace(1) %out, ptr addrsp
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: v_test_imax_sgt_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x34
+; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v0, v0, s[0:1] scale_offset
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_max_i32_e32 v0, s2, v0
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: v_test_imax_sgt_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 3, @10, KC0[CB0:0-32], KC1[]
@@ -366,6 +483,17 @@ define amdgpu_kernel void @s_test_imax_sgt_i32(ptr addrspace(1) %out, i32 %a, i3
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: s_test_imax_sgt_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_max_i32 s2, s2, s3
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: s_test_imax_sgt_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -404,6 +532,23 @@ define amdgpu_kernel void @v_test_umax_uge_i32(ptr addrspace(1) %out, ptr addrsp
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: v_test_umax_uge_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x34
+; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v0, v0, s[0:1] scale_offset
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_max_u32_e32 v0, s2, v0
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: v_test_umax_uge_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 3, @10, KC0[CB0:0-32], KC1[]
@@ -448,6 +593,17 @@ define amdgpu_kernel void @s_test_umax_uge_i32(ptr addrspace(1) %out, i32 %a, i3
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: s_test_umax_uge_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_max_u32 s2, s2, s3
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: s_test_umax_uge_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -483,6 +639,22 @@ define amdgpu_kernel void @s_test_umax_uge_v3i32(ptr addrspace(1) %out, <3 x i32
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: s_test_umax_uge_v3i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_clause 0x1
+; GFX1250-NEXT: s_load_b256 s[8:15], s[4:5], 0x34
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v3, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_max_u32 s2, s10, s14
+; GFX1250-NEXT: s_max_u32 s3, s8, s12
+; GFX1250-NEXT: s_max_u32 s4, s9, s13
+; GFX1250-NEXT: v_mov_b32_e32 v0, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s4
+; GFX1250-NEXT: v_mov_b32_e32 v2, s2
+; GFX1250-NEXT: global_store_b96 v3, v[0:2], s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: s_test_umax_uge_v3i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[]
@@ -527,6 +699,23 @@ define amdgpu_kernel void @v_test_umax_uge_i8(ptr addrspace(1) %out, ptr addrspa
; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: v_test_umax_uge_i8:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_load_u8 s2, s[2:3], 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_u8 s3, s[4:5], 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_max_u32 s2, s2, s3
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b8 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: v_test_umax_uge_i8:
; EG: ; %bb.0:
; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[]
@@ -581,6 +770,22 @@ define amdgpu_kernel void @v_test_umax_ugt_i32(ptr addrspace(1) %out, ptr addrsp
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: v_test_umax_ugt_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x34
+; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1250-NEXT: v_mov_b32_e32 v1, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: global_load_b32 v0, v0, s[0:1] scale_offset
+; GFX1250-NEXT: s_load_b32 s2, s[0:1], 0x0
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_loadcnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_max_u32_e32 v0, s2, v0
+; GFX1250-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: v_test_umax_ugt_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 3, @10, KC0[CB0:0-32], KC1[]
@@ -625,6 +830,17 @@ define amdgpu_kernel void @s_test_umax_ugt_i32(ptr addrspace(1) %out, i32 %a, i3
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: s_test_umax_ugt_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_max_u32 s2, s2, s3
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: s_test_umax_ugt_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -657,6 +873,18 @@ define amdgpu_kernel void @s_test_umax_ugt_imm_v2i32(ptr addrspace(1) %out, <2 x
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: s_test_umax_ugt_imm_v2i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_max_u32 s2, s2, 15
+; GFX1250-NEXT: s_max_u32 s3, s3, 23
+; GFX1250-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s3
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: s_test_umax_ugt_imm_v2i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[]
@@ -693,6 +921,22 @@ define amdgpu_kernel void @simplify_demanded_bits_test_umax_ugt_i16(ptr addrspac
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: simplify_demanded_bits_test_umax_ugt_i16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_clause 0x2
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4c
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x70
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_and_b32 s2, s2, 0xffff
+; GFX1250-NEXT: s_and_b32 s3, s3, 0xffff
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT: s_max_u32 s2, s2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: simplify_demanded_bits_test_umax_ugt_i16:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[], KC1[]
@@ -740,6 +984,22 @@ define amdgpu_kernel void @simplify_demanded_bits_test_max_slt_i16(ptr addrspace
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: simplify_demanded_bits_test_max_slt_i16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_clause 0x2
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x4c
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x70
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_sext_i32_i16 s2, s2
+; GFX1250-NEXT: s_sext_i32_i16 s3, s3
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT: s_max_i32 s2, s2, s3
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: simplify_demanded_bits_test_max_slt_i16:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[], KC1[]
@@ -786,6 +1046,22 @@ define amdgpu_kernel void @s_test_imax_sge_i16(ptr addrspace(1) %out, [8 x i32],
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: s_test_imax_sge_i16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_clause 0x2
+; GFX1250-NEXT: s_load_b32 s2, s[4:5], 0x70
+; GFX1250-NEXT: s_load_b32 s3, s[4:5], 0x4c
+; GFX1250-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX1250-NEXT: v_mov_b32_e32 v0, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: s_sext_i32_i16 s2, s2
+; GFX1250-NEXT: s_sext_i32_i16 s3, s3
+; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-NEXT: s_max_i32 s2, s3, s2
+; GFX1250-NEXT: v_mov_b32_e32 v1, s2
+; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: s_test_imax_sge_i16:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @10, KC0[], KC1[]
@@ -844,6 +1120,17 @@ define amdgpu_kernel void @test_umax_ugt_i64(ptr addrspace(1) %out, i64 %a, i64
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: test_umax_ugt_i64:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_max_u64 v[0:1], s[2:3], s[4:5]
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: test_umax_ugt_i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[]
@@ -886,6 +1173,17 @@ define amdgpu_kernel void @test_umax_uge_i64(ptr addrspace(1) %out, i64 %a, i64
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: test_umax_uge_i64:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_max_u64 v[0:1], s[2:3], s[4:5]
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: test_umax_uge_i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[]
@@ -928,6 +1226,17 @@ define amdgpu_kernel void @test_imax_sgt_i64(ptr addrspace(1) %out, i64 %a, i64
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: test_imax_sgt_i64:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_max_i64 v[0:1], s[2:3], s[4:5]
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: test_imax_sgt_i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[]
@@ -970,6 +1279,17 @@ define amdgpu_kernel void @test_imax_sge_i64(ptr addrspace(1) %out, i64 %a, i64
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
+; GFX1250-LABEL: test_imax_sge_i64:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-NEXT: s_wait_xcnt 0x0
+; GFX1250-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX1250-NEXT: v_mov_b32_e32 v2, 0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_max_i64 v[0:1], s[2:3], s[4:5]
+; GFX1250-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX1250-NEXT: s_endpgm
+;
; EG-LABEL: test_imax_sge_i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[]