aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/max3.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/max3.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/max3.ll59
1 files changed, 52 insertions, 7 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/max3.ll b/llvm/test/CodeGen/AMDGPU/max3.ll
index a757bb0..b922854 100644
--- a/llvm/test/CodeGen/AMDGPU/max3.ll
+++ b/llvm/test/CodeGen/AMDGPU/max3.ll
@@ -1,6 +1,7 @@
; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9_1250 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX1250,GFX9_1250 %s
; GCN-LABEL: {{^}}v_test_imax3_sgt_i32:
; GCN: v_max3_i32
@@ -46,7 +47,7 @@ define amdgpu_kernel void @v_test_umax3_ugt_i32(ptr addrspace(1) %out, ptr addrs
; VI: v_max_i16
; VI: v_max_i16
-; GFX9: v_max3_i16
+; GFX9_1250: v_max3_i16
define amdgpu_kernel void @v_test_imax3_sgt_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
@@ -70,7 +71,7 @@ define amdgpu_kernel void @v_test_imax3_sgt_i16(ptr addrspace(1) %out, ptr addrs
; VI: v_max_u16
; VI: v_max_u16
-; GFX9: v_max3_u16
+; GFX9_1250: v_max3_u16
define amdgpu_kernel void @v_test_umax3_ugt_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
@@ -94,7 +95,7 @@ define amdgpu_kernel void @v_test_umax3_ugt_i16(ptr addrspace(1) %out, ptr addrs
; VI: v_max_i16
; VI: v_max_i16
-; GFX9: v_max3_i16
+; GFX9_1250: v_max3_i16
define amdgpu_kernel void @v_test_imax3_sgt_i8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i8, ptr addrspace(1) %aptr, i32 %tid
@@ -118,7 +119,7 @@ define amdgpu_kernel void @v_test_imax3_sgt_i8(ptr addrspace(1) %out, ptr addrsp
; VI: v_max_u16
; VI: v_max_u16
-; GFX9: v_max3_u16
+; GFX9_1250: v_max3_u16
define amdgpu_kernel void @v_test_umax3_ugt_i8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i8, ptr addrspace(1) %aptr, i32 %tid
@@ -142,7 +143,7 @@ define amdgpu_kernel void @v_test_umax3_ugt_i8(ptr addrspace(1) %out, ptr addrsp
; VI: v_max_i16
; VI: v_max_i16
-; GFX9: v_max3_i16
+; GFX9_1250: v_max3_i16
define amdgpu_kernel void @v_test_imax3_sgt_i7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i7, ptr addrspace(1) %aptr, i32 %tid
@@ -166,7 +167,7 @@ define amdgpu_kernel void @v_test_imax3_sgt_i7(ptr addrspace(1) %out, ptr addrsp
; VI: v_max_u16
; VI: v_max_u16
-; GFX9: v_max3_u16
+; GFX9_1250: v_max3_u16
define amdgpu_kernel void @v_test_umax3_ugt_i7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep0 = getelementptr i7, ptr addrspace(1) %aptr, i32 %tid
@@ -260,6 +261,50 @@ define amdgpu_kernel void @v_test_umax3_ugt_i64(ptr addrspace(1) %out, ptr addrs
ret void
}
+; GCN-LABEL: {{^}}v_test_imax3_sgt_v2i16:
+; SI-COUNT-2: v_max3_i32
+; VI-COUNT-2: v_max_i16
+; GFX9-COUNT-2: v_pk_max_i16
+; GFX1250: v_pk_max3_i16
+define amdgpu_kernel void @v_test_imax3_sgt_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr <2 x i16>, ptr addrspace(1) %aptr, i32 %tid
+ %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %bptr, i32 %tid
+ %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %cptr, i32 %tid
+ %outgep = getelementptr <2 x i16>, ptr addrspace(1) %out, i32 %tid
+ %a = load <2 x i16>, ptr addrspace(1) %gep0
+ %b = load <2 x i16>, ptr addrspace(1) %gep1
+ %c = load <2 x i16>, ptr addrspace(1) %gep2
+ %icmp0 = icmp sgt <2 x i16> %a, %b
+ %i0 = select <2 x i1> %icmp0, <2 x i16> %a, <2 x i16> %b
+ %icmp1 = icmp sgt <2 x i16> %i0, %c
+ %i1 = select <2 x i1> %icmp1, <2 x i16> %i0, <2 x i16> %c
+ store <2 x i16> %i1, ptr addrspace(1) %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}v_test_imax3_ugt_v2i16:
+; SI-COUNT-2: v_max3_u32
+; VI-COUNT-2: v_max_u16
+; GFX9-COUNT-2: v_pk_max_u16
+; GFX1250: v_pk_max3_u16
+define amdgpu_kernel void @v_test_imax3_ugt_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %gep0 = getelementptr <2 x i16>, ptr addrspace(1) %aptr, i32 %tid
+ %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %bptr, i32 %tid
+ %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %cptr, i32 %tid
+ %outgep = getelementptr <2 x i16>, ptr addrspace(1) %out, i32 %tid
+ %a = load <2 x i16>, ptr addrspace(1) %gep0
+ %b = load <2 x i16>, ptr addrspace(1) %gep1
+ %c = load <2 x i16>, ptr addrspace(1) %gep2
+ %icmp0 = icmp ugt <2 x i16> %a, %b
+ %i0 = select <2 x i1> %icmp0, <2 x i16> %a, <2 x i16> %b
+ %icmp1 = icmp ugt <2 x i16> %i0, %c
+ %i1 = select <2 x i1> %icmp1, <2 x i16> %i0, <2 x i16> %c
+ store <2 x i16> %i1, ptr addrspace(1) %out
+ ret void
+}
+
declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }