diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/max3.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/max3.ll | 59 |
1 files changed, 52 insertions, 7 deletions
# Patch summary (derived from the hunks below):
#   * Adds a second GFX RUN line (-mcpu=gfx1250) and introduces a shared
#     FileCheck prefix, GFX9_1250, that is enabled for both the gfx900 and
#     the gfx1250 runs.
#   * Retargets the existing scalar i16/i8/i7 `v_max3_{i,u}16` checks from the
#     GFX9 prefix to GFX9_1250 so they are verified on both subtargets.
#   * Adds two <2 x i16> max-of-three kernels (signed and unsigned). Expected
#     output per prefix: two v_max3_{i,u}32 on SI, two v_max_{i,u}16 on VI,
#     two v_pk_max_{i,u}16 on GFX9, one v_pk_max3_{i,u}16 on GFX1250.
#
# NOTE(review): in both new kernels %outgep is computed but never used; the
#   store goes to %out. Confirm this is intentional before relying on
#   per-workitem output addressing.
# NOTE(review): the unsigned kernel is named v_test_imax3_ugt_v2i16, whereas
#   the other unsigned tests referenced in this patch are named
#   v_test_umax3_ugt_*. Consider renaming (define + GCN-LABEL) in a follow-up.
#
diff --git a/llvm/test/CodeGen/AMDGPU/max3.ll b/llvm/test/CodeGen/AMDGPU/max3.ll
index a757bb0..b922854 100644
--- a/llvm/test/CodeGen/AMDGPU/max3.ll
+++ b/llvm/test/CodeGen/AMDGPU/max3.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefixes=GCN,SI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9_1250 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GFX1250,GFX9_1250 %s
 
 ; GCN-LABEL: {{^}}v_test_imax3_sgt_i32:
 ; GCN: v_max3_i32
@@ -46,7 +47,7 @@ define amdgpu_kernel void @v_test_umax3_ugt_i32(ptr addrspace(1) %out, ptr addrs
 ; VI: v_max_i16
 ; VI: v_max_i16
 
-; GFX9: v_max3_i16
+; GFX9_1250: v_max3_i16
 define amdgpu_kernel void @v_test_imax3_sgt_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
@@ -70,7 +71,7 @@ define amdgpu_kernel void @v_test_imax3_sgt_i16(ptr addrspace(1) %out, ptr addrs
 ; VI: v_max_u16
 ; VI: v_max_u16
 
-; GFX9: v_max3_u16
+; GFX9_1250: v_max3_u16
 define amdgpu_kernel void @v_test_umax3_ugt_i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr i16, ptr addrspace(1) %aptr, i32 %tid
@@ -94,7 +95,7 @@ define amdgpu_kernel void @v_test_umax3_ugt_i16(ptr addrspace(1) %out, ptr addrs
 ; VI: v_max_i16
 ; VI: v_max_i16
 
-; GFX9: v_max3_i16
+; GFX9_1250: v_max3_i16
 define amdgpu_kernel void @v_test_imax3_sgt_i8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr i8, ptr addrspace(1) %aptr, i32 %tid
@@ -118,7 +119,7 @@ define amdgpu_kernel void @v_test_imax3_sgt_i8(ptr addrspace(1) %out, ptr addrsp
 ; VI: v_max_u16
 ; VI: v_max_u16
 
-; GFX9: v_max3_u16
+; GFX9_1250: v_max3_u16
 define amdgpu_kernel void @v_test_umax3_ugt_i8(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr i8, ptr addrspace(1) %aptr, i32 %tid
@@ -142,7 +143,7 @@ define amdgpu_kernel void @v_test_umax3_ugt_i8(ptr addrspace(1) %out, ptr addrsp
 ; VI: v_max_i16
 ; VI: v_max_i16
 
-; GFX9: v_max3_i16
+; GFX9_1250: v_max3_i16
 define amdgpu_kernel void @v_test_imax3_sgt_i7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr i7, ptr addrspace(1) %aptr, i32 %tid
@@ -166,7 +167,7 @@ define amdgpu_kernel void @v_test_imax3_sgt_i7(ptr addrspace(1) %out, ptr addrsp
 ; VI: v_max_u16
 ; VI: v_max_u16
 
-; GFX9: v_max3_u16
+; GFX9_1250: v_max3_u16
 define amdgpu_kernel void @v_test_umax3_ugt_i7(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %gep0 = getelementptr i7, ptr addrspace(1) %aptr, i32 %tid
@@ -260,6 +261,50 @@ define amdgpu_kernel void @v_test_umax3_ugt_i64(ptr addrspace(1) %out, ptr addrs
   ret void
 }
 
+; GCN-LABEL: {{^}}v_test_imax3_sgt_v2i16:
+; SI-COUNT-2: v_max3_i32
+; VI-COUNT-2: v_max_i16
+; GFX9-COUNT-2: v_pk_max_i16
+; GFX1250: v_pk_max3_i16
+define amdgpu_kernel void @v_test_imax3_sgt_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep0 = getelementptr <2 x i16>, ptr addrspace(1) %aptr, i32 %tid
+  %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %bptr, i32 %tid
+  %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %cptr, i32 %tid
+  %outgep = getelementptr <2 x i16>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x i16>, ptr addrspace(1) %gep0
+  %b = load <2 x i16>, ptr addrspace(1) %gep1
+  %c = load <2 x i16>, ptr addrspace(1) %gep2
+  %icmp0 = icmp sgt <2 x i16> %a, %b
+  %i0 = select <2 x i1> %icmp0, <2 x i16> %a, <2 x i16> %b
+  %icmp1 = icmp sgt <2 x i16> %i0, %c
+  %i1 = select <2 x i1> %icmp1, <2 x i16> %i0, <2 x i16> %c
+  store <2 x i16> %i1, ptr addrspace(1) %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_test_imax3_ugt_v2i16:
+; SI-COUNT-2: v_max3_u32
+; VI-COUNT-2: v_max_u16
+; GFX9-COUNT-2: v_pk_max_u16
+; GFX1250: v_pk_max3_u16
+define amdgpu_kernel void @v_test_imax3_ugt_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %gep0 = getelementptr <2 x i16>, ptr addrspace(1) %aptr, i32 %tid
+  %gep1 = getelementptr <2 x i16>, ptr addrspace(1) %bptr, i32 %tid
+  %gep2 = getelementptr <2 x i16>, ptr addrspace(1) %cptr, i32 %tid
+  %outgep = getelementptr <2 x i16>, ptr addrspace(1) %out, i32 %tid
+  %a = load <2 x i16>, ptr addrspace(1) %gep0
+  %b = load <2 x i16>, ptr addrspace(1) %gep1
+  %c = load <2 x i16>, ptr addrspace(1) %gep2
+  %icmp0 = icmp ugt <2 x i16> %a, %b
+  %i0 = select <2 x i1> %icmp0, <2 x i16> %a, <2 x i16> %b
+  %icmp1 = icmp ugt <2 x i16> %i0, %c
+  %i1 = select <2 x i1> %icmp1, <2 x i16> %i0, <2 x i16> %c
+  store <2 x i16> %i1, ptr addrspace(1) %out
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 attributes #0 = { nounwind }