diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll | 61 |
1 files changed, 61 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll index b77b2f7..1ec4f25 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s @@ -19,6 +21,30 @@ define amdgpu_kernel void @v_alignbyte_b32(ptr addrspace(1) %out, i32 %src1, i32 ; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; +; GFX9-LABEL: v_alignbyte_b32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v1, s1 +; GFX9-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NEXT: v_alignbyte_b32 v1, s0, v1, v2 +; GFX9-NEXT: global_store_dword v0, v1, s[6:7] +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: v_alignbyte_b32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_mov_b32_e32 v0, s2 +; GFX10-NEXT: v_alignbyte_b32 v0, s0, s1, v0 +; GFX10-NEXT: global_store_dword v1, v0, s[4:5] +; GFX10-NEXT: s_endpgm +; ; GFX11-TRUE16-LABEL: v_alignbyte_b32: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_clause 0x1 @@ -73,6 +99,41 @@ define amdgpu_kernel void @v_alignbyte_b32_2(ptr addrspace(1) %out, ptr addrspac ; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GCN-NEXT: s_endpgm ; +; GFX9-LABEL: v_alignbyte_b32_2: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_dword v1, v0, s[2:3] glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_load_dword v2, v0, s[6:7] glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_load_dword s2, s[4:5], 0x3c +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_alignbyte_b32 v1, v1, v2, s2 +; GFX9-NEXT: global_store_dword v0, v1, s[0:1] +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: v_alignbyte_b32_2: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34 +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: global_load_dword v1, v0, s[2:3] glc dlc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_load_dword v0, v0, s[6:7] glc dlc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: s_load_dword s2, s[4:5], 0x3c +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_alignbyte_b32 v0, v1, v0, s2 +; GFX10-NEXT: global_store_dword v2, v0, s[0:1] +; GFX10-NEXT: s_endpgm +; ; GFX11-TRUE16-LABEL: v_alignbyte_b32_2: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_clause 0x1 |