; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s

; Every function below follows the same shape: three "*.buffer.load.async.lds"
; calls separated by two llvm.amdgcn.asyncmark calls, then
; llvm.amdgcn.wait.asyncmark(i16 1), then an ordinary float load from %lds.
; Both llc invocations (SelectionDAG and GlobalISel) share one set of expected
; assembly, so the two selectors must produce identical output.
;
; In the expected assembly the wait shows up as "s_waitcnt vmcnt(2)" placed
; before the ds_read_b32, and the trailing i32 operands 1 and 2 of the second
; and third loads show up as the glc and slc modifiers respectively.
; NOTE(review): presumably the operand order is (rsrc, lds, size, [vindex,]
; voffset, soffset, imm offset, aux) -- confirm against the intrinsic
; definitions before relying on it.

; Raw buffer variant taking the resource as <4 x i32>.
define float @raw.buffer.load(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) {
; CHECK-LABEL: raw.buffer.load:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 m0, s20
; CHECK-NEXT:    s_nop 0
; CHECK-NEXT:    buffer_load_dword off, s[16:19], 0 lds
; CHECK-NEXT:    ; asyncmark
; CHECK-NEXT:    buffer_load_dword off, s[16:19], 0 offset:4 glc lds
; CHECK-NEXT:    ; asyncmark
; CHECK-NEXT:    buffer_load_dword off, s[16:19], 0 offset:8 slc lds
; CHECK-NEXT:    v_mov_b32_e32 v0, s20
; CHECK-NEXT:    ; wait_asyncmark(1)
; CHECK-NEXT:    s_waitcnt vmcnt(2)
; CHECK-NEXT:    ds_read_b32 v0, v0
; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
main_body:
  call void @llvm.amdgcn.raw.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.raw.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 4, i32 1)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.raw.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 8, i32 2)
  call void @llvm.amdgcn.wait.asyncmark(i16 1)
  %res = load float, ptr addrspace(3) %lds
  ret float %res
}

; Raw buffer variant taking the resource as ptr addrspace(8); the expected
; assembly is the same as for the <4 x i32> form above.
define float @raw.ptr.buffer.load(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; CHECK-LABEL: raw.ptr.buffer.load:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 m0, s20
; CHECK-NEXT:    s_nop 0
; CHECK-NEXT:    buffer_load_dword off, s[16:19], 0 lds
; CHECK-NEXT:    ; asyncmark
; CHECK-NEXT:    buffer_load_dword off, s[16:19], 0 offset:4 glc lds
; CHECK-NEXT:    ; asyncmark
; CHECK-NEXT:    buffer_load_dword off, s[16:19], 0 offset:8 slc lds
; CHECK-NEXT:    v_mov_b32_e32 v0, s20
; CHECK-NEXT:    ; wait_asyncmark(1)
; CHECK-NEXT:    s_waitcnt vmcnt(2)
; CHECK-NEXT:    ds_read_b32 v0, v0
; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.raw.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 4, i32 1)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.raw.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 8, i32 2)
  call void @llvm.amdgcn.wait.asyncmark(i16 1)
  %res = load float, ptr addrspace(3) %lds
  ret float %res
}

; Struct buffer variant taking the resource as <4 x i32>. The extra i32 8
; operand is materialized into v0 and the loads carry the idxen modifier in the
; expected assembly.
define float @struct.buffer.load(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) {
; CHECK-LABEL: struct.buffer.load:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 m0, s20
; CHECK-NEXT:    v_mov_b32_e32 v0, 8
; CHECK-NEXT:    buffer_load_dword v0, s[16:19], 0 idxen lds
; CHECK-NEXT:    ; asyncmark
; CHECK-NEXT:    buffer_load_dword v0, s[16:19], 0 idxen offset:4 glc lds
; CHECK-NEXT:    ; asyncmark
; CHECK-NEXT:    buffer_load_dword v0, s[16:19], 0 idxen offset:8 slc lds
; CHECK-NEXT:    v_mov_b32_e32 v0, s20
; CHECK-NEXT:    ; wait_asyncmark(1)
; CHECK-NEXT:    s_waitcnt vmcnt(2)
; CHECK-NEXT:    ds_read_b32 v0, v0
; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
main_body:
  call void @llvm.amdgcn.struct.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.struct.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 4, i32 1)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.struct.buffer.load.async.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 8, i32 2)
  call void @llvm.amdgcn.wait.asyncmark(i16 1)
  %res = load float, ptr addrspace(3) %lds
  ret float %res
}

; Struct buffer variant taking the resource as ptr addrspace(8); the expected
; assembly is the same as for the <4 x i32> struct form above.
define float @struct.ptr.buffer.load(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
; CHECK-LABEL: struct.ptr.buffer.load:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_mov_b32 m0, s20
; CHECK-NEXT:    v_mov_b32_e32 v0, 8
; CHECK-NEXT:    buffer_load_dword v0, s[16:19], 0 idxen lds
; CHECK-NEXT:    ; asyncmark
; CHECK-NEXT:    buffer_load_dword v0, s[16:19], 0 idxen offset:4 glc lds
; CHECK-NEXT:    ; asyncmark
; CHECK-NEXT:    buffer_load_dword v0, s[16:19], 0 idxen offset:8 slc lds
; CHECK-NEXT:    v_mov_b32_e32 v0, s20
; CHECK-NEXT:    ; wait_asyncmark(1)
; CHECK-NEXT:    s_waitcnt vmcnt(2)
; CHECK-NEXT:    ds_read_b32 v0, v0
; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
main_body:
  call void @llvm.amdgcn.struct.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.struct.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 4, i32 1)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.struct.ptr.buffer.load.async.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 8, i32 2)
  call void @llvm.amdgcn.wait.asyncmark(i16 1)
  %res = load float, ptr addrspace(3) %lds
  ret float %res
}