diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll | 61 |
1 files changed, 53 insertions, 8 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll index ed7d88b..dcac419 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll @@ -18,19 +18,22 @@ define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1 ; GCN-LABEL: load_1d_lwe: ; GCN: ; %bb.0: ; %main_body ; GCN-NEXT: v_mov_b32_e32 v8, 0 -; GCN-NEXT: v_mov_b32_e32 v6, v0 ; GCN-NEXT: v_mov_b32_e32 v9, v8 ; GCN-NEXT: v_mov_b32_e32 v10, v8 ; GCN-NEXT: v_mov_b32_e32 v11, v8 ; GCN-NEXT: v_mov_b32_e32 v12, v8 -; GCN-NEXT: v_mov_b32_e32 v0, v8 -; GCN-NEXT: v_mov_b32_e32 v1, v9 -; GCN-NEXT: v_mov_b32_e32 v2, v10 -; GCN-NEXT: v_mov_b32_e32 v3, v11 -; GCN-NEXT: v_mov_b32_e32 v4, v12 -; GCN-NEXT: image_load v[0:4], v6, s[0:7] dmask:0xf unorm lwe +; GCN-NEXT: v_mov_b32_e32 v2, v8 +; GCN-NEXT: v_mov_b32_e32 v3, v9 +; GCN-NEXT: v_mov_b32_e32 v4, v10 +; GCN-NEXT: v_mov_b32_e32 v5, v11 +; GCN-NEXT: v_mov_b32_e32 v6, v12 +; GCN-NEXT: image_load v[2:6], v0, s[0:7] dmask:0xf unorm lwe ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: global_store_dword v8, v4, s[8:9] +; GCN-NEXT: v_mov_b32_e32 v0, v2 +; GCN-NEXT: v_mov_b32_e32 v1, v3 +; GCN-NEXT: v_mov_b32_e32 v2, v4 +; GCN-NEXT: v_mov_b32_e32 v3, v5 +; GCN-NEXT: global_store_dword v8, v6, s[8:9] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: ; return to shader part epilog main_body: @@ -75,6 +78,27 @@ main_body: } define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) { +; GCN-LABEL: load_cube_lwe: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v10, 0 +; GCN-NEXT: v_mov_b32_e32 v11, v10 +; GCN-NEXT: v_mov_b32_e32 v12, v10 +; GCN-NEXT: v_mov_b32_e32 v13, v10 +; GCN-NEXT: v_mov_b32_e32 v14, v10 +; GCN-NEXT: v_mov_b32_e32 v4, v10 +; GCN-NEXT: v_mov_b32_e32 v5, v11 +; GCN-NEXT: v_mov_b32_e32 v6, v12 +; GCN-NEXT: v_mov_b32_e32 v7, v13 +; GCN-NEXT: v_mov_b32_e32 v8, v14 +; GCN-NEXT: image_load v[4:8], v[0:2], s[0:7] dmask:0xf unorm lwe da +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, v4 +; GCN-NEXT: v_mov_b32_e32 v1, v5 +; GCN-NEXT: v_mov_b32_e32 v2, v6 +; GCN-NEXT: v_mov_b32_e32 v3, v7 +; GCN-NEXT: global_store_dword v10, v8, s[8:9] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 @@ -106,6 +130,27 @@ main_body: } define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) { +; GCN-LABEL: load_2darray_lwe: +; GCN: ; %bb.0: ; %main_body +; GCN-NEXT: v_mov_b32_e32 v10, 0 +; GCN-NEXT: v_mov_b32_e32 v11, v10 +; GCN-NEXT: v_mov_b32_e32 v12, v10 +; GCN-NEXT: v_mov_b32_e32 v13, v10 +; GCN-NEXT: v_mov_b32_e32 v14, v10 +; GCN-NEXT: v_mov_b32_e32 v4, v10 +; GCN-NEXT: v_mov_b32_e32 v5, v11 +; GCN-NEXT: v_mov_b32_e32 v6, v12 +; GCN-NEXT: v_mov_b32_e32 v7, v13 +; GCN-NEXT: v_mov_b32_e32 v8, v14 +; GCN-NEXT: image_load v[4:8], v[0:2], s[0:7] dmask:0xf unorm lwe da +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, v4 +; GCN-NEXT: v_mov_b32_e32 v1, v5 +; GCN-NEXT: v_mov_b32_e32 v2, v6 +; GCN-NEXT: v_mov_b32_e32 v3, v7 +; GCN-NEXT: global_store_dword v10, v8, s[8:9] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: ; return to shader part epilog main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 |