diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll | 90 |
1 files changed, 45 insertions, 45 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll b/llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll index b5bb68e..e0ea08d 100644 --- a/llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll +++ b/llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll @@ -97,9 +97,9 @@ entry: define amdgpu_ps <2 x float> @s_load_b64_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) { ; GCN-LABEL: s_load_b64_idxprom: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_load_b64 s[0:1], s[0:1], s2 offset:0x0 scale_offset +; GCN-NEXT: s_load_b64 s[4:5], s[0:1], s2 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 ; GCN-NEXT: ; return to shader part epilog entry: %idxprom = zext i32 %idx to i64 @@ -111,10 +111,10 @@ entry: define amdgpu_ps <3 x float> @s_load_b96_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) { ; GCN-LABEL: s_load_b96_idxprom: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_load_b96 s[0:2], s[0:1], s2 offset:0x0 scale_offset +; GCN-NEXT: s_load_b96 s[4:6], s[0:1], s2 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GCN-NEXT: v_mov_b32_e32 v2, s2 +; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GCN-NEXT: v_mov_b32_e32 v2, s6 ; GCN-NEXT: ; return to shader part epilog entry: %idxprom = zext i32 %idx to i64 @@ -126,10 +126,10 @@ entry: define amdgpu_ps <4 x float> @s_load_b128_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) { ; GCN-LABEL: s_load_b128_idxprom: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_load_b128 s[0:3], s[0:1], s2 offset:0x0 scale_offset +; GCN-NEXT: s_load_b128 s[4:7], s[0:1], s2 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 ; GCN-NEXT: ; return to shader part epilog entry: %idxprom = zext i32 %idx to i64 @@ -141,12 +141,12 @@ entry: define amdgpu_ps <8 x float> @s_load_b256_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) { ; GCN-LABEL: s_load_b256_idxprom: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_load_b256 s[0:7], s[0:1], s2 offset:0x0 scale_offset +; GCN-NEXT: s_load_b256 s[4:11], s[0:1], s2 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GCN-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 -; GCN-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 +; GCN-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9 +; GCN-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11 ; GCN-NEXT: ; return to shader part epilog entry: %idxprom = zext i32 %idx to i64 @@ -158,16 +158,16 @@ entry: define amdgpu_ps <16 x float> @s_load_b512_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) { ; GCN-LABEL: s_load_b512_idxprom: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_load_b512 s[0:15], s[0:1], s2 offset:0x0 scale_offset +; GCN-NEXT: s_load_b512 s[4:19], s[0:1], s2 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GCN-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 -; GCN-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 -; GCN-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 -; GCN-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 -; GCN-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 -; GCN-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 +; GCN-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9 +; GCN-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11 +; GCN-NEXT: v_dual_mov_b32 v8, s12 :: v_dual_mov_b32 v9, s13 +; GCN-NEXT: v_dual_mov_b32 v10, s14 :: v_dual_mov_b32 v11, s15 +; GCN-NEXT: v_dual_mov_b32 v12, s16 :: v_dual_mov_b32 v13, s17 +; GCN-NEXT: v_dual_mov_b32 v14, s18 :: v_dual_mov_b32 v15, s19 ; GCN-NEXT: ; return to shader part epilog entry: %idxprom = zext i32 %idx to i64 @@ -275,11 +275,11 @@ entry: define amdgpu_ps <2 x float> @s_load_b64_idxprom_range(ptr addrspace(4) align 4 inreg %p) { ; GCN-LABEL: s_load_b64_idxprom_range: ; GCN: ; %bb.0: ; %entry -; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0 +; GCN-NEXT: s_load_b32 s4, s[0:1], 0x0 ; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: s_load_b64 s[0:1], s[0:1], s2 offset:0x0 scale_offset +; GCN-NEXT: s_load_b64 s[2:3], s[0:1], s4 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GCN-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GCN-NEXT: ; return to shader part epilog entry: %idx = load i32, ptr addrspace(4) %p, align 4, !range !0 @@ -294,10 +294,10 @@ define amdgpu_ps <3 x float> @s_load_b96_idxprom_range(ptr addrspace(4) align 4 ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: s_load_b96 s[0:2], s[0:1], s2 offset:0x0 scale_offset +; GCN-NEXT: s_load_b96 s[4:6], s[0:1], s2 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GCN-NEXT: v_mov_b32_e32 v2, s2 +; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GCN-NEXT: v_mov_b32_e32 v2, s6 ; GCN-NEXT: ; return to shader part epilog entry: %idx = load i32, ptr addrspace(4) %p, align 4, !range !0 @@ -312,10 +312,10 @@ define amdgpu_ps <4 x float> @s_load_b128_idxprom_range(ptr addrspace(4) align 4 ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: s_load_b128 s[0:3], s[0:1], s2 offset:0x0 scale_offset +; GCN-NEXT: s_load_b128 s[4:7], s[0:1], s2 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 ; GCN-NEXT: ; return to shader part epilog entry: %idx = load i32, ptr addrspace(4) %p, align 4, !range !0 @@ -330,12 +330,12 @@ define amdgpu_ps <8 x float> @s_load_b256_idxprom_range(ptr addrspace(4) align 4 ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: s_load_b256 s[0:7], s[0:1], s2 offset:0x0 scale_offset +; GCN-NEXT: s_load_b256 s[4:11], s[0:1], s2 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GCN-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 -; GCN-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 +; GCN-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9 +; GCN-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11 ; GCN-NEXT: ; return to shader part epilog entry: %idx = load i32, ptr addrspace(4) %p, align 4, !range !0 @@ -350,16 +350,16 @@ define amdgpu_ps <16 x float> @s_load_b512_idxprom_range(ptr addrspace(4) align ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: s_load_b512 s[0:15], s[0:1], s2 offset:0x0 scale_offset +; GCN-NEXT: s_load_b512 s[4:19], s[0:1], s2 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 -; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 -; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 -; GCN-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 -; GCN-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 -; GCN-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 -; GCN-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 -; GCN-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 -; GCN-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 +; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 +; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 +; GCN-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9 +; GCN-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11 +; GCN-NEXT: v_dual_mov_b32 v8, s12 :: v_dual_mov_b32 v9, s13 +; GCN-NEXT: v_dual_mov_b32 v10, s14 :: v_dual_mov_b32 v11, s15 +; GCN-NEXT: v_dual_mov_b32 v12, s16 :: v_dual_mov_b32 v13, s17 +; GCN-NEXT: v_dual_mov_b32 v14, s18 :: v_dual_mov_b32 v15, s19 ; GCN-NEXT: ; return to shader part epilog entry: %idx = load i32, ptr addrspace(4) %p, align 4, !range !0 |