aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll90
1 files changed, 45 insertions, 45 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll b/llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll
index b5bb68e..e0ea08d 100644
--- a/llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll
+++ b/llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll
@@ -97,9 +97,9 @@ entry:
define amdgpu_ps <2 x float> @s_load_b64_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) {
; GCN-LABEL: s_load_b64_idxprom:
; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_load_b64 s[0:1], s[0:1], s2 offset:0x0 scale_offset
+; GCN-NEXT: s_load_b64 s[4:5], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GCN-NEXT: ; return to shader part epilog
entry:
%idxprom = zext i32 %idx to i64
@@ -111,10 +111,10 @@ entry:
define amdgpu_ps <3 x float> @s_load_b96_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) {
; GCN-LABEL: s_load_b96_idxprom:
; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_load_b96 s[0:2], s[0:1], s2 offset:0x0 scale_offset
+; GCN-NEXT: s_load_b96 s[4:6], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GCN-NEXT: v_mov_b32_e32 v2, s2
+; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GCN-NEXT: v_mov_b32_e32 v2, s6
; GCN-NEXT: ; return to shader part epilog
entry:
%idxprom = zext i32 %idx to i64
@@ -126,10 +126,10 @@ entry:
define amdgpu_ps <4 x float> @s_load_b128_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) {
; GCN-LABEL: s_load_b128_idxprom:
; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_load_b128 s[0:3], s[0:1], s2 offset:0x0 scale_offset
+; GCN-NEXT: s_load_b128 s[4:7], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GCN-NEXT: ; return to shader part epilog
entry:
%idxprom = zext i32 %idx to i64
@@ -141,12 +141,12 @@ entry:
define amdgpu_ps <8 x float> @s_load_b256_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) {
; GCN-LABEL: s_load_b256_idxprom:
; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_load_b256 s[0:7], s[0:1], s2 offset:0x0 scale_offset
+; GCN-NEXT: s_load_b256 s[4:11], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
-; GCN-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
-; GCN-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
+; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
+; GCN-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9
+; GCN-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11
; GCN-NEXT: ; return to shader part epilog
entry:
%idxprom = zext i32 %idx to i64
@@ -158,16 +158,16 @@ entry:
define amdgpu_ps <16 x float> @s_load_b512_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) {
; GCN-LABEL: s_load_b512_idxprom:
; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_load_b512 s[0:15], s[0:1], s2 offset:0x0 scale_offset
+; GCN-NEXT: s_load_b512 s[4:19], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
-; GCN-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
-; GCN-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
-; GCN-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
-; GCN-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
-; GCN-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
-; GCN-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
+; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
+; GCN-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9
+; GCN-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11
+; GCN-NEXT: v_dual_mov_b32 v8, s12 :: v_dual_mov_b32 v9, s13
+; GCN-NEXT: v_dual_mov_b32 v10, s14 :: v_dual_mov_b32 v11, s15
+; GCN-NEXT: v_dual_mov_b32 v12, s16 :: v_dual_mov_b32 v13, s17
+; GCN-NEXT: v_dual_mov_b32 v14, s18 :: v_dual_mov_b32 v15, s19
; GCN-NEXT: ; return to shader part epilog
entry:
%idxprom = zext i32 %idx to i64
@@ -275,11 +275,11 @@ entry:
define amdgpu_ps <2 x float> @s_load_b64_idxprom_range(ptr addrspace(4) align 4 inreg %p) {
; GCN-LABEL: s_load_b64_idxprom_range:
; GCN: ; %bb.0: ; %entry
-; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0
+; GCN-NEXT: s_load_b32 s4, s[0:1], 0x0
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: s_load_b64 s[0:1], s[0:1], s2 offset:0x0 scale_offset
+; GCN-NEXT: s_load_b64 s[2:3], s[0:1], s4 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GCN-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GCN-NEXT: ; return to shader part epilog
entry:
%idx = load i32, ptr addrspace(4) %p, align 4, !range !0
@@ -294,10 +294,10 @@ define amdgpu_ps <3 x float> @s_load_b96_idxprom_range(ptr addrspace(4) align 4
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: s_load_b96 s[0:2], s[0:1], s2 offset:0x0 scale_offset
+; GCN-NEXT: s_load_b96 s[4:6], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GCN-NEXT: v_mov_b32_e32 v2, s2
+; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GCN-NEXT: v_mov_b32_e32 v2, s6
; GCN-NEXT: ; return to shader part epilog
entry:
%idx = load i32, ptr addrspace(4) %p, align 4, !range !0
@@ -312,10 +312,10 @@ define amdgpu_ps <4 x float> @s_load_b128_idxprom_range(ptr addrspace(4) align 4
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: s_load_b128 s[0:3], s[0:1], s2 offset:0x0 scale_offset
+; GCN-NEXT: s_load_b128 s[4:7], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
+; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
; GCN-NEXT: ; return to shader part epilog
entry:
%idx = load i32, ptr addrspace(4) %p, align 4, !range !0
@@ -330,12 +330,12 @@ define amdgpu_ps <8 x float> @s_load_b256_idxprom_range(ptr addrspace(4) align 4
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: s_load_b256 s[0:7], s[0:1], s2 offset:0x0 scale_offset
+; GCN-NEXT: s_load_b256 s[4:11], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
-; GCN-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
-; GCN-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
+; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
+; GCN-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9
+; GCN-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11
; GCN-NEXT: ; return to shader part epilog
entry:
%idx = load i32, ptr addrspace(4) %p, align 4, !range !0
@@ -350,16 +350,16 @@ define amdgpu_ps <16 x float> @s_load_b512_idxprom_range(ptr addrspace(4) align
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: s_load_b512 s[0:15], s[0:1], s2 offset:0x0 scale_offset
+; GCN-NEXT: s_load_b512 s[4:19], s[0:1], s2 offset:0x0 scale_offset
; GCN-NEXT: s_wait_kmcnt 0x0
-; GCN-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
-; GCN-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
-; GCN-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5
-; GCN-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7
-; GCN-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9
-; GCN-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11
-; GCN-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13
-; GCN-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15
+; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
+; GCN-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
+; GCN-NEXT: v_dual_mov_b32 v4, s8 :: v_dual_mov_b32 v5, s9
+; GCN-NEXT: v_dual_mov_b32 v6, s10 :: v_dual_mov_b32 v7, s11
+; GCN-NEXT: v_dual_mov_b32 v8, s12 :: v_dual_mov_b32 v9, s13
+; GCN-NEXT: v_dual_mov_b32 v10, s14 :: v_dual_mov_b32 v11, s15
+; GCN-NEXT: v_dual_mov_b32 v12, s16 :: v_dual_mov_b32 v13, s17
+; GCN-NEXT: v_dual_mov_b32 v14, s18 :: v_dual_mov_b32 v15, s19
; GCN-NEXT: ; return to shader part epilog
entry:
%idx = load i32, ptr addrspace(4) %p, align 4, !range !0