aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll130
1 files changed, 130 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
index e3437fd..a42c8ac7 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
@@ -767,6 +767,136 @@ define amdgpu_gfx_whole_wave void @sgpr_spill_only(i1 %active, i32 %a, i32 %b) {
ret void
}
+define amdgpu_gfx_whole_wave void @realign_stack(i1 %active, i32 %x) {
+; DAGISEL-LABEL: realign_stack:
+; DAGISEL: ; %bb.0:
+; DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; DAGISEL-NEXT: s_wait_expcnt 0x0
+; DAGISEL-NEXT: s_wait_samplecnt 0x0
+; DAGISEL-NEXT: s_wait_bvhcnt 0x0
+; DAGISEL-NEXT: s_wait_kmcnt 0x0
+; DAGISEL-NEXT: s_mov_b32 s1, s33
+; DAGISEL-NEXT: s_add_co_i32 s33, s32, 0x3ff
+; DAGISEL-NEXT: s_wait_alu 0xfffe
+; DAGISEL-NEXT: s_and_b32 s33, s33, 0xfffffc00
+; DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1
+; DAGISEL-NEXT: s_mov_b32 s2, s34
+; DAGISEL-NEXT: s_mov_b32 s34, s32
+; DAGISEL-NEXT: s_addk_co_i32 s32, 0x800
+; DAGISEL-NEXT: s_wait_storecnt 0x0
+; DAGISEL-NEXT: scratch_store_b32 off, v0, s33 scope:SCOPE_SYS
+; DAGISEL-NEXT: s_wait_storecnt 0x0
+; DAGISEL-NEXT: s_wait_alu 0xfffe
+; DAGISEL-NEXT: s_mov_b32 s32, s34
+; DAGISEL-NEXT: s_mov_b32 s34, s2
+; DAGISEL-NEXT: s_mov_b32 exec_lo, s0
+; DAGISEL-NEXT: s_mov_b32 s33, s1
+; DAGISEL-NEXT: s_wait_alu 0xfffe
+; DAGISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: realign_stack:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GISEL-NEXT: s_wait_expcnt 0x0
+; GISEL-NEXT: s_wait_samplecnt 0x0
+; GISEL-NEXT: s_wait_bvhcnt 0x0
+; GISEL-NEXT: s_wait_kmcnt 0x0
+; GISEL-NEXT: s_mov_b32 s1, s33
+; GISEL-NEXT: s_add_co_i32 s33, s32, 0x3ff
+; GISEL-NEXT: s_wait_alu 0xfffe
+; GISEL-NEXT: s_and_b32 s33, s33, 0xfffffc00
+; GISEL-NEXT: s_xor_saveexec_b32 s0, -1
+; GISEL-NEXT: s_mov_b32 s2, s34
+; GISEL-NEXT: s_mov_b32 s34, s32
+; GISEL-NEXT: s_addk_co_i32 s32, 0x800
+; GISEL-NEXT: s_wait_storecnt 0x0
+; GISEL-NEXT: scratch_store_b32 off, v0, s33 scope:SCOPE_SYS
+; GISEL-NEXT: s_wait_storecnt 0x0
+; GISEL-NEXT: s_wait_alu 0xfffe
+; GISEL-NEXT: s_mov_b32 s32, s34
+; GISEL-NEXT: s_mov_b32 s34, s2
+; GISEL-NEXT: s_mov_b32 exec_lo, s0
+; GISEL-NEXT: s_mov_b32 s33, s1
+; GISEL-NEXT: s_wait_alu 0xfffe
+; GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; DAGISEL64-LABEL: realign_stack:
+; DAGISEL64: ; %bb.0:
+; DAGISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
+; DAGISEL64-NEXT: s_wait_expcnt 0x0
+; DAGISEL64-NEXT: s_wait_samplecnt 0x0
+; DAGISEL64-NEXT: s_wait_bvhcnt 0x0
+; DAGISEL64-NEXT: s_wait_kmcnt 0x0
+; DAGISEL64-NEXT: s_mov_b32 s2, s33
+; DAGISEL64-NEXT: s_add_co_i32 s33, s32, 0x3ff
+; DAGISEL64-NEXT: s_wait_alu 0xfffe
+; DAGISEL64-NEXT: s_and_b32 s33, s33, 0xfffffc00
+; DAGISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; DAGISEL64-NEXT: s_mov_b32 s3, s34
+; DAGISEL64-NEXT: s_mov_b32 s34, s32
+; DAGISEL64-NEXT: s_addk_co_i32 s32, 0x800
+; DAGISEL64-NEXT: s_wait_storecnt 0x0
+; DAGISEL64-NEXT: scratch_store_b32 off, v0, s33 scope:SCOPE_SYS
+; DAGISEL64-NEXT: s_wait_storecnt 0x0
+; DAGISEL64-NEXT: s_wait_alu 0xfffe
+; DAGISEL64-NEXT: s_mov_b32 s32, s34
+; DAGISEL64-NEXT: s_mov_b32 s34, s3
+; DAGISEL64-NEXT: s_mov_b64 exec, s[0:1]
+; DAGISEL64-NEXT: s_mov_b32 s33, s2
+; DAGISEL64-NEXT: s_wait_alu 0xfffe
+; DAGISEL64-NEXT: s_setpc_b64 s[30:31]
+;
+; GISEL64-LABEL: realign_stack:
+; GISEL64: ; %bb.0:
+; GISEL64-NEXT: s_wait_loadcnt_dscnt 0x0
+; GISEL64-NEXT: s_wait_expcnt 0x0
+; GISEL64-NEXT: s_wait_samplecnt 0x0
+; GISEL64-NEXT: s_wait_bvhcnt 0x0
+; GISEL64-NEXT: s_wait_kmcnt 0x0
+; GISEL64-NEXT: s_mov_b32 s2, s33
+; GISEL64-NEXT: s_add_co_i32 s33, s32, 0x3ff
+; GISEL64-NEXT: s_wait_alu 0xfffe
+; GISEL64-NEXT: s_and_b32 s33, s33, 0xfffffc00
+; GISEL64-NEXT: s_xor_saveexec_b64 s[0:1], -1
+; GISEL64-NEXT: s_mov_b32 s3, s34
+; GISEL64-NEXT: s_mov_b32 s34, s32
+; GISEL64-NEXT: s_addk_co_i32 s32, 0x800
+; GISEL64-NEXT: s_wait_storecnt 0x0
+; GISEL64-NEXT: scratch_store_b32 off, v0, s33 scope:SCOPE_SYS
+; GISEL64-NEXT: s_wait_storecnt 0x0
+; GISEL64-NEXT: s_wait_alu 0xfffe
+; GISEL64-NEXT: s_mov_b32 s32, s34
+; GISEL64-NEXT: s_mov_b32 s34, s3
+; GISEL64-NEXT: s_mov_b64 exec, s[0:1]
+; GISEL64-NEXT: s_mov_b32 s33, s2
+; GISEL64-NEXT: s_wait_alu 0xfffe
+; GISEL64-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-DAGISEL-LABEL: realign_stack:
+; GFX1250-DAGISEL: ; %bb.0:
+; GFX1250-DAGISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-DAGISEL-NEXT: s_wait_kmcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 s1, s33
+; GFX1250-DAGISEL-NEXT: s_add_co_i32 s33, s32, 0x3ff
+; GFX1250-DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-DAGISEL-NEXT: s_and_b32 s33, s33, 0xfffffc00
+; GFX1250-DAGISEL-NEXT: s_xor_saveexec_b32 s0, -1
+; GFX1250-DAGISEL-NEXT: s_mov_b32 s2, s34
+; GFX1250-DAGISEL-NEXT: s_mov_b32 s34, s32
+; GFX1250-DAGISEL-NEXT: s_addk_co_i32 s32, 0x800
+; GFX1250-DAGISEL-NEXT: scratch_store_b32 off, v0, s33 scope:SCOPE_SYS
+; GFX1250-DAGISEL-NEXT: s_wait_storecnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 s32, s34
+; GFX1250-DAGISEL-NEXT: s_mov_b32 s34, s2
+; GFX1250-DAGISEL-NEXT: s_wait_xcnt 0x0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 exec_lo, s0
+; GFX1250-DAGISEL-NEXT: s_mov_b32 s33, s1
+; GFX1250-DAGISEL-NEXT: s_set_pc_i64 s[30:31]
+ %fussy = alloca i32, align 1024, addrspace(5) ; 1024-byte-aligned slot; the expected prologue above rounds s32 up to a 0x400 multiple in s33 (add 0x3ff, then and 0xfffffc00)
+ store volatile i32 %x, ptr addrspace(5) %fussy, align 1024 ; the only store in the body; it corresponds to the single scratch_store_b32 at s33 in every expected sequence
+ ret void
+}
+
define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) {
; DAGISEL-LABEL: multiple_blocks:
; DAGISEL: ; %bb.0: