1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+block-vgpr-csr < %s | FileCheck -check-prefixes=CHECK,GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1200 -mattr=+block-vgpr-csr < %s | FileCheck -check-prefixes=CHECK,DAGISEL %s
; non_entry_func is an ordinary (non-kernel) function whose inline asm clobbers
; a scattered set of VGPRs and SGPRs, so the expected prologue and epilogue
; below save and restore registers around it.
;
; What the expected output shows
;   * v[40:71] is saved with one scratch_store_block at offset 4 after m0 is
;     set to 0x110003. That value has bits 0, 1, 16 and 20 set, which lines up
;     with v40, v41, v56 and v60 counted from v40 -- the registers named in the
;     "transferring at most" annotation. Bits 9 and 12 (v49, v52) are clear.
;   * v[120:151] is saved with a second block store at offset 0 with m0 = 1
;     (bit 0 only), annotated as transferring v120.
;   * s48 and s49 are written into lanes 0 and 1 of v2; v2 itself is spilled
;     at offset 100 inside an s_xor_saveexec_b32 / s_mov_b32 exec_lo bracket.
;   * No block transfer or spill is expected for the clobbered v0, v8, v49 and
;     v52. %x is moved from v0 to v1 before the asm and back afterwards.
;   * The store of %x into %local lands at offset 88, i.e. right after the
;     v40..v60 span (4 + 21 * 4 = 88) and 12 bytes before the v2 slot at 100.
;     NOTE(review) this suggests each block only reserves stack up to its
;     highest transferred register rather than a full 128 bytes -- confirm
;     against the frame lowering code.
define i32 @non_entry_func(i32 %x) {
; CHECK-LABEL: non_entry_func:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_wait_loadcnt_dscnt 0x0
; CHECK-NEXT: s_wait_expcnt 0x0
; CHECK-NEXT: s_wait_samplecnt 0x0
; CHECK-NEXT: s_wait_bvhcnt 0x0
; CHECK-NEXT: s_wait_kmcnt 0x0
; CHECK-NEXT: s_xor_saveexec_b32 s0, -1
; CHECK-NEXT: scratch_store_b32 off, v2, s32 offset:100 ; 4-byte Folded Spill
; CHECK-NEXT: s_wait_alu 0xfffe
; CHECK-NEXT: s_mov_b32 exec_lo, s0
; CHECK-NEXT: v_writelane_b32 v2, s48, 0
; CHECK-NEXT: s_mov_b32 m0, 0x110003
; CHECK-NEXT: v_mov_b32_e32 v1, v0
; CHECK-NEXT: ; transferring at most v40 v41 v56 v60 ; 128-byte Folded Spill
; CHECK-NEXT: scratch_store_block off, v[40:71], s32 offset:4
; CHECK-NEXT: s_mov_b32 m0, 1
; CHECK-NEXT: v_writelane_b32 v2, s49, 1
; CHECK-NEXT: ; transferring at most v120 ; 128-byte Folded Spill
; CHECK-NEXT: scratch_store_block off, v[120:151], s32
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: s_nop
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: ; transferring at most v120 ; 128-byte Folded Reload
; CHECK-NEXT: scratch_load_block v[120:151], off, s32
; CHECK-NEXT: s_mov_b32 m0, 0x110003
; CHECK-NEXT: scratch_store_b32 off, v1, s32 offset:88
; CHECK-NEXT: ; transferring at most v40 v41 v56 v60 ; 128-byte Folded Reload
; CHECK-NEXT: scratch_load_block v[40:71], off, s32 offset:4
; CHECK-NEXT: v_mov_b32_e32 v0, v1
; CHECK-NEXT: v_readlane_b32 s49, v2, 1
; CHECK-NEXT: v_readlane_b32 s48, v2, 0
; CHECK-NEXT: s_xor_saveexec_b32 s0, -1
; CHECK-NEXT: scratch_load_b32 v2, off, s32 offset:100 ; 4-byte Folded Reload
; CHECK-NEXT: s_wait_alu 0xfffe
; CHECK-NEXT: s_mov_b32 exec_lo, s0
; CHECK-NEXT: s_wait_loadcnt 0x0
; CHECK-NEXT: s_setpc_b64 s[30:31]
; Three-element i32 alloca in addrspace(5); %x is stored to its first element.
; This gives the function a stack object next to the register save slots.
%local = alloca i32, i32 3, addrspace(5)
store i32 %x, ptr addrspace(5) %local
; Clobber-only inline asm with no operands. Its clobber list is what the
; saves and restores expected above are reacting to.
call void asm "s_nop", "~{v0},~{v8},~{v40},~{v41},~{v49},~{v52},~{v56},~{v60},~{v120},~{s0},~{s48},~{s49}"()
; The argument is returned unchanged.
ret i32 %x
}
; entry_func is an amdgpu_kernel that issues the same kind of clobbering inline
; asm (this list omits s49) and then calls non_entry_func, ignoring the result.
;
; The two instruction selectors produce different call setup sequences, so
; this function is verified under the per-selector GISEL and DAGISEL prefixes
; (the -global-isel=1 and -global-isel=0 invocations respectively) instead of
; the shared prefix. For example, the GlobalISel expectation forms s[8:9] with
; an s_add_co_u32 / s_add_co_ci_u32 pair, while the SelectionDAG expectation
; uses a single s_add_nc_u64.
;
; In both expectations the asm is followed directly by call setup. There are
; no scratch block stores or loads and no v_writelane / v_readlane pairs, in
; contrast to non_entry_func. NOTE(review) presumably this is because a kernel
; entry point has no caller whose registers must be preserved -- confirm.
define amdgpu_kernel void @entry_func(i32 %x) {
; GISEL-LABEL: entry_func:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_mov_b64 s[10:11], s[6:7]
; GISEL-NEXT: s_load_b32 s6, s[4:5], 0x0
; GISEL-NEXT: v_mov_b32_e32 v31, v0
; GISEL-NEXT: s_mov_b64 s[12:13], s[0:1]
; GISEL-NEXT: ;;#ASMSTART
; GISEL-NEXT: s_nop
; GISEL-NEXT: ;;#ASMEND
; GISEL-NEXT: s_add_co_u32 s8, s4, 4
; GISEL-NEXT: s_mov_b32 s0, non_entry_func@abs32@lo
; GISEL-NEXT: s_mov_b32 s1, non_entry_func@abs32@hi
; GISEL-NEXT: s_add_co_ci_u32 s9, s5, 0
; GISEL-NEXT: s_mov_b64 s[4:5], s[12:13]
; GISEL-NEXT: s_mov_b32 s32, 0
; GISEL-NEXT: s_wait_kmcnt 0x0
; GISEL-NEXT: v_mov_b32_e32 v0, s6
; GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
; GISEL-NEXT: s_wait_alu 0xfffe
; GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GISEL-NEXT: s_endpgm
;
; DAGISEL-LABEL: entry_func:
; DAGISEL: ; %bb.0:
; DAGISEL-NEXT: s_load_b32 s12, s[4:5], 0x0
; DAGISEL-NEXT: s_mov_b64 s[10:11], s[6:7]
; DAGISEL-NEXT: v_mov_b32_e32 v31, v0
; DAGISEL-NEXT: s_mov_b64 s[6:7], s[0:1]
; DAGISEL-NEXT: ;;#ASMSTART
; DAGISEL-NEXT: s_nop
; DAGISEL-NEXT: ;;#ASMEND
; DAGISEL-NEXT: s_add_nc_u64 s[8:9], s[4:5], 4
; DAGISEL-NEXT: s_mov_b32 s1, non_entry_func@abs32@hi
; DAGISEL-NEXT: s_mov_b32 s0, non_entry_func@abs32@lo
; DAGISEL-NEXT: s_mov_b64 s[4:5], s[6:7]
; DAGISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
; DAGISEL-NEXT: s_mov_b32 s32, 0
; DAGISEL-NEXT: s_wait_kmcnt 0x0
; DAGISEL-NEXT: v_mov_b32_e32 v0, s12
; DAGISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
; DAGISEL-NEXT: s_endpgm
; Clobber-only inline asm with no operands, issued before the call.
call void asm "s_nop", "~{v0},~{v8},~{v40},~{v41},~{v49},~{v52},~{v56},~{v60},~{v120},~{s0},~{s48}"()
; %res is never used; the kernel returns void.
%res = call i32 @non_entry_func(i32 %x)
ret void
}
|