1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -passes=amdgpu-attributor -o %t.gfx7.bc %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -passes=amdgpu-attributor -o %t.gfx8.bc %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-attributor -o %t.gfx9.bc %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %t.gfx7.bc | FileCheck --check-prefixes=CHECK,PRE-GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %t.gfx8.bc | FileCheck --check-prefixes=CHECK,PRE-GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %t.gfx9.bc | FileCheck --check-prefixes=CHECK,GFX9 %s
; CHECK: addrspacecast_requires_queue_ptr
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
; CHECK: is_shared_requires_queue_ptr
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
; CHECK: is_private_requires_queue_ptr
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
; CHECK: trap_requires_queue_ptr
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
; CHECK: debugtrap_requires_queue_ptr
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
; CHECK: ubsantrap_requires_queue_ptr
; PRE-GFX9: .amdhsa_user_sgpr_queue_ptr 1
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
; CHECK: amdgcn_queue_ptr_requires_queue_ptr
; CHECK: .amdhsa_user_sgpr_queue_ptr 1
; On gfx7 and gfx8, the queue ptr is required for this addrspacecast, but it must not be listed as a hidden_queue_ptr kernel argument in the metadata.
; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: addrspacecast_requires_queue_ptr
; Kernel that casts a private (addrspace 5) pointer and a local (addrspace 3)
; pointer to flat pointers and stores through both of them. The assembly
; expectations at the top of this file want the queue-pointer user SGPR enabled
; for the gfx700/gfx803 runs and disabled for the gfx900 run.
; NOTE(review): presumably the older targets read the aperture bases through
; the queue pointer -- confirm against the AMDGPU backend documentation.
define amdgpu_kernel void @addrspacecast_requires_queue_ptr(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) {
%flat.private = addrspacecast ptr addrspace(5) %ptr.private to ptr
%flat.local = addrspacecast ptr addrspace(3) %ptr.local to ptr
; Volatile stores give each cast a use that cannot be optimized away.
store volatile i32 1, ptr %flat.private
store volatile i32 2, ptr %flat.local
ret void
}
; CHECK: - .args:
; CHECK-NOT: hidden_shared_base
; CHECK-LABEL: .name: is_shared_requires_queue_ptr
; Kernel that calls llvm.amdgcn.is.shared on a flat pointer argument. The
; assembly expectations at the top of this file want the queue-pointer user
; SGPR enabled for the gfx700/gfx803 runs and disabled for the gfx900 run.
define amdgpu_kernel void @is_shared_requires_queue_ptr(ptr %ptr) {
%is.shared = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
; Widen the i1 result so it can be written out as an i32.
%zext = zext i1 %is.shared to i32
; The destination is a poison global pointer; the volatile store exists only
; to give the intrinsic result a use that cannot be removed.
store volatile i32 %zext, ptr addrspace(1) poison
ret void
}
; CHECK: - .args:
; CHECK-NOT: hidden_shared_base
; CHECK-NOT: hidden_private_base
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: is_private_requires_queue_ptr
; Kernel that calls llvm.amdgcn.is.private on a flat pointer argument. The
; assembly expectations at the top of this file want the queue-pointer user
; SGPR enabled for the gfx700/gfx803 runs and disabled for the gfx900 run.
define amdgpu_kernel void @is_private_requires_queue_ptr(ptr %ptr) {
%is.private = call i1 @llvm.amdgcn.is.private(ptr %ptr)
; Widen the i1 result so it can be written out as an i32.
%zext = zext i1 %is.private to i32
; The destination is a poison global pointer; the volatile store exists only
; to give the intrinsic result a use that cannot be removed.
store volatile i32 %zext, ptr addrspace(1) poison
ret void
}
; CHECK: - .args:
; CHECK-NOT: hidden_shared_base
; CHECK-NOT: hidden_private_base
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: trap_requires_queue_ptr
; Kernel whose only action is a call to llvm.trap. The assembly expectations
; at the top of this file want the queue-pointer user SGPR enabled for the
; gfx700/gfx803 runs and disabled for the gfx900 run.
define amdgpu_kernel void @trap_requires_queue_ptr() {
call void @llvm.trap()
; The block ends in unreachable instead of a return.
unreachable
}
; CHECK: - .args:
; CHECK-NOT: hidden_shared_base
; CHECK-NOT: hidden_private_base
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: debugtrap_requires_queue_ptr
; Kernel whose only action is a call to llvm.debugtrap. The assembly
; expectations at the top of this file want the queue-pointer user SGPR enabled
; for the gfx700/gfx803 runs and disabled for the gfx900 run.
define amdgpu_kernel void @debugtrap_requires_queue_ptr() {
call void @llvm.debugtrap()
; The block ends in unreachable instead of a return.
unreachable
}
; CHECK: - .args:
; CHECK-NOT: hidden_shared_base
; CHECK-NOT: hidden_private_base
; CHECK-NOT: hidden_queue_ptr
; CHECK-LABEL: .name: ubsantrap_requires_queue_ptr
; Kernel whose only action is a call to llvm.ubsantrap with the immediate
; argument 0. The assembly expectations at the top of this file want the
; queue-pointer user SGPR enabled for the gfx700/gfx803 runs and disabled for
; the gfx900 run.
define amdgpu_kernel void @ubsantrap_requires_queue_ptr() {
call void @llvm.ubsantrap(i8 0)
; The block ends in unreachable instead of a return.
unreachable
}
; CHECK: - .args:
; CHECK-NOT: hidden_queue_ptr
; CHECK-NOT: hidden_shared_base
; CHECK-NOT: hidden_private_base
; CHECK-LABEL: .name: amdgcn_queue_ptr_requires_queue_ptr
; Kernel that explicitly requests the queue pointer, the implicit-argument
; pointer, the dispatch pointer and the dispatch id through their intrinsics.
; Unlike the other kernels in this file, the assembly expectations want the
; queue-pointer user SGPR enabled on every tested target, gfx900 included.
define amdgpu_kernel void @amdgcn_queue_ptr_requires_queue_ptr(ptr addrspace(1) %ptr) {
%queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
%dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
%dispatch.id = call i64 @llvm.amdgcn.dispatch.id()
; Volatile one-byte loads give each returned pointer a use that cannot be
; optimized away.
%queue.load = load volatile i8, ptr addrspace(4) %queue.ptr
%implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr
%dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr
; The dispatch id is kept live by storing it to the kernel's global pointer
; argument.
store volatile i64 %dispatch.id, ptr addrspace(1) %ptr
ret void
}
; Declarations of the intrinsics called by the kernels in this file.
declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr()
declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
declare i64 @llvm.amdgcn.dispatch.id()
declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
declare i1 @llvm.amdgcn.is.shared(ptr)
declare i1 @llvm.amdgcn.is.private(ptr)
declare void @llvm.trap()
declare void @llvm.debugtrap()
declare void @llvm.ubsantrap(i8 immarg)
; Module flag setting amdhsa_code_object_version to 400.
; NOTE(review): presumably this selects AMDHSA code object v4 -- confirm
; against the AMDGPU usage documentation.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
|