1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX10 %s
;
; None of these functions should have the attribute amdgpu-no-flat-scratch-init. In these tests
; we manually set the attribute for the functions. The purpose is to test how the amdgpu-attributor pass
; handles this situation.
;
;; tests of addrspacecast
define void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 {
; GFX9-LABEL: define void @with_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @with_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(5) %ptr to ptr
store volatile i32 0, ptr %stof
ret void
}
define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) #0 {
; GFX9-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX9-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
; GFX10-NEXT: store volatile i32 0, ptr [[STOF]], align 4
; GFX10-NEXT: ret void
;
%stof = addrspacecast ptr addrspace(5) %ptr to ptr
store volatile i32 0, ptr %stof
ret void
}
define void @call_with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #0 {
; GFX9-LABEL: define void @call_with_private_to_flat_addrspacecast(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @call_with_private_to_flat_addrspacecast(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
call void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
ret void
}
define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel(ptr addrspace(5) %ptr) #0 {
; GFX9-LABEL: define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel(
; GFX9-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @call_with_private_to_flat_addrspacecast_cc_kernel(
; GFX10-SAME: ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: call void @with_private_to_flat_addrspacecast(ptr addrspace(5) [[PTR]])
; GFX10-NEXT: ret void
;
call void @with_private_to_flat_addrspacecast(ptr addrspace(5) %ptr)
ret void
}
;; tests of addrspacecast in a constant
define amdgpu_kernel void @private_constant_expression_use(ptr addrspace(1) nocapture %out) #0 {
; GFX9-LABEL: define amdgpu_kernel void @private_constant_expression_use(
; GFX9-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) [[OUT]], align 8
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @private_constant_expression_use(
; GFX10-SAME: ptr addrspace(1) captures(none) [[OUT:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) [[OUT]], align 8
; GFX10-NEXT: ret void
;
store volatile ptr addrspacecast (ptr addrspace(5) inttoptr (i32 123 to ptr addrspace(5)) to ptr), ptr addrspace(1) %out, align 8
ret void
}
;; tests of intrinsics
define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr) #0 {
; GFX9-LABEL: define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(
; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
; GFX9-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(
; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
; GFX10-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; GFX10-NEXT: ret void
;
%1 = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) %ptr)
store volatile i32 7, ptr %1, align 4
ret void
}
define void @calls_intrin_ascast(ptr addrspace(3) %ptr) #0 {
; GFX9-LABEL: define void @calls_intrin_ascast(
; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
; GFX9-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @calls_intrin_ascast(
; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
; GFX10-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; GFX10-NEXT: ret void
;
%1 = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) %ptr)
store volatile i32 7, ptr %1, align 4
ret void
}
define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr) #0 {
; GFX9-LABEL: define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(
; GFX9-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: call void @calls_intrin_ascast(ptr addrspace(3) [[PTR]])
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(
; GFX10-SAME: ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: call void @calls_intrin_ascast(ptr addrspace(3) [[PTR]])
; GFX10-NEXT: ret void
;
call void @calls_intrin_ascast(ptr addrspace(3) %ptr)
ret void
}
attributes #0 = { "amdgpu-no-flat-scratch-init" }
;.
; GFX9: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; GFX9: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
;.
; GFX10: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,20" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
; GFX10: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
;.
|