1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds --amdgpu-lower-module-lds-strategy=module < %s | FileCheck %s
; Regression test for issue 160181.
; One variable is chosen to be allocated at address zero. Here, that's @both.
; Other variables should then be allocated at fixed offsets from it, provided
; they are allocated by all the other kernels that presently allocate the
; variable at address zero.
; The failure mode was in that second check: variables could be added to
; the module scope zero address struct even when some of the kernels allocating
; that struct do not need the additional variable.
; With current llvm, all of these variables are put in the module scope struct, even though
; neither kern_one nor kern_two accesses all of them.
; LDS (addrspace(3)) variables under test. The functions in this file access
; them as follows:
;   @both        - loaded by @func_one and @func_two, stored by
;                  @func_block_direct_allocation
;   @both_second - stored by all three of those functions
;   @one         - stored by @func_one, loaded by @func_block_direct_allocation
;   @two         - stored by @func_two, loaded by @func_block_direct_allocation
@both = addrspace(3) global i32 poison
@both_second = addrspace(3) global i16 poison ; a second field in the module struct
@one = addrspace(3) global i32 poison
@two = addrspace(3) global i32 poison
;.
; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t poison, align 4, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(3) @llvm.amdgcn.module.lds to ptr)], section "llvm.metadata"
;.
; Non-kernel function called from @kern_one and @kern_block_direct_allocation.
; It reads @both and writes @one and @both_second; it never touches @two.
; In the expected output @both is addressed as @llvm.amdgcn.module.lds itself,
; @one as field 1 and @both_second as field 3 of the module struct.
define void @func_one() {
; CHECK-LABEL: define {{[^@]+}}@func_one() {
; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META1:![0-9]+]]
; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META18:![0-9]+]]
; CHECK-NEXT: store i16 10, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 3), align 4, !noalias [[META23:![0-9]+]]
; CHECK-NEXT: ret void
;
; Copy the value of @both into @one.
%val0 = load i32, ptr addrspace(3) @both
store i32 %val0, ptr addrspace(3) @one
; Store the constant 10 into @both_second.
store i16 10, ptr addrspace(3) @both_second
ret void
}
; Kernel whose only call is to @func_one, so through it the kernel reaches
; @both, @one and @both_second but not @two.
; The expected output inserts an @llvm.donothing call carrying an
; "ExplicitUse" operand bundle on @llvm.amdgcn.module.lds ahead of the
; original call.
define amdgpu_kernel void @kern_one() {
; CHECK-LABEL: define {{[^@]+}}@kern_one
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !noalias [[META24:![0-9]+]]
; CHECK-NEXT: call void @func_one()
; CHECK-NEXT: ret void
;
entry:
call void @func_one()
ret void
}
; Non-kernel function called from @kern_two and @kern_block_direct_allocation.
; Mirror image of @func_one: it reads @both and writes @two and @both_second;
; it never touches @one.
; In the expected output @both is addressed as @llvm.amdgcn.module.lds itself,
; @two as field 2 and @both_second as field 3 of the module struct.
define void @func_two() {
; CHECK-LABEL: define {{[^@]+}}@func_two() {
; CHECK-NEXT: [[VAL0:%.*]] = load i32, ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META1]]
; CHECK-NEXT: store i32 [[VAL0]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 2), align 4, !noalias [[META25:![0-9]+]]
; CHECK-NEXT: store i16 20, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 3), align 4, !noalias [[META23]]
; CHECK-NEXT: ret void
;
; Copy the value of @both into @two.
%val0 = load i32, ptr addrspace(3) @both
store i32 %val0, ptr addrspace(3) @two
; Store the constant 20 into @both_second.
store i16 20, ptr addrspace(3) @both_second
ret void
}
; Kernel whose only call is to @func_two, so through it the kernel reaches
; @both, @two and @both_second but not @one.
; The expected output inserts an @llvm.donothing call carrying an
; "ExplicitUse" operand bundle on @llvm.amdgcn.module.lds ahead of the
; original call; here that call is expected to carry both !alias.scope and
; !noalias metadata.
define amdgpu_kernel void @kern_two() {
; CHECK-LABEL: define {{[^@]+}}@kern_two
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ], !alias.scope [[META26:![0-9]+]], !noalias [[META27:![0-9]+]]
; CHECK-NEXT: call void @func_two()
; CHECK-NEXT: ret void
;
entry:
call void @func_two()
ret void
}
; Unrelated to the bug at hand, but if a variable is only
; reachable from a single kernel, it gets allocated to a fixed
; address independent of the module scope struct.
; @kern_block_direct_allocation reaches every variable, so the key
; variables miss that optimisation while @both remains the best
; candidate for address zero allocation.
; Non-kernel function called only from @kern_block_direct_allocation.
; It touches all four variables: it reads @one and @two, and writes @both and
; @both_second.
; In the expected output @one and @two are addressed as fields 1 and 2 of the
; module struct, @both as @llvm.amdgcn.module.lds itself and @both_second as
; field 3.
define void @func_block_direct_allocation() {
; CHECK-LABEL: define {{[^@]+}}@func_block_direct_allocation() {
; CHECK-NEXT: [[VAL1:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 1), align 4, !noalias [[META18]]
; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 2), align 4, !noalias [[META25]]
; CHECK-NEXT: [[SUM:%.*]] = add i32 [[VAL1]], [[VAL2]]
; CHECK-NEXT: store i32 [[SUM]], ptr addrspace(3) @llvm.amdgcn.module.lds, align 4, !noalias [[META1]]
; CHECK-NEXT: store i16 30, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_MODULE_LDS_T]], ptr addrspace(3) @llvm.amdgcn.module.lds, i32 0, i32 3), align 4, !noalias [[META23]]
; CHECK-NEXT: ret void
;
; Store the sum of @one and @two into @both.
%val1 = load i32, ptr addrspace(3) @one
%val2 = load i32, ptr addrspace(3) @two
%sum = add i32 %val1, %val2
store i32 %sum, ptr addrspace(3) @both
; Store the constant 30 into @both_second.
store i16 30, ptr addrspace(3) @both_second
ret void
}
; Kernel that calls @func_block_direct_allocation, @func_one and @func_two, so
; through them it reaches every variable in the file.
; Unlike @kern_one and @kern_two it has no explicit entry label, and the
; @llvm.donothing call inserted in the expected output carries no alias
; metadata.
define amdgpu_kernel void @kern_block_direct_allocation() {
; CHECK-LABEL: define {{[^@]+}}@kern_block_direct_allocation
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.module.lds) ]
; CHECK-NEXT: call void @func_block_direct_allocation()
; CHECK-NEXT: call void @func_one()
; CHECK-NEXT: call void @func_two()
; CHECK-NEXT: ret void
;
call void @func_block_direct_allocation()
call void @func_one()
call void @func_two()
ret void
}
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-lds-size"="16" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
;.
; CHECK: [[META0]] = !{i32 0, i32 1}
; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META4:![0-9]+]], [[META5:![0-9]+]], [[META6:![0-9]+]], [[META8:![0-9]+]], [[META9:![0-9]+]], [[META10:![0-9]+]], [[META12:![0-9]+]], [[META13:![0-9]+]], [[META14:![0-9]+]], [[META16:![0-9]+]], [[META17:![0-9]+]]}
; CHECK: [[META2]] = distinct !{[[META2]], [[META3:![0-9]+]]}
; CHECK: [[META3]] = distinct !{[[META3]]}
; CHECK: [[META4]] = distinct !{[[META4]], [[META3]]}
; CHECK: [[META5]] = distinct !{[[META5]], [[META3]]}
; CHECK: [[META6]] = distinct !{[[META6]], [[META7:![0-9]+]]}
; CHECK: [[META7]] = distinct !{[[META7]]}
; CHECK: [[META8]] = distinct !{[[META8]], [[META7]]}
; CHECK: [[META9]] = distinct !{[[META9]], [[META7]]}
; CHECK: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]]}
; CHECK: [[META11]] = distinct !{[[META11]]}
; CHECK: [[META12]] = distinct !{[[META12]], [[META11]]}
; CHECK: [[META13]] = distinct !{[[META13]], [[META11]]}
; CHECK: [[META14]] = distinct !{[[META14]], [[META15:![0-9]+]]}
; CHECK: [[META15]] = distinct !{[[META15]]}
; CHECK: [[META16]] = distinct !{[[META16]], [[META15]]}
; CHECK: [[META17]] = distinct !{[[META17]], [[META15]]}
; CHECK: [[META18]] = !{[[META19:![0-9]+]], [[META2]], [[META5]], [[META20:![0-9]+]], [[META6]], [[META9]], [[META21:![0-9]+]], [[META10]], [[META13]], [[META22:![0-9]+]], [[META14]], [[META17]]}
; CHECK: [[META19]] = distinct !{[[META19]], [[META3]]}
; CHECK: [[META20]] = distinct !{[[META20]], [[META7]]}
; CHECK: [[META21]] = distinct !{[[META21]], [[META11]]}
; CHECK: [[META22]] = distinct !{[[META22]], [[META15]]}
; CHECK: [[META23]] = !{[[META19]], [[META4]], [[META5]], [[META20]], [[META8]], [[META9]], [[META21]], [[META12]], [[META13]], [[META22]], [[META16]], [[META17]]}
; CHECK: [[META24]] = !{[[META10]], [[META12]], [[META13]], [[META14]], [[META16]], [[META17]]}
; CHECK: [[META25]] = !{[[META19]], [[META2]], [[META4]], [[META20]], [[META6]], [[META8]], [[META21]], [[META10]], [[META12]], [[META22]], [[META14]], [[META16]]}
; CHECK: [[META26]] = !{[[META22]]}
; CHECK: [[META27]] = !{[[META14]], [[META16]], [[META17]]}
;.
|