1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
|
//==- AMDGPUArgumentUsageInfo.h - Function Arg Usage Info --------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUARGUMENTUSAGEINFO_H
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include <variant>
namespace llvm {
/// Declaration of the initialization hook for the legacy wrapper pass. It is
/// called with the global PassRegistry from the
/// AMDGPUArgumentUsageInfoWrapperLegacy constructor in this header; the
/// definition is not in this file.
void initializeAMDGPUArgumentUsageInfoWrapperLegacyPass(PassRegistry &);
// Forward declarations. As far as this header is concerned, these types are
// only named through pointers, references, and function declarations, so
// their complete definitions are not required here.
class Function;
class LLT;
class raw_ostream;
class TargetRegisterClass;
class TargetRegisterInfo;
struct ArgDescriptor {
private:
friend struct AMDGPUFunctionArgInfo;
friend class AMDGPUArgumentUsageInfo;
std::variant<std::monostate, MCRegister, unsigned> Val;
// Bitmask to locate argument within the register.
unsigned Mask;
public:
ArgDescriptor(unsigned Mask = ~0u) : Mask(Mask) {}
static ArgDescriptor createRegister(Register Reg, unsigned Mask = ~0u) {
ArgDescriptor Ret(Mask);
Ret.Val = Reg.asMCReg();
return Ret;
}
static ArgDescriptor createStack(unsigned Offset, unsigned Mask = ~0u) {
ArgDescriptor Ret(Mask);
Ret.Val = Offset;
return Ret;
}
static ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask) {
// Copy the descriptor, then change the mask.
ArgDescriptor Ret(Arg);
Ret.Mask = Mask;
return Ret;
}
bool isSet() const { return !std::holds_alternative<std::monostate>(Val); }
explicit operator bool() const {
return isSet();
}
bool isRegister() const { return std::holds_alternative<MCRegister>(Val); }
MCRegister getRegister() const { return std::get<MCRegister>(Val); }
unsigned getStackOffset() const { return std::get<unsigned>(Val); }
unsigned getMask() const {
// None of the target SGPRs or VGPRs are expected to have a 'zero' mask.
assert(Mask && "Invalid mask.");
return Mask;
}
bool isMasked() const {
return Mask != ~0u;
}
void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr) const;
};
/// Streams \p Arg to \p OS through ArgDescriptor::print() without supplying a
/// TargetRegisterInfo, then returns \p OS so insertions can be chained.
inline raw_ostream &operator<<(raw_ostream &OS, const ArgDescriptor &Arg) {
  Arg.print(OS, /*TRI=*/nullptr);
  return OS;
}
/// An ArgDescriptor extended with a list of registers. It is the mapped value
/// type of AMDGPUFunctionArgInfo::PreloadKernArgs.
struct KernArgPreloadDescriptor : ArgDescriptor {
  /// Registers recorded for this descriptor.
  /// NOTE(review): presumably the registers a preloaded kernel argument
  /// occupies -- confirm against the code that fills this in.
  SmallVector<MCRegister> Regs;

  KernArgPreloadDescriptor() = default;
};
/// Groups one ArgDescriptor per implicitly supplied input value (the SGPR and
/// VGPR inputs named below) together with the bookkeeping for preloaded kernel
/// arguments.
struct AMDGPUFunctionArgInfo {
// clang-format off
/// Identifiers for the individual preloaded values; a value of this type is
/// what getPreloadedValue() takes to select an input. The numeric values are
/// assigned explicitly and are not contiguous.
enum PreloadedValue {
// SGPRS:
PRIVATE_SEGMENT_BUFFER = 0,
DISPATCH_PTR = 1,
QUEUE_PTR = 2,
KERNARG_SEGMENT_PTR = 3,
DISPATCH_ID = 4,
FLAT_SCRATCH_INIT = 5,
LDS_KERNEL_ID = 6, // LLVM internal, not part of the ABI
WORKGROUP_ID_X = 10, // Also used for cluster ID X.
WORKGROUP_ID_Y = 11, // Also used for cluster ID Y.
WORKGROUP_ID_Z = 12, // Also used for cluster ID Z.
PRIVATE_SEGMENT_WAVE_BYTE_OFFSET = 14,
IMPLICIT_BUFFER_PTR = 15,
IMPLICIT_ARG_PTR = 16,
PRIVATE_SEGMENT_SIZE = 17,
CLUSTER_WORKGROUP_ID_X = 21,
CLUSTER_WORKGROUP_ID_Y = 22,
CLUSTER_WORKGROUP_ID_Z = 23,
CLUSTER_WORKGROUP_MAX_ID_X = 24,
CLUSTER_WORKGROUP_MAX_ID_Y = 25,
CLUSTER_WORKGROUP_MAX_ID_Z = 26,
CLUSTER_WORKGROUP_MAX_FLAT_ID = 27,
// VGPRS:
WORKITEM_ID_X = 28,
WORKITEM_ID_Y = 29,
WORKITEM_ID_Z = 30,
// Alias for the first enumerator of the VGPR group (same value as
// WORKITEM_ID_X).
FIRST_VGPR_VALUE = WORKITEM_ID_X
};
// clang-format on
// Kernel input registers setup for the HSA ABI in allocation order.
// User SGPRs in kernels
// XXX - Can these require argument spills?
ArgDescriptor PrivateSegmentBuffer;
ArgDescriptor DispatchPtr;
ArgDescriptor QueuePtr;
ArgDescriptor KernargSegmentPtr;
ArgDescriptor DispatchID;
ArgDescriptor FlatScratchInit;
ArgDescriptor PrivateSegmentSize;
ArgDescriptor LDSKernelId;
// System SGPRs in kernels.
ArgDescriptor WorkGroupIDX;
ArgDescriptor WorkGroupIDY;
ArgDescriptor WorkGroupIDZ;
ArgDescriptor WorkGroupInfo;
ArgDescriptor PrivateSegmentWaveByteOffset;
// Pointer, at an offset from the kernarg segment pointer, to where special
// ABI arguments are passed to callable functions.
ArgDescriptor ImplicitArgPtr;
// Input registers for non-HSA ABI
ArgDescriptor ImplicitBufferPtr;
// VGPR inputs. For entry functions these are either v0, v1 and v2 or packed
// into v0, 10 bits per dimension if packed-tid is set.
ArgDescriptor WorkItemIDX;
ArgDescriptor WorkItemIDY;
ArgDescriptor WorkItemIDZ;
// Map the index of preloaded kernel arguments to its descriptor.
SmallDenseMap<int, KernArgPreloadDescriptor> PreloadKernArgs{};
// The first user SGPR allocated for kernarg preloading.
Register FirstKernArgPreloadReg;
/// Returns, for the requested \p Value, a tuple of (pointer to an argument
/// descriptor, pointer to a register class, LLT). Defined out of line.
/// NOTE(review): presumably the descriptor is the matching member of this
/// struct and the register class / LLT describe how that value is held --
/// confirm against the definition.
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
getPreloadedValue(PreloadedValue Value) const;
/// Static factory that returns an AMDGPUFunctionArgInfo by value. Defined out
/// of line.
/// NOTE(review): presumably the descriptor assignment used for the fixed
/// function ABI -- confirm against the definition.
static AMDGPUFunctionArgInfo fixedABILayout();
};
class AMDGPUArgumentUsageInfo {
private:
DenseMap<const Function *, AMDGPUFunctionArgInfo> ArgInfoMap;
public:
static const AMDGPUFunctionArgInfo ExternFunctionInfo;
static const AMDGPUFunctionArgInfo FixedABIFunctionInfo;
void print(raw_ostream &OS, const Module *M = nullptr) const;
void clear() { ArgInfoMap.clear(); }
void setFuncArgInfo(const Function &F, const AMDGPUFunctionArgInfo &ArgInfo) {
ArgInfoMap[&F] = ArgInfo;
}
const AMDGPUFunctionArgInfo &lookupFuncArgInfo(const Function &F) const;
bool invalidate(Module &M, const PreservedAnalyses &PA,
ModuleAnalysisManager::Invalidator &Inv);
};
class AMDGPUArgumentUsageInfoWrapperLegacy : public ImmutablePass {
std::unique_ptr<AMDGPUArgumentUsageInfo> AUIP;
public:
static char ID;
AMDGPUArgumentUsageInfoWrapperLegacy() : ImmutablePass(ID) {
initializeAMDGPUArgumentUsageInfoWrapperLegacyPass(
*PassRegistry::getPassRegistry());
}
AMDGPUArgumentUsageInfo &getArgUsageInfo() { return *AUIP; }
const AMDGPUArgumentUsageInfo &getArgUsageInfo() const { return *AUIP; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
bool doInitialization(Module &M) override {
AUIP = std::make_unique<AMDGPUArgumentUsageInfo>();
return false;
}
bool doFinalization(Module &M) override {
AUIP->clear();
return false;
}
void print(raw_ostream &OS, const Module *M = nullptr) const override {
AUIP->print(OS, M);
}
};
/// Module-level analysis, built on AnalysisInfoMixin, whose result type is
/// AMDGPUArgumentUsageInfo.
class AMDGPUArgumentUsageAnalysis
    : public AnalysisInfoMixin<AMDGPUArgumentUsageAnalysis> {
  // The mixin is a friend so it can reach the private Key below.
  friend AnalysisInfoMixin<AMDGPUArgumentUsageAnalysis>;

  /// Analysis key for this analysis; defined out of line.
  static AnalysisKey Key;

public:
  using Result = AMDGPUArgumentUsageInfo;

  /// Produces the analysis result for \p M by value. Defined out of line.
  Result run(Module &M, ModuleAnalysisManager &AM);
};
} // end namespace llvm
#endif
|