1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
|
//===--- Level Zero Target RTL Implementation -----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Level Zero RTL Options support.
//
//===----------------------------------------------------------------------===//
#ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0OPTIONS_H
#define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0OPTIONS_H
#include <level_zero/ze_api.h>
#include "Shared/EnvironmentVar.h"
#include "L0Defs.h"
#include <new>
namespace llvm::omp::target::plugin {
/// Command submission mode.
///
/// The enumerators keep the numeric values 0, 1 and 2, in that order.
enum class CommandModeTy {
  Sync = 0,
  Async = 1,
  AsyncOrdered = 2,
};
/// Specialization constants used for a module compilation.
///
/// Collects (id, value) pairs and exposes them in the layout expected by
/// ze_module_constants_t. Every value is copied into storage obtained from the
/// BumpPtrAllocator passed at construction. This class only stores pointers to
/// that storage and never releases it, so the storage belongs to the
/// allocator.
class SpecConstantsTy {
  /// Ids of the registered constants; parallel to ConstantValues.
  std::vector<uint32_t> ConstantIds;
  /// Pointers to the allocator-backed copies of the constant values.
  std::vector<const void *> ConstantValues;
  /// Allocator that provides the storage for the values (not owned).
  BumpPtrAllocator &Allocator;

public:
  /// Create an empty set of constants whose values will live in \p Allocator.
  SpecConstantsTy(BumpPtrAllocator &Allocator) : Allocator(Allocator) {}

  // Copying, moving from a non-const rvalue, and any kind of assignment are
  // disabled.
  SpecConstantsTy(const SpecConstantsTy &) = delete;
  SpecConstantsTy(SpecConstantsTy &&) = delete;
  SpecConstantsTy &operator=(const SpecConstantsTy &) = delete;
  SpecConstantsTy &operator=(const SpecConstantsTy &&) = delete;

  /// Construct from a const rvalue.
  ///
  /// Non-const rvalues select the deleted overload above, so this constructor
  /// is only reachable with a const rvalue. Since \p Other is const, its
  /// vectors cannot be moved from; they are copied, and the new object shares
  /// the allocator (and the pointed-to values) with \p Other.
  SpecConstantsTy(const SpecConstantsTy &&Other)
      : ConstantIds(Other.ConstantIds), ConstantValues(Other.ConstantValues),
        Allocator(Other.Allocator) {}

  ~SpecConstantsTy() = default;

  /// Register the constant \p Id with value \p Val.
  ///
  /// A copy of \p Val is constructed in storage taken from the allocator and
  /// a pointer to that copy is recorded. The destructor of the copy is never
  /// run by this class.
  template <typename T> void addConstant(uint32_t Id, T Val) {
    void *Storage = Allocator.Allocate(sizeof(T), alignof(T));
    // Construct the object in the raw storage. Assigning through a T* to
    // memory in which no T has been constructed is undefined for non-trivial
    // types.
    T *ValuePtr = ::new (Storage) T(Val);
    ConstantIds.push_back(Id);
    ConstantValues.push_back(ValuePtr);
  }

  /// Return a ze_module_constants_t describing the registered constants.
  ///
  /// The result points directly into the vectors of this object, so it is
  /// only usable while this object is alive and no further constant has been
  /// added (adding may reallocate the vectors).
  ze_module_constants_t getModuleConstants() const {
    const auto NumConstants = static_cast<uint32_t>(ConstantValues.size());
    // Unfortunately we have to const_cast the value array.
    // L0 data type should probably be fixed.
    return ze_module_constants_t{
        NumConstants, ConstantIds.data(),
        const_cast<const void **>(ConstantValues.data())};
  }
};
/// L0 Plugin flags.
///
/// All flags are packed into a single 64-bit bit-field group.
struct L0OptionFlagsTy {
  /// One-bit memory pool flag; set to 1 on construction.
  uint64_t UseMemoryPool : 1;
  /// Remaining 63 bits; cleared on construction.
  uint64_t Reserved : 63;

  /// Set the default flag values.
  L0OptionFlagsTy() {
    UseMemoryPool = 1;
    Reserved = 0;
  }
};
/// Options of the Level Zero plugin.
///
/// Every option has a built-in default. init() runs processEnvironmentVars()
/// the first time it is called and does nothing on later calls.
struct L0OptionsTy {
  /// Binary flags.
  L0OptionFlagsTy Flags;

  /// Staging buffer size.
  size_t StagingBufferSize = L0StagingBufferSize;

  /// Staging buffer count.
  size_t StagingBufferCount = L0StagingBufferCount;

  /// Memory pool parameters for a single allocation kind.
  struct MemPoolConfigTy {
    bool Use;         ///< Whether the pool is used by default.
    int32_t AllocMax; ///< AllocMax, in MB.
    int32_t Capacity; ///< Capacity.
    int32_t PoolSize; ///< PoolSize, in MB.
  };

  /// Memory pool default parameters for each allocation kind:
  /// {UseByDefault, AllocMax(MB), Capacity, PoolSize(MB)}
  std::array<MemPoolConfigTy, 3> MemPoolConfig{
      MemPoolConfigTy{true, 1, 4, 256},  // TARGET_ALLOC_DEVICE
      MemPoolConfigTy{true, 1, 4, 256},  // TARGET_ALLOC_HOST
      MemPoolConfigTy{true, 8, 4, 256}}; // TARGET_ALLOC_SHARED

  /// Parameters for memory pools dedicated to reduction scratch space.
  std::array<int32_t, 3> ReductionPoolInfo{256, 8, 8192};

  /// Oversubscription rate for normal kernels.
  uint32_t SubscriptionRate = 4;

  /// Loop kernels with known ND-range may be known to have
  /// few iterations and they may not exploit the offload device
  /// to the fullest extent.
  /// Let's assume a device has N total HW threads available,
  /// and the kernel requires M hardware threads with LWS set to L.
  /// If (M < N * ThinThreadsThreshold), then we will try
  /// to iteratively divide L by 2 to increase the number of HW
  /// threads used for executing the kernel. Effectively, we will
  /// end up with L less than the kernel's SIMD width, so the HW
  /// threads will not use all their SIMD lanes. This (presumably) should
  /// allow more parallelism, because the stalls in the SIMD lanes
  /// will be distributed across more HW threads, and the probability
  /// of having a stall (or a sequence of stalls) on a critical path
  /// in the kernel should decrease.
  /// Anyway, this is just a heuristic that seems to work well for some
  /// kernels (which poorly expose parallelism in the first place).
  double ThinThreadsThreshold = 0.1;

  // Compilation options for IGC.
  // OpenCL 2.0 builtins (like atomic_load_explicit, etc.) are used by the
  // runtime, so we have to explicitly specify the "-cl-std=CL2.0" compilation
  // option. With it, the SPIR-V will be converted to LLVM IR with OpenCL 2.0
  // builtins. Otherwise, SPIR-V will be converted to LLVM IR with OpenCL 1.2
  // builtins.
  static constexpr std::string_view CompilationOptions = "-cl-std=CL2.0 ";
  static constexpr std::string_view InternalCompilationOptions =
      "-cl-take-global-address";
  std::string UserCompilationOptions;

  // Allocator for long-lived allocations (e.g. spec constants).
  // It is declared before CommonSpecConstants on purpose: members are
  // constructed in declaration order and destroyed in reverse order, so the
  // allocator is constructed before, and destroyed after, the object that
  // holds a reference to it.
  BumpPtrAllocator Allocator;

  /// Spec constants used for all modules.
  SpecConstantsTy CommonSpecConstants;

  /// Command execution mode.
  /// Whether the runtime uses asynchronous mode or not depends on the type of
  /// devices and whether immediate command list is fully enabled.
  CommandModeTy CommandMode = CommandModeTy::Async;

  /// Controls if we need to reduce available HW threads. We need this
  /// adjustment on XeHPG when Level Zero debug is enabled
  /// (ZET_ENABLE_PROGRAM_DEBUGGING=1).
  bool ZeDebugEnabled = false;

  /// Whether the options have already been processed by init().
  bool Init = false;

  /// Set up the defaults and bind the common spec constants to Allocator.
  L0OptionsTy() : CommonSpecConstants(Allocator) {}

  /// Read environment variables.
  void processEnvironmentVars();

  /// Process the environment variables once; repeated calls are no-ops.
  void init() {
    if (Init)
      return;
    processEnvironmentVars();
    Init = true;
  }

  /// Return true if the value of \p Var equals \p Matched, ignoring case.
  bool match(const StringEnvar &Var, const llvm::StringRef Matched) const {
    return Matched.equals_insensitive(Var.get());
  }
}; // L0OptionsTy
} // namespace llvm::omp::target::plugin
#endif // OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_LEVEL_ZERO_L0OPTIONS_H
|