aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--clang/include/clang/Basic/TargetInfo.h10
-rw-r--r--clang/lib/Basic/Targets/AMDGPU.cpp2
-rw-r--r--clang/lib/Basic/Targets/NVPTX.cpp2
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp3
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp13
-rw-r--r--clang/lib/CodeGen/CGOpenMPRuntimeGPU.h1
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h51
-rw-r--r--openmp/libomptarget/plugins/amdgpu/src/rtl.cpp16
8 files changed, 47 insertions, 51 deletions
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 21289b0..ab85594 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -210,8 +210,8 @@ protected:
unsigned char RegParmMax, SSERegParmMax;
TargetCXXABI TheCXXABI;
const LangASMap *AddrSpaceMap;
- const unsigned *GridValues =
- nullptr; // Array of target-specific GPU grid values that must be
+ const llvm::omp::GV *GridValues =
+ nullptr; // target-specific GPU grid values that must be
// consistent between host RTL (plugin), device RTL, and clang.
mutable StringRef PlatformName;
@@ -1410,10 +1410,10 @@ public:
return LangAS::Default;
}
- /// Return a target-specific GPU grid value based on the GVIDX enum \p gv
- unsigned getGridValue(llvm::omp::GVIDX gv) const {
+ /// Return a target-specific GPU grid values
+ const llvm::omp::GV &getGridValue() const {
assert(GridValues != nullptr && "GridValues not initialized");
- return GridValues[gv];
+ return *GridValues;
}
/// Retrieve the name of the platform as it is used in the
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index fac786d..cebb19e 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -335,7 +335,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
llvm::AMDGPU::getArchAttrR600(GPUKind)) {
resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
: DataLayoutStringR600);
- GridValues = llvm::omp::AMDGPUGpuGridValues;
+ GridValues = &llvm::omp::AMDGPUGridValues;
setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
!isAMDGCN(Triple));
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 56f8a17..d1a34e4 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -65,7 +65,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
TLSSupported = false;
VLASupported = false;
AddrSpaceMap = &NVPTXAddrSpaceMap;
- GridValues = llvm::omp::NVPTXGpuGridValues;
+ GridValues = &llvm::omp::NVPTXGridValues;
UseAddrSpaceMapMangling = true;
// Define available target features
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
index 33d4ab8..cac5faaa 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
@@ -20,6 +20,7 @@
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Cuda.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
using namespace clang;
@@ -35,7 +36,7 @@ CGOpenMPRuntimeAMDGCN::CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM)
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
// return constant compile-time target-specific warp size
- unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
return Bld.getInt32(WarpSize);
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 63feced..b13d559 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -339,7 +339,7 @@ class CheckVarsEscapingDeclContext final
assert(!GlobalizedRD &&
"Record for globalized variables is built already.");
ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams;
- unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
if (IsInTTDRegion)
EscapedDeclsForTeams = EscapedDecls.getArrayRef();
else
@@ -535,8 +535,7 @@ public:
/// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
- unsigned LaneIDBits =
- CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size_Log2);
+ unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2;
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");
}
@@ -546,8 +545,8 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
/// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
- unsigned LaneIDMask = CGF.getContext().getTargetInfo().getGridValue(
- llvm::omp::GV_Warp_Size_Log2_Mask);
+ unsigned LaneIDMask =
+ CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask;
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),
"nvptx_lane_id");
@@ -1308,7 +1307,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
const RecordDecl *GlobalizedRD = nullptr;
llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions;
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
- unsigned WarpSize = CGM.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGM.getTarget().getGridValue().GV_Warp_Size;
// Globalize team reductions variable unconditionally in all modes.
if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);
@@ -2089,7 +2088,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
"__openmp_nvptx_data_transfer_temporary_storage";
llvm::GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
- unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
if (!TransferMedium) {
auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
index b5f1b84..5d3b711 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -17,7 +17,6 @@
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/AST/StmtOpenMP.h"
-#include "llvm/Frontend/OpenMP/OMPGridValues.h"
namespace clang {
namespace CodeGen {
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
index 0b6aed1..1d7735e 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -29,68 +29,69 @@ namespace omp {
///
/// Example usage in clang:
/// const unsigned slot_size =
-/// ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size);
+/// ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
///
/// Example usage in libomptarget/deviceRTLs:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
/// #ifdef __AMDGPU__
-/// #define GRIDVAL AMDGPUGpuGridValues
+/// #define GRIDVAL AMDGPUGridValues
/// #else
-/// #define GRIDVAL NVPTXGpuGridValues
+/// #define GRIDVAL NVPTXGridValues
/// #endif
/// ... Then use this reference for GV_Warp_Size in the deviceRTL source.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL().GV_Warp_Size
///
/// Example usage in libomptarget hsa plugin:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
-/// #define GRIDVAL AMDGPUGpuGridValues
+/// #define GRIDVAL AMDGPUGridValues
/// ... Then use this reference to access GV_Warp_Size in the hsa plugin.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL().GV_Warp_Size
///
/// Example usage in libomptarget cuda plugin:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
-/// #define GRIDVAL NVPTXGpuGridValues
+/// #define GRIDVAL NVPTXGridValues
/// ... Then use this reference to access GV_Warp_Size in the cuda plugin.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL().GV_Warp_Size
///
-enum GVIDX {
+
+struct GV {
/// The maximum number of workers in a kernel.
/// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
- GV_Threads,
+ const unsigned GV_Threads;
/// The size reserved for data in a shared memory slot.
- GV_Slot_Size,
+ const unsigned GV_Slot_Size;
/// The default value of maximum number of threads in a worker warp.
- GV_Warp_Size,
+ const unsigned GV_Warp_Size;
/// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
/// for NVPTX.
- GV_Warp_Size_32,
+ const unsigned GV_Warp_Size_32;
/// The number of bits required to represent the max number of threads in warp
- GV_Warp_Size_Log2,
+ const unsigned GV_Warp_Size_Log2;
/// GV_Warp_Size * GV_Slot_Size,
- GV_Warp_Slot_Size,
+ const unsigned GV_Warp_Slot_Size;
/// the maximum number of teams.
- GV_Max_Teams,
+ const unsigned GV_Max_Teams;
/// Global Memory Alignment
- GV_Mem_Align,
+ const unsigned GV_Mem_Align;
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
- GV_Warp_Size_Log2_Mask,
+ const unsigned GV_Warp_Size_Log2_Mask;
// An alternative to the heavy data sharing infrastructure that uses global
// memory is one that uses device __shared__ memory. The amount of such space
// (in bytes) reserved by the OpenMP runtime is noted here.
- GV_SimpleBufferSize,
+ const unsigned GV_SimpleBufferSize;
// The absolute maximum team size for a working group
- GV_Max_WG_Size,
+ const unsigned GV_Max_WG_Size;
// The default maximum team size for a working group
- GV_Default_WG_Size,
+ const unsigned GV_Default_WG_Size;
// This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
- GV_Max_Warp_Number,
+ const unsigned GV_Max_Warp_Number;
/// The slot size that should be reserved for a working warp.
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
- GV_Warp_Size_Log2_MaskL
+ const unsigned GV_Warp_Size_Log2_MaskL;
};
/// For AMDGPU GPUs
-static constexpr unsigned AMDGPUGpuGridValues[] = {
+static constexpr GV AMDGPUGridValues = {
448, // GV_Threads
256, // GV_Slot_Size
64, // GV_Warp_Size
@@ -108,7 +109,7 @@ static constexpr unsigned AMDGPUGpuGridValues[] = {
};
/// For Nvidia GPUs
-static constexpr unsigned NVPTXGpuGridValues[] = {
+static constexpr GV NVPTXGridValues = {
992, // GV_Threads
256, // GV_Slot_Size
32, // GV_Warp_Size
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 4865ef6..31adc72 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -501,14 +501,11 @@ public:
static const unsigned HardTeamLimit =
(1 << 16) - 1; // 64K needed to fit in uint16
static const int DefaultNumTeams = 128;
- static const int Max_Teams =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Max_Teams];
- static const int Warp_Size =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Warp_Size];
- static const int Max_WG_Size =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Max_WG_Size];
+ static const int Max_Teams = llvm::omp::AMDGPUGridValues.GV_Max_Teams;
+ static const int Warp_Size = llvm::omp::AMDGPUGridValues.GV_Warp_Size;
+ static const int Max_WG_Size = llvm::omp::AMDGPUGridValues.GV_Max_WG_Size;
static const int Default_WG_Size =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Default_WG_Size];
+ llvm::omp::AMDGPUGridValues.GV_Default_WG_Size;
using MemcpyFunc = hsa_status_t (*)(hsa_signal_t, void *, const void *,
size_t size, hsa_agent_t);
@@ -1058,9 +1055,8 @@ int32_t __tgt_rtl_init_device(int device_id) {
DeviceInfo.WarpSize[device_id] = wavefront_size;
} else {
DP("Default wavefront size: %d\n",
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Warp_Size]);
- DeviceInfo.WarpSize[device_id] =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Warp_Size];
+ llvm::omp::AMDGPUGridValues.GV_Warp_Size);
+ DeviceInfo.WarpSize[device_id] = llvm::omp::AMDGPUGridValues.GV_Warp_Size;
}
// Adjust teams to the env variables