about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 8
-rw-r--r-- openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt | 1
-rw-r--r-- openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt | 1
3 files changed, 9 insertions, 1 deletion
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 44aa249..d6ca408 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -56,6 +56,11 @@ static cl::opt<bool> EnableParallelRegionMerging(
cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
cl::init(false));
+static cl::opt<bool>
+ DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore,
+ cl::desc("Disable function internalization."),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
cl::Hidden);
static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
@@ -3824,7 +3829,8 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
DenseSet<const Function *> InternalizedFuncs;
if (isOpenMPDevice(M))
for (Function &F : M)
- if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F)) {
+ if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
+ !DisableInternalization) {
if (Attributor::internalizeFunction(F, /* Force */ true)) {
InternalizedFuncs.insert(&F);
} else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
index 6371230..903c8d9 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -107,6 +107,7 @@ macro(add_cuda_bc_library)
set(cu_cmd ${CLANG_TOOL}
-xc++
-c
+ -mllvm -openmp-opt-disable-internalization
-std=c++14
-ffreestanding
-target amdgcn-amd-amdhsa
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index 5120a9c..2f16dbb 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -153,6 +153,7 @@ set(cuda_src_files
# Set flags for LLVM Bitcode compilation.
set(bc_flags -S -x c++ -O1 -std=c++14
+ -mllvm -openmp-opt-disable-internalization
-target nvptx64
-Xclang -emit-llvm-bc
-Xclang -aux-triple -Xclang ${aux_triple}