diff options
| -rw-r--r-- | llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 8 | ||||
| -rw-r--r-- | openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt | 1 |
3 files changed, 9 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 44aa249..d6ca408 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -56,6 +56,11 @@ static cl::opt<bool> EnableParallelRegionMerging(
     cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
     cl::init(false));
 
+static cl::opt<bool>
+    DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore,
+                           cl::desc("Disable function internalization."),
+                           cl::Hidden, cl::init(false));
+
 static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
                                     cl::Hidden);
 static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
@@ -3824,7 +3829,8 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
   DenseSet<const Function *> InternalizedFuncs;
   if (isOpenMPDevice(M))
     for (Function &F : M)
-      if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F)) {
+      if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
+          !DisableInternalization) {
         if (Attributor::internalizeFunction(F, /* Force */ true)) {
           InternalizedFuncs.insert(&F);
         } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
index 6371230..903c8d9 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -107,6 +107,7 @@ macro(add_cuda_bc_library)
   set(cu_cmd ${CLANG_TOOL}
     -xc++
     -c
+    -mllvm -openmp-opt-disable-internalization
     -std=c++14
     -ffreestanding
     -target amdgcn-amd-amdhsa
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index 5120a9c..2f16dbb 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -153,6 +153,7 @@ set(cuda_src_files
 
 # Set flags for LLVM Bitcode compilation.
 set(bc_flags -S -x c++ -O1 -std=c++14
+             -mllvm -openmp-opt-disable-internalization
              -target nvptx64
              -Xclang -emit-llvm-bc
              -Xclang -aux-triple -Xclang ${aux_triple}
|
