aboutsummaryrefslogtreecommitdiff
path: root/openmp
diff options
context:
space:
mode:
authorJoseph Huber <jhuber6@vols.utk.edu>2022-10-05 11:03:24 -0500
committerJoseph Huber <jhuber6@vols.utk.edu>2022-10-05 14:40:01 -0500
commit92233159035d1b50face95d886901cf99035bd99 (patch)
treedb3253054e300745db8095a1c19104e4c4f0f228 /openmp
parenta8ec170e01dd0d0cbf8c059aa7addaaf3d886876 (diff)
downloadllvm-92233159035d1b50face95d886901cf99035bd99.zip
llvm-92233159035d1b50face95d886901cf99035bd99.tar.gz
llvm-92233159035d1b50face95d886901cf99035bd99.tar.bz2
[DeviceRTL] Allow IsSPMDMode to be optimized out in LTO mode
A previous patch merged the static and bitcode versions of the DeviceRTL. We previously used the static library's separate compilation to set a special flag that kept `IsSPMDMode` out of the used list, so that it could be optimized out. Once the two versions were merged, we could no longer do this separate compilation, which had allowed users of LTO to get more optimal code. This patch rearranges the code. The `IsSPMDMode` global is now transitively used through its inclusion in the changed `__keep_alive` function. This allows us to manually delete the `__keep_alive` function from the module via `llvm-extract` when building the static library. The result is that the bitcode library will correctly maintain the needed shared state, while the static library will be able to internalize it and optimize it out. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D135280
Diffstat (limited to 'openmp')
-rw-r--r--openmp/libomptarget/DeviceRTL/CMakeLists.txt21
-rw-r--r--openmp/libomptarget/DeviceRTL/include/Types.h7
-rw-r--r--openmp/libomptarget/DeviceRTL/src/Mapping.cpp2
-rw-r--r--openmp/libomptarget/DeviceRTL/src/Utils.cpp11
-rw-r--r--openmp/libomptarget/DeviceRTL/src/exports3
5 files changed, 28 insertions, 16 deletions
diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
index 1a14a6d..ab7eaa9 100644
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -31,7 +31,8 @@ if (LLVM_DIR)
find_program(PACKAGER_TOOL clang-offload-packager PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
find_program(LINK_TOOL llvm-link PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
find_program(OPT_TOOL opt PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
- if ((NOT CLANG_TOOL) OR (NOT LINK_TOOL) OR (NOT OPT_TOOL))
+ find_program(EXTRACT_TOOL llvm-extract PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH)
+ if ((NOT CLANG_TOOL) OR (NOT LINK_TOOL) OR (NOT OPT_TOOL) OR (NOT EXTRACT_TOOL))
libomptarget_say("Not building DeviceRTL. Missing clang: ${CLANG_TOOL}, llvm-link: ${LINK_TOOL} or opt: ${OPT_TOOL}")
return()
else()
@@ -44,6 +45,7 @@ elseif (LLVM_TOOL_CLANG_BUILD AND NOT CMAKE_CROSSCOMPILING AND NOT OPENMP_STANDA
set(PACKAGER_TOOL $<TARGET_FILE:clang-offload-packager>)
set(LINK_TOOL $<TARGET_FILE:llvm-link>)
set(OPT_TOOL $<TARGET_FILE:opt>)
+ set(EXTRACT_TOOL $<TARGET_FILE:llvm-extract>)
libomptarget_say("Building DeviceRTL. Using clang from in-tree build")
else()
libomptarget_say("Not building DeviceRTL. No appropriate clang found")
@@ -118,6 +120,7 @@ set(src_files
set(clang_opt_flags -O3 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=512)
set(link_opt_flags -O3 -openmp-opt-disable)
set(link_export_flag -passes=internalize -internalize-public-api-file=${source_directory}/exports)
+set(link_extract_flag --func='__keep_alive' --delete)
# Prepend -I to each list element
set (LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL "${LIBOMPTARGET_LLVM_INCLUDE_DIRS}")
@@ -199,11 +202,18 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
COMMENT "Optimizing LLVM bitcode ${bclib_name}"
)
+ add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/extracted_${bclib_name}
+ COMMAND ${EXTRACT_TOOL} ${link_extract_flag} ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
+ -o ${CMAKE_CURRENT_BINARY_DIR}/extracted_${bclib_name}
+ DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
+ COMMENT "Extracting LLVM bitcode ${bclib_name}"
+ )
+
# Package the bitcode in the bitcode and embed it in an ELF for the static library
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
COMMAND ${PACKAGER_TOOL} -o ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
- "--image=file=${CMAKE_CURRENT_BINARY_DIR}/${bclib_name},triple=${target_triple},arch=${target_cpu},kind=openmp"
- DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${bclib_name}
+ "--image=file=${CMAKE_CURRENT_BINARY_DIR}/extracted_${bclib_name},triple=${target_triple},arch=${target_cpu},kind=openmp"
+ DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/extracted_${bclib_name}
COMMENT "Packaging LLVM offloading binary ${bclib_name}.out"
)
@@ -232,6 +242,11 @@ function(compileDeviceRTLLibrary target_cpu target_name target_triple)
DEPENDS opt
APPEND)
endif()
+ if("${EXTRACT_TOOL}" STREQUAL "$<TARGET_FILE:llvm-extract>") # in-tree tool: order the extract step after it
+ add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/extracted_${bclib_name}
+ DEPENDS llvm-extract
+ APPEND)
+ endif()
if("${PACKAGER_TOOL}" STREQUAL "$<TARGET_FILE:clang-offload-packager>")
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/packaged_${bclib_name}
DEPENDS clang-offload-packager
diff --git a/openmp/libomptarget/DeviceRTL/include/Types.h b/openmp/libomptarget/DeviceRTL/include/Types.h
index d5209f1..700ebbd 100644
--- a/openmp/libomptarget/DeviceRTL/include/Types.h
+++ b/openmp/libomptarget/DeviceRTL/include/Types.h
@@ -213,13 +213,6 @@ enum OMPTgtExecModeFlags : int8_t {
#define CONSTANT(NAME) \
[[clang::address_space(4)]] NAME [[clang::loader_uninitialized]]
-// Attribute to keep alive certain definition for the bitcode library.
-#ifdef LIBOMPTARGET_BC_TARGET
-#define KEEP_ALIVE __attribute__((used, retain))
-#else
-#define KEEP_ALIVE
-#endif
-
///}
#endif
diff --git a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
index 172bbbf..f05e716 100644
--- a/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Mapping.cpp
@@ -276,7 +276,7 @@ uint32_t mapping::getNumberOfProcessorElements() {
// TODO: This is a workaround for initialization coming from kernels outside of
// the TU. We will need to solve this more correctly in the future.
-int __attribute__((weak)) KEEP_ALIVE SHARED(IsSPMDMode);
+int __attribute__((weak)) SHARED(IsSPMDMode);
void mapping::init(bool IsSPMD) {
if (mapping::isInitialThreadInLevel0(IsSPMD))
diff --git a/openmp/libomptarget/DeviceRTL/src/Utils.cpp b/openmp/libomptarget/DeviceRTL/src/Utils.cpp
index 2aa0194..e4920ab 100644
--- a/openmp/libomptarget/DeviceRTL/src/Utils.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Utils.cpp
@@ -19,16 +19,17 @@
using namespace _OMP;
-namespace _OMP {
+extern "C" __attribute__((weak)) int IsSPMDMode;
+
/// Helper to keep code alive without introducing a performance penalty.
-__attribute__((weak, optnone, cold)) KEEP_ALIVE void keepAlive() {
+extern "C" __attribute__((weak, optnone, cold, used, retain)) void
+__keep_alive() {
__kmpc_get_hardware_thread_id_in_block();
__kmpc_get_hardware_num_threads_in_block();
__kmpc_get_warp_size();
- __kmpc_barrier_simple_spmd(nullptr, 0);
- __kmpc_barrier_simple_generic(nullptr, 0);
+ __kmpc_barrier_simple_spmd(nullptr, IsSPMDMode);
+ __kmpc_barrier_simple_generic(nullptr, IsSPMDMode);
}
-} // namespace _OMP
namespace impl {
diff --git a/openmp/libomptarget/DeviceRTL/src/exports b/openmp/libomptarget/DeviceRTL/src/exports
index b175dd1..b09e5b0 100644
--- a/openmp/libomptarget/DeviceRTL/src/exports
+++ b/openmp/libomptarget/DeviceRTL/src/exports
@@ -2,6 +2,9 @@ omp_*
*llvm_*
__kmpc_*
+__keep_alive
+IsSPMDMode
+
memcmp
printf
__assert_fail