diff options
author | JonChesterfield <jonathanchesterfield@gmail.com> | 2019-12-04 16:43:16 +0000 |
---|---|---|
committer | Jon Chesterfield <jonathanchesterfield@gmail.com> | 2019-12-04 16:43:37 +0000 |
commit | 3ada8d2a87a2e818ea5302f40dbb0319d95b1554 (patch) | |
tree | 5f054d522f3410878e87a79c5bd32ac587617c1a | |
parent | 9b962d83ece841e43fd2823375dc6ddc94c1b178 (diff) | |
download | llvm-3ada8d2a87a2e818ea5302f40dbb0319d95b1554.zip llvm-3ada8d2a87a2e818ea5302f40dbb0319d95b1554.tar.gz llvm-3ada8d2a87a2e818ea5302f40dbb0319d95b1554.tar.bz2 |
[libomptarget] Build a minimal deviceRTL for amdgcn
Summary:
[libomptarget] Build a minimal deviceRTL for amdgcn
Repeat of D70414, with an include path fixed. Diff for sanity checking.
The CMakeLists.txt file is functionally identical to the one used in the aomp fork.
Whitespace changes were made based on nvptx/CMakeLists.txt, plus the
copyright notice updated to match (Greg was the original author so would
like his sign off on that here).
This change will build a small subset of the deviceRTL if an appropriate toolchain is
available, e.g. a local install of rocm. Support.h is moved from nvptx as a dependency
of debug.h.
Reviewers: ABataev, jdoerfert
Reviewed By: ABataev
Subscribers: jvesely, mgorny, jfb, openmp-commits, jdoerfert
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D70971
-rw-r--r-- | openmp/libomptarget/deviceRTLs/CMakeLists.txt | 3 | ||||
-rw-r--r-- | openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt | 136 | ||||
-rw-r--r-- | openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h | 31 | ||||
-rw-r--r-- | openmp/libomptarget/deviceRTLs/common/debug.h | 4 | ||||
-rw-r--r-- | openmp/libomptarget/deviceRTLs/common/device_environment.h (renamed from openmp/libomptarget/deviceRTLs/nvptx/src/device_environment.h) | 2 | ||||
-rw-r--r-- | openmp/libomptarget/deviceRTLs/common/support.h (renamed from openmp/libomptarget/deviceRTLs/nvptx/src/support.h) | 0 | ||||
-rw-r--r-- | openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu | 2 | ||||
-rw-r--r-- | openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h | 2 | ||||
-rw-r--r-- | openmp/libomptarget/deviceRTLs/nvptx/src/support.cu | 2 |
9 files changed, 162 insertions, 20 deletions
diff --git a/openmp/libomptarget/deviceRTLs/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/CMakeLists.txt index 9723fb8..8bbf987 100644 --- a/openmp/libomptarget/deviceRTLs/CMakeLists.txt +++ b/openmp/libomptarget/deviceRTLs/CMakeLists.txt @@ -6,8 +6,9 @@ # # ##===----------------------------------------------------------------------===## # -# Build a device RTL for each available machine available. +# Build a device RTL for each available machine. # ##===----------------------------------------------------------------------===## +add_subdirectory(amdgcn) add_subdirectory(nvptx) diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt new file mode 100644 index 0000000..6b82b4e --- /dev/null +++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt @@ -0,0 +1,136 @@ +##===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +##===----------------------------------------------------------------------===## +# +# Build the AMDGCN Device RTL if the ROCM tools are available +# +##===----------------------------------------------------------------------===## + +find_package(LLVM QUIET CONFIG + PATHS + $ENV{AOMP} + $ENV{HOME}/rocm/aomp + /opt/rocm/aomp + /usr/lib/rocm/aomp + ${LIBOMPTARGET_NVPTX_CUDA_COMPILER_DIR} + ${LIBOMPTARGET_NVPTX_CUDA_LINKER_DIR} + ${CMAKE_CXX_COMPILER_DIR} + NO_DEFAULT_PATH) + +if (LLVM_DIR) + libomptarget_say("Found LLVM ${LLVM_PACKAGE_VERSION}. Configure: ${LLVM_DIR}/LLVMConfig.cmake") +else() + libomptarget_say("Not building AMDGCN device RTL: AOMP not found") + return() +endif() + +set(AOMP_INSTALL_PREFIX ${LLVM_INSTALL_PREFIX}) + +if (AOMP_INSTALL_PREFIX) + set(AOMP_BINDIR ${AOMP_INSTALL_PREFIX}/bin) +else() + set(AOMP_BINDIR ${LLVM_BUILD_BINARY_DIR}/bin) +endif() + +libomptarget_say("Building AMDGCN device RTL. LLVM_COMPILER_PATH=${AOMP_BINDIR}") + +project(omptarget-amdgcn) + +add_custom_target(omptarget-amdgcn ALL) + +#optimization level +set(optimization_level 2) + +# Activate RTL message dumps if requested by the user. +if(LIBOMPTARGET_NVPTX_DEBUG) + set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1) +endif() + +get_filename_component(devicertl_base_directory + ${CMAKE_CURRENT_SOURCE_DIR} + DIRECTORY) + +set(cuda_sources + ${devicertl_base_directory}/common/src/cancel.cu + ${devicertl_base_directory}/common/src/critical.cu) + +set(h_files + ${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h + ${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h + ${devicertl_base_directory}/common/debug.h + ${devicertl_base_directory}/common/device_environment.h + ${devicertl_base_directory}/common/state-queue.h + ${devicertl_base_directory}/common/state-queuei.h + ${devicertl_base_directory}/common/support.h) + +# for both in-tree and out-of-tree build +if (NOT CMAKE_ARCHIVE_OUTPUT_DIRECTORY) + set(OUTPUTDIR ${CMAKE_CURRENT_BINARY_DIR}) +else() + set(OUTPUTDIR ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}) +endif() + +# create libraries +set(mcpus gfx700 gfx701 gfx801 gfx803 gfx900) +if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST) + set(mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST}) +endif() + +macro(add_cuda_bc_library) + set(cu_cmd ${AOMP_BINDIR}/clang++ + -std=c++11 + -fcuda-rdc + -fvisibility=default + --cuda-device-only + -Wno-unused-value + -x hip + -O${optimization_level} + --cuda-gpu-arch=${mcpu} + ${CUDA_DEBUG} + -I${CMAKE_CURRENT_SOURCE_DIR}/src + -I${devicertl_base_directory}) + + set(bc1_files) + + foreach(file ${ARGN}) + get_filename_component(fname ${file} NAME_WE) + set(bc1_filename ${fname}.${mcpu}.bc) + + add_custom_command( + OUTPUT ${bc1_filename} + COMMAND ${cu_cmd} ${file} -o ${bc1_filename} + DEPENDS ${file} ${h_files}) + + list(APPEND bc1_files ${bc1_filename}) + endforeach() + + add_custom_command( + OUTPUT linkout.cuda.${mcpu}.bc + COMMAND ${AOMP_BINDIR}/llvm-link ${bc1_files} -o linkout.cuda.${mcpu}.bc + DEPENDS ${bc1_files}) + + list(APPEND bc_files linkout.cuda.${mcpu}.bc) +endmacro() + +set(libname "omptarget-amdgcn") + +foreach(mcpu ${mcpus}) + set(bc_files) + add_cuda_bc_library(${cuda_sources}) + + set(bc_libname lib${libname}-${mcpu}.bc) + add_custom_command( + OUTPUT ${bc_libname} + COMMAND ${AOMP_BINDIR}/llvm-link ${bc_files} | ${AOMP_BINDIR}/opt --always-inline -o ${OUTPUTDIR}/${bc_libname} + DEPENDS ${bc_files}) + + add_custom_target(lib${libname}-${mcpu} ALL DEPENDS ${bc_libname}) + + install(FILES ${OUTPUTDIR}/${bc_libname} + DESTINATION "${OPENMP_INSTALL_LIBDIR}/libdevice" + ) +endforeach() diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h index b2d06ab..c6e082c 100644 --- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h +++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h @@ -72,8 +72,6 @@ EXTERN uint64_t __lanemask_lt(); // thread's lane number in the warp EXTERN uint64_t __lanemask_gt(); -EXTERN void llvm_amdgcn_s_barrier(); - // CU id EXTERN unsigned __smid(); @@ -101,25 +99,21 @@ INLINE uint32_t __kmpc_impl_smid() { return __smid(); } -INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __ffsll(x); } +INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __builtin_ffsl(x); } -INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __popcll(x); } +INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __builtin_popcountl(x); } INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() { return __ballot64(1); } -INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var, - int32_t SrcLane) { - return __shfl(Var, SrcLane, WARPSIZE); -} +EXTERN int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var, + int32_t SrcLane); -INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var, - uint32_t Delta, int32_t Width) { - return __shfl_down(Var, Delta, Width); -} +EXTERN int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var, + uint32_t Delta, int32_t Width); -INLINE void __kmpc_impl_syncthreads() { llvm_amdgcn_s_barrier(); } +INLINE void __kmpc_impl_syncthreads() { __builtin_amdgcn_s_barrier(); } INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) { // we have protected the master warp from releasing from its barrier @@ -128,4 +122,15 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) { __builtin_amdgcn_s_barrier(); } +// DEVICE versions of part of libc +extern "C" { +DEVICE __attribute__((noreturn)) void +__assertfail(const char *, const char *, unsigned, const char *, size_t); +INLINE static void __assert_fail(const char *__message, const char *__file, + unsigned int __line, const char *__function) { + __assertfail(__message, __file, __line, __function, sizeof(char)); +} +DEVICE int printf(const char *, ...); +} + #endif diff --git a/openmp/libomptarget/deviceRTLs/common/debug.h b/openmp/libomptarget/deviceRTLs/common/debug.h index 3388b04..8bb4e3a 100644 --- a/openmp/libomptarget/deviceRTLs/common/debug.h +++ b/openmp/libomptarget/deviceRTLs/common/debug.h @@ -28,7 +28,7 @@ #ifndef _OMPTARGET_NVPTX_DEBUG_H_ #define _OMPTARGET_NVPTX_DEBUG_H_ -#include "device_environment.h" +#include "common/device_environment.h" //////////////////////////////////////////////////////////////////////////////// // set desired level of debugging @@ -128,7 +128,7 @@ #if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING #include <stdio.h> -#include "support.h" +#include "common/support.h" template <typename... Arguments> NOINLINE static void log(const char *fmt, Arguments... parameters) { diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/device_environment.h b/openmp/libomptarget/deviceRTLs/common/device_environment.h index b2f65af..68a7757 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/device_environment.h +++ b/openmp/libomptarget/deviceRTLs/common/device_environment.h @@ -19,6 +19,6 @@ struct omptarget_device_environmentTy { int32_t debug_level; }; -extern __device__ omptarget_device_environmentTy omptarget_device_environment; +extern DEVICE omptarget_device_environmentTy omptarget_device_environment; #endif diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h b/openmp/libomptarget/deviceRTLs/common/support.h index 8cffd91..8cffd91 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h +++ b/openmp/libomptarget/deviceRTLs/common/support.h diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu index 181bceb..46ed8f4 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "omptarget-nvptx.h" -#include "device_environment.h" +#include "common/device_environment.h" //////////////////////////////////////////////////////////////////////////////// // global device environment diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h index a446e00..51e88ad 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -24,7 +24,7 @@ #include "common/debug.h" // debug #include "interface.h" // interfaces with omp, compiler, and user #include "common/state-queue.h" -#include "support.h" +#include "common/support.h" #define OMPTARGET_NVPTX_VERSION 1.1 diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu index c8ac493..b5efa63 100644 --- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu +++ b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu @@ -10,7 +10,7 @@ // //===----------------------------------------------------------------------===// -#include "support.h" +#include "common/support.h" #include "common/debug.h" #include "omptarget-nvptx.h" |