aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJonChesterfield <jonathanchesterfield@gmail.com>2019-12-04 16:43:16 +0000
committerJon Chesterfield <jonathanchesterfield@gmail.com>2019-12-04 16:43:37 +0000
commit3ada8d2a87a2e818ea5302f40dbb0319d95b1554 (patch)
tree5f054d522f3410878e87a79c5bd32ac587617c1a
parent9b962d83ece841e43fd2823375dc6ddc94c1b178 (diff)
downloadllvm-3ada8d2a87a2e818ea5302f40dbb0319d95b1554.zip
llvm-3ada8d2a87a2e818ea5302f40dbb0319d95b1554.tar.gz
llvm-3ada8d2a87a2e818ea5302f40dbb0319d95b1554.tar.bz2
[libomptarget] Build a minimal deviceRTL for amdgcn
Summary: [libomptarget] Build a minimal deviceRTL for amdgcn Repeat of D70414, with an include path fixed. Diff for sanity checking. The CMakeLists.txt file is functionally identical to the one used in the aomp fork. Whitespace changes were made based on nvptx/CMakeLists.txt, plus the copyright notice updated to match (Greg was the original author so would like his sign off on that here). This change will build a small subset of the deviceRTL if an appropriate toolchain is available, e.g. a local install of rocm. Support.h is moved from nvptx as a dependency of debug.h. Reviewers: ABataev, jdoerfert Reviewed By: ABataev Subscribers: jvesely, mgorny, jfb, openmp-commits, jdoerfert Tags: #openmp Differential Revision: https://reviews.llvm.org/D70971
-rw-r--r--openmp/libomptarget/deviceRTLs/CMakeLists.txt3
-rw-r--r--openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt136
-rw-r--r--openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h31
-rw-r--r--openmp/libomptarget/deviceRTLs/common/debug.h4
-rw-r--r--openmp/libomptarget/deviceRTLs/common/device_environment.h (renamed from openmp/libomptarget/deviceRTLs/nvptx/src/device_environment.h)2
-rw-r--r--openmp/libomptarget/deviceRTLs/common/support.h (renamed from openmp/libomptarget/deviceRTLs/nvptx/src/support.h)0
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu2
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h2
-rw-r--r--openmp/libomptarget/deviceRTLs/nvptx/src/support.cu2
9 files changed, 162 insertions, 20 deletions
diff --git a/openmp/libomptarget/deviceRTLs/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/CMakeLists.txt
index 9723fb8..8bbf987 100644
--- a/openmp/libomptarget/deviceRTLs/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/CMakeLists.txt
@@ -6,8 +6,9 @@
#
# ##===----------------------------------------------------------------------===##
#
-# Build a device RTL for each available machine available.
+# Build a device RTL for each available machine.
#
##===----------------------------------------------------------------------===##
+add_subdirectory(amdgcn)
add_subdirectory(nvptx)
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
new file mode 100644
index 0000000..6b82b4e
--- /dev/null
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -0,0 +1,136 @@
+##===----------------------------------------------------------------------===##
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+##===----------------------------------------------------------------------===##
+#
+# Build the AMDGCN Device RTL if the ROCM tools are available
+#
+##===----------------------------------------------------------------------===##
+
+find_package(LLVM QUIET CONFIG
+ PATHS
+ $ENV{AOMP}
+ $ENV{HOME}/rocm/aomp
+ /opt/rocm/aomp
+ /usr/lib/rocm/aomp
+ ${LIBOMPTARGET_NVPTX_CUDA_COMPILER_DIR}
+ ${LIBOMPTARGET_NVPTX_CUDA_LINKER_DIR}
+ ${CMAKE_CXX_COMPILER_DIR}
+ NO_DEFAULT_PATH)
+
+if (LLVM_DIR)
+ libomptarget_say("Found LLVM ${LLVM_PACKAGE_VERSION}. Configure: ${LLVM_DIR}/LLVMConfig.cmake")
+else()
+ libomptarget_say("Not building AMDGCN device RTL: AOMP not found")
+ return()
+endif()
+
+set(AOMP_INSTALL_PREFIX ${LLVM_INSTALL_PREFIX})
+
+if (AOMP_INSTALL_PREFIX)
+ set(AOMP_BINDIR ${AOMP_INSTALL_PREFIX}/bin)
+else()
+ set(AOMP_BINDIR ${LLVM_BUILD_BINARY_DIR}/bin)
+endif()
+
+libomptarget_say("Building AMDGCN device RTL. LLVM_COMPILER_PATH=${AOMP_BINDIR}")
+
+project(omptarget-amdgcn)
+
+add_custom_target(omptarget-amdgcn ALL)
+
+#optimization level
+set(optimization_level 2)
+
+# Activate RTL message dumps if requested by the user.
+if(LIBOMPTARGET_NVPTX_DEBUG)
+ set(CUDA_DEBUG -DOMPTARGET_NVPTX_DEBUG=-1)
+endif()
+
+get_filename_component(devicertl_base_directory
+ ${CMAKE_CURRENT_SOURCE_DIR}
+ DIRECTORY)
+
+set(cuda_sources
+ ${devicertl_base_directory}/common/src/cancel.cu
+ ${devicertl_base_directory}/common/src/critical.cu)
+
+set(h_files
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/amdgcn_interface.h
+ ${CMAKE_CURRENT_SOURCE_DIR}/src/target_impl.h
+ ${devicertl_base_directory}/common/debug.h
+ ${devicertl_base_directory}/common/device_environment.h
+ ${devicertl_base_directory}/common/state-queue.h
+ ${devicertl_base_directory}/common/state-queuei.h
+ ${devicertl_base_directory}/common/support.h)
+
+# for both in-tree and out-of-tree build
+if (NOT CMAKE_ARCHIVE_OUTPUT_DIRECTORY)
+ set(OUTPUTDIR ${CMAKE_CURRENT_BINARY_DIR})
+else()
+ set(OUTPUTDIR ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY})
+endif()
+
+# create libraries
+set(mcpus gfx700 gfx701 gfx801 gfx803 gfx900)
+if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
+ set(mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST})
+endif()
+
+macro(add_cuda_bc_library)
+ set(cu_cmd ${AOMP_BINDIR}/clang++
+ -std=c++11
+ -fcuda-rdc
+ -fvisibility=default
+ --cuda-device-only
+ -Wno-unused-value
+ -x hip
+ -O${optimization_level}
+ --cuda-gpu-arch=${mcpu}
+ ${CUDA_DEBUG}
+ -I${CMAKE_CURRENT_SOURCE_DIR}/src
+ -I${devicertl_base_directory})
+
+ set(bc1_files)
+
+ foreach(file ${ARGN})
+ get_filename_component(fname ${file} NAME_WE)
+ set(bc1_filename ${fname}.${mcpu}.bc)
+
+ add_custom_command(
+ OUTPUT ${bc1_filename}
+ COMMAND ${cu_cmd} ${file} -o ${bc1_filename}
+ DEPENDS ${file} ${h_files})
+
+ list(APPEND bc1_files ${bc1_filename})
+ endforeach()
+
+ add_custom_command(
+ OUTPUT linkout.cuda.${mcpu}.bc
+ COMMAND ${AOMP_BINDIR}/llvm-link ${bc1_files} -o linkout.cuda.${mcpu}.bc
+ DEPENDS ${bc1_files})
+
+ list(APPEND bc_files linkout.cuda.${mcpu}.bc)
+endmacro()
+
+set(libname "omptarget-amdgcn")
+
+foreach(mcpu ${mcpus})
+ set(bc_files)
+ add_cuda_bc_library(${cuda_sources})
+
+ set(bc_libname lib${libname}-${mcpu}.bc)
+ add_custom_command(
+ OUTPUT ${bc_libname}
+ COMMAND ${AOMP_BINDIR}/llvm-link ${bc_files} | ${AOMP_BINDIR}/opt --always-inline -o ${OUTPUTDIR}/${bc_libname}
+ DEPENDS ${bc_files})
+
+ add_custom_target(lib${libname}-${mcpu} ALL DEPENDS ${bc_libname})
+
+ install(FILES ${OUTPUTDIR}/${bc_libname}
+ DESTINATION "${OPENMP_INSTALL_LIBDIR}/libdevice"
+ )
+endforeach()
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index b2d06ab..c6e082c 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -72,8 +72,6 @@ EXTERN uint64_t __lanemask_lt();
// thread's lane number in the warp
EXTERN uint64_t __lanemask_gt();
-EXTERN void llvm_amdgcn_s_barrier();
-
// CU id
EXTERN unsigned __smid();
@@ -101,25 +99,21 @@ INLINE uint32_t __kmpc_impl_smid() {
return __smid();
}
-INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __ffsll(x); }
+INLINE uint64_t __kmpc_impl_ffs(uint64_t x) { return __builtin_ffsl(x); }
-INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __popcll(x); }
+INLINE uint64_t __kmpc_impl_popc(uint64_t x) { return __builtin_popcountl(x); }
INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
return __ballot64(1);
}
-INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
- int32_t SrcLane) {
- return __shfl(Var, SrcLane, WARPSIZE);
-}
+EXTERN int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t Var,
+ int32_t SrcLane);
-INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var,
- uint32_t Delta, int32_t Width) {
- return __shfl_down(Var, Delta, Width);
-}
+EXTERN int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t Var,
+ uint32_t Delta, int32_t Width);
-INLINE void __kmpc_impl_syncthreads() { llvm_amdgcn_s_barrier(); }
+INLINE void __kmpc_impl_syncthreads() { __builtin_amdgcn_s_barrier(); }
INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
// we have protected the master warp from releasing from its barrier
@@ -128,4 +122,15 @@ INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
__builtin_amdgcn_s_barrier();
}
+// DEVICE versions of part of libc
+extern "C" {
+DEVICE __attribute__((noreturn)) void
+__assertfail(const char *, const char *, unsigned, const char *, size_t);
+INLINE static void __assert_fail(const char *__message, const char *__file,
+ unsigned int __line, const char *__function) {
+ __assertfail(__message, __file, __line, __function, sizeof(char));
+}
+DEVICE int printf(const char *, ...);
+}
+
#endif
diff --git a/openmp/libomptarget/deviceRTLs/common/debug.h b/openmp/libomptarget/deviceRTLs/common/debug.h
index 3388b04..8bb4e3a 100644
--- a/openmp/libomptarget/deviceRTLs/common/debug.h
+++ b/openmp/libomptarget/deviceRTLs/common/debug.h
@@ -28,7 +28,7 @@
#ifndef _OMPTARGET_NVPTX_DEBUG_H_
#define _OMPTARGET_NVPTX_DEBUG_H_
-#include "device_environment.h"
+#include "common/device_environment.h"
////////////////////////////////////////////////////////////////////////////////
// set desired level of debugging
@@ -128,7 +128,7 @@
#if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING
#include <stdio.h>
-#include "support.h"
+#include "common/support.h"
template <typename... Arguments>
NOINLINE static void log(const char *fmt, Arguments... parameters) {
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/device_environment.h b/openmp/libomptarget/deviceRTLs/common/device_environment.h
index b2f65af..68a7757 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/device_environment.h
+++ b/openmp/libomptarget/deviceRTLs/common/device_environment.h
@@ -19,6 +19,6 @@ struct omptarget_device_environmentTy {
int32_t debug_level;
};
-extern __device__ omptarget_device_environmentTy omptarget_device_environment;
+extern DEVICE omptarget_device_environmentTy omptarget_device_environment;
#endif
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h b/openmp/libomptarget/deviceRTLs/common/support.h
index 8cffd91..8cffd91 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.h
+++ b/openmp/libomptarget/deviceRTLs/common/support.h
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
index 181bceb..46ed8f4 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "omptarget-nvptx.h"
-#include "device_environment.h"
+#include "common/device_environment.h"
////////////////////////////////////////////////////////////////////////////////
// global device environment
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index a446e00..51e88ad 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -24,7 +24,7 @@
#include "common/debug.h" // debug
#include "interface.h" // interfaces with omp, compiler, and user
#include "common/state-queue.h"
-#include "support.h"
+#include "common/support.h"
#define OMPTARGET_NVPTX_VERSION 1.1
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
index c8ac493..b5efa63 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "support.h"
+#include "common/support.h"
#include "common/debug.h"
#include "omptarget-nvptx.h"