diff options
author | Joseph Huber <huberjn@outlook.com> | 2025-03-20 14:17:41 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-03-20 14:17:41 -0500 |
commit | 221b0117fd21d45098ead779a040a4b939a5c84f (patch) | |
tree | 42e6d148f29966a3415b047ffe72aa054bac53fd | |
parent | 7c11d053f6619e9feb73b75353cf6d40f989b8d2 (diff) | |
download | llvm-221b0117fd21d45098ead779a040a4b939a5c84f.zip llvm-221b0117fd21d45098ead779a040a4b939a5c84f.tar.gz llvm-221b0117fd21d45098ead779a040a4b939a5c84f.tar.bz2 |
[LLVM] Make the GPU loader utilities an LLVM tool (#132096)
Summary:
These tools `amdhsa-loader` and `nvptx-loader` are used to execute unit
tests directly on the GPU. We use this for `libc` and `libcxx` unit
tests as well as general GPU experimentation. It looks like this.
```console
> clang++ main.cpp --target=amdgcn-amd-amdhsa -mcpu=native -flto -lc ./lib/amdgcn-amd-amdhsa/crt1.o
> llvm-gpu-loader a.out
Hello World!
```
Currently these are a part of the `libc` project, but this creates
issues as `libc` itself depends on them to run tests. Right now we get
around this by force-including the `libc` project prior to running the
runtimes build so that this dependency can be built first. We should
instead just make this a simple LLVM tool so it's always available.
This has the effect of installing these by default now instead of just
when `libc` was enabled, but they should be relatively small. Right now
this only supports a 'static' configuration. That is, we locate the CUDA
and HSA dependencies at LLVM compile time. In the future we should be
able to provide this by default using `dlopen` and some API.
I don't know whether it's necessary to rename all of these symbols, since they
follow the `libc` naming convention, so I have left them as-is for now.
-rw-r--r-- | libc/CMakeLists.txt | 7 | ||||
-rw-r--r-- | libc/utils/gpu/CMakeLists.txt | 1 | ||||
-rw-r--r-- | libc/utils/gpu/loader/CMakeLists.txt | 54 | ||||
-rw-r--r-- | libc/utils/gpu/loader/amdgpu/CMakeLists.txt | 10 | ||||
-rw-r--r-- | libc/utils/gpu/loader/nvptx/CMakeLists.txt | 9 | ||||
-rw-r--r-- | llvm/CMakeLists.txt | 4 | ||||
-rw-r--r-- | llvm/runtimes/CMakeLists.txt | 14 | ||||
-rw-r--r-- | llvm/tools/llvm-gpu-loader/CMakeLists.txt | 45 | ||||
-rw-r--r-- | llvm/tools/llvm-gpu-loader/amdhsa.cpp (renamed from libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp) | 8 | ||||
-rw-r--r-- | llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp (renamed from libc/utils/gpu/loader/Main.cpp) | 46 | ||||
-rw-r--r-- | llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h (renamed from libc/utils/gpu/loader/Loader.h) | 13 | ||||
-rw-r--r-- | llvm/tools/llvm-gpu-loader/nvptx.cpp (renamed from libc/utils/gpu/loader/nvptx/nvptx-loader.cpp) | 8 |
12 files changed, 102 insertions, 117 deletions
diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index ad39ff6..b264dcb4 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -59,13 +59,6 @@ set(LIBC_NAMESPACE ${default_namespace} CACHE STRING "The namespace to use to enclose internal implementations. Must start with '__llvm_libc'." ) -# We will build the GPU utilities if we are not doing a runtimes build. -option(LIBC_BUILD_GPU_LOADER "Always build the GPU loader utilities" OFF) -if(LIBC_BUILD_GPU_LOADER OR ((NOT LLVM_RUNTIMES_BUILD) AND LLVM_LIBC_GPU_BUILD)) - add_subdirectory(utils/gpu) - return() -endif() - option(LIBC_CMAKE_VERBOSE_LOGGING "Log details warnings and notifications during CMake configuration." OFF) diff --git a/libc/utils/gpu/CMakeLists.txt b/libc/utils/gpu/CMakeLists.txt deleted file mode 100644 index e529646a..0000000 --- a/libc/utils/gpu/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -add_subdirectory(loader) diff --git a/libc/utils/gpu/loader/CMakeLists.txt b/libc/utils/gpu/loader/CMakeLists.txt deleted file mode 100644 index 9b3bd00..0000000 --- a/libc/utils/gpu/loader/CMakeLists.txt +++ /dev/null @@ -1,54 +0,0 @@ -add_library(gpu_loader OBJECT Main.cpp) - -include(FindLibcCommonUtils) -target_link_libraries(gpu_loader PUBLIC llvm-libc-common-utilities) - -target_include_directories(gpu_loader PUBLIC - ${CMAKE_CURRENT_SOURCE_DIR} - ${LIBC_SOURCE_DIR}/include - ${LIBC_SOURCE_DIR} - ${LLVM_MAIN_INCLUDE_DIR} - ${LLVM_BINARY_DIR}/include -) -if(NOT LLVM_ENABLE_RTTI) - target_compile_options(gpu_loader PUBLIC -fno-rtti) -endif() - -find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm) -if(hsa-runtime64_FOUND) - add_subdirectory(amdgpu) -endif() - -# The CUDA loader requires LLVM to traverse the ELF image for symbols. 
-find_package(CUDAToolkit 11.2 QUIET) -if(CUDAToolkit_FOUND) - add_subdirectory(nvptx) -endif() - -if(TARGET amdhsa-loader AND LIBC_TARGET_ARCHITECTURE_IS_AMDGPU) - add_custom_target(libc.utils.gpu.loader) - add_dependencies(libc.utils.gpu.loader amdhsa-loader) - set_target_properties( - libc.utils.gpu.loader - PROPERTIES - TARGET amdhsa-loader - EXECUTABLE "$<TARGET_FILE:amdhsa-loader>" - ) -elseif(TARGET nvptx-loader AND LIBC_TARGET_ARCHITECTURE_IS_NVPTX) - add_custom_target(libc.utils.gpu.loader) - add_dependencies(libc.utils.gpu.loader nvptx-loader) - set_target_properties( - libc.utils.gpu.loader - PROPERTIES - TARGET nvptx-loader - EXECUTABLE "$<TARGET_FILE:nvptx-loader>" - ) -endif() - -foreach(gpu_loader_tgt amdhsa-loader nvptx-loader) - if(TARGET ${gpu_loader_tgt}) - install(TARGETS ${gpu_loader_tgt} - DESTINATION ${CMAKE_INSTALL_BINDIR} - COMPONENT libc) - endif() -endforeach() diff --git a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt b/libc/utils/gpu/loader/amdgpu/CMakeLists.txt deleted file mode 100644 index 17878da..0000000 --- a/libc/utils/gpu/loader/amdgpu/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -set(LLVM_LINK_COMPONENTS - BinaryFormat - Object - Option - Support - FrontendOffloading - ) - -add_llvm_executable(amdhsa-loader amdhsa-loader.cpp) -target_link_libraries(amdhsa-loader PRIVATE hsa-runtime64::hsa-runtime64 gpu_loader) diff --git a/libc/utils/gpu/loader/nvptx/CMakeLists.txt b/libc/utils/gpu/loader/nvptx/CMakeLists.txt deleted file mode 100644 index 42510ac..0000000 --- a/libc/utils/gpu/loader/nvptx/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -set(LLVM_LINK_COMPONENTS - BinaryFormat - Object - Option - Support - ) - -add_llvm_executable(nvptx-loader nvptx-loader.cpp) -target_link_libraries(nvptx-loader PRIVATE gpu_loader CUDA::cuda_driver) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 5639061..e76bc9b 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -210,10 +210,6 @@ if("${LIBC_TARGET_TRIPLE}" STREQUAL 
"amdgcn-amd-amdhsa" OR "${LIBC_TARGET_TRIPLE}" STREQUAL "nvptx64-nvidia-cuda") set(LLVM_LIBC_GPU_BUILD ON) endif() -if (NOT "libc" IN_LIST LLVM_ENABLE_PROJECTS AND LLVM_LIBC_GPU_BUILD) - message(STATUS "Enabling libc project to build libc testing tools") - list(APPEND LLVM_ENABLE_PROJECTS "libc") -endif() # LLVM_ENABLE_PROJECTS_USED is `ON` if the user has ever used the # `LLVM_ENABLE_PROJECTS` CMake cache variable. This exists for diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 136099d..51433d1 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -534,20 +534,6 @@ if(build_runtimes) endif() if(LLVM_LIBC_GPU_BUILD) list(APPEND extra_cmake_args "-DLLVM_LIBC_GPU_BUILD=ON") - if("libc" IN_LIST RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES) - if(TARGET amdhsa-loader) - list(APPEND extra_cmake_args - "-DRUNTIMES_amdgcn-amd-amdhsa_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:amdhsa-loader>") - list(APPEND extra_deps amdhsa-loader) - endif() - endif() - if("libc" IN_LIST RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES) - if(TARGET nvptx-loader) - list(APPEND extra_cmake_args - "-DRUNTIMES_nvptx64-nvidia-cuda_LIBC_GPU_LOADER_EXECUTABLE=$<TARGET_FILE:nvptx-loader>") - list(APPEND extra_deps nvptx-loader) - endif() - endif() if(TARGET clang-offload-packager) list(APPEND extra_deps clang-offload-packager) endif() diff --git a/llvm/tools/llvm-gpu-loader/CMakeLists.txt b/llvm/tools/llvm-gpu-loader/CMakeLists.txt new file mode 100644 index 0000000..4b4a6e7 --- /dev/null +++ b/llvm/tools/llvm-gpu-loader/CMakeLists.txt @@ -0,0 +1,45 @@ +set(LLVM_LINK_COMPONENTS + BinaryFormat + Object + Option + Support + FrontendOffloading +) + +add_llvm_tool(llvm-gpu-loader + llvm-gpu-loader.cpp + + # TODO: We intentionally split this currently due to statically linking the + # GPU runtimes. Dynamically load the dependencies, possibly using the + # LLVM offloading API when it is complete. 
+ PARTIAL_SOURCES_INTENDED + + DEPENDS + intrinsics_gen +) + +# Locate the RPC server handling interface. +include(FindLibcCommonUtils) +target_link_libraries(llvm-gpu-loader PUBLIC llvm-libc-common-utilities) + +# Check for HSA support for targeting AMD GPUs. +find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm) +if(hsa-runtime64_FOUND) + target_sources(llvm-gpu-loader PRIVATE amdhsa.cpp) + target_compile_definitions(llvm-gpu-loader PRIVATE AMDHSA_SUPPORT) + target_link_libraries(llvm-gpu-loader PRIVATE hsa-runtime64::hsa-runtime64) + + # Compatibility with the old amdhsa-loader name. + add_llvm_tool_symlink(amdhsa-loader llvm-gpu-loader) +endif() + +# Check for CUDA support for targeting NVIDIA GPUs. +find_package(CUDAToolkit 11.2 QUIET) +if(CUDAToolkit_FOUND) + target_sources(llvm-gpu-loader PRIVATE nvptx.cpp) + target_compile_definitions(llvm-gpu-loader PRIVATE NVPTX_SUPPORT) + target_link_libraries(llvm-gpu-loader PRIVATE CUDA::cuda_driver) + + # Compatibility with the old nvptx-loader name. 
+ add_llvm_tool_symlink(nvptx-loader llvm-gpu-loader) +endif() diff --git a/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp b/llvm/tools/llvm-gpu-loader/amdhsa.cpp index 00fde14..f3c8f64 100644 --- a/libc/utils/gpu/loader/amdgpu/amdhsa-loader.cpp +++ b/llvm/tools/llvm-gpu-loader/amdhsa.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#include "Loader.h" +#include "llvm-gpu-loader.h" #include "hsa/hsa.h" #include "hsa/hsa_ext_amd.h" @@ -330,9 +330,9 @@ static hsa_status_t hsa_memcpy(void *dst, hsa_agent_t dst_agent, return HSA_STATUS_SUCCESS; } -int load(int argc, const char **argv, const char **envp, void *image, - size_t size, const LaunchParameters ¶ms, - bool print_resource_usage) { +int load_amdhsa(int argc, const char **argv, const char **envp, void *image, + size_t size, const LaunchParameters ¶ms, + bool print_resource_usage) { // Initialize the HSA runtime used to communicate with the device. if (hsa_status_t err = hsa_init()) handle_error(err); diff --git a/libc/utils/gpu/loader/Main.cpp b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp index c3aeeff..9b157e3 100644 --- a/libc/utils/gpu/loader/Main.cpp +++ b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.cpp @@ -6,14 +6,17 @@ // //===----------------------------------------------------------------------===// // -// This file opens a device image passed on the command line and passes it to -// one of the loader implementations for launch. +// This utility is used to launch standard programs onto the GPU in conjunction +// with the LLVM 'libc' project. It is designed to mimic a standard emulator +// workflow, allowing for unit tests to be run on the GPU directly. 
// //===----------------------------------------------------------------------===// -#include "Loader.h" +#include "llvm-gpu-loader.h" #include "llvm/BinaryFormat/Magic.h" +#include "llvm/Object/ELF.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" @@ -21,6 +24,7 @@ #include "llvm/Support/Path.h" #include "llvm/Support/Signals.h" #include "llvm/Support/WithColor.h" +#include "llvm/TargetParser/Triple.h" #include <cerrno> #include <cstdio> @@ -125,12 +129,40 @@ int main(int argc, const char **argv, const char **envp) { strerror(errno))); } - // Drop the loader from the program arguments. LaunchParameters params{threads_x, threads_y, threads_z, blocks_x, blocks_y, blocks_z}; - int ret = load(new_argv.size(), new_argv.data(), envp, - const_cast<char *>(image.getBufferStart()), - image.getBufferSize(), params, print_resource_usage); + + Expected<llvm::object::ELF64LEObjectFile> elf_or_err = + llvm::object::ELF64LEObjectFile::create(image); + if (!elf_or_err) + report_error(std::move(elf_or_err.takeError())); + + int ret = 1; + if (elf_or_err->getArch() == Triple::amdgcn) { +#ifdef AMDHSA_SUPPORT + ret = load_amdhsa(new_argv.size(), new_argv.data(), envp, + const_cast<char *>(image.getBufferStart()), + image.getBufferSize(), params, print_resource_usage); +#else + report_error(createStringError( + "Unsupported architecture; %s", + Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin())); +#endif + } else if (elf_or_err->getArch() == Triple::nvptx64) { +#ifdef NVPTX_SUPPORT + ret = load_nvptx(new_argv.size(), new_argv.data(), envp, + const_cast<char *>(image.getBufferStart()), + image.getBufferSize(), params, print_resource_usage); +#else + report_error(createStringError( + "Unsupported architecture; %s", + Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin())); +#endif + } else { + report_error(createStringError( + "Unsupported architecture; %s", + 
Triple::getArchTypeName(elf_or_err->getArch()).bytes_begin())); + } if (no_parallelism) { if (flock(fd, LOCK_UN) == -1) diff --git a/libc/utils/gpu/loader/Loader.h b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h index ec05117..29da395 100644 --- a/libc/utils/gpu/loader/Loader.h +++ b/llvm/tools/llvm-gpu-loader/llvm-gpu-loader.h @@ -54,9 +54,16 @@ struct end_args_t { /// Generic interface to load the \p image and launch execution of the _start /// kernel on the target device. Copies \p argc and \p argv to the device. /// Returns the final value of the `main` function on the device. -int load(int argc, const char **argv, const char **evnp, void *image, - size_t size, const LaunchParameters ¶ms, - bool print_resource_usage); +#ifdef AMDHSA_SUPPORT +int load_amdhsa(int argc, const char **argv, const char **evnp, void *image, + size_t size, const LaunchParameters ¶ms, + bool print_resource_usage); +#endif +#ifdef NVPTX_SUPPORT +int load_nvptx(int argc, const char **argv, const char **evnp, void *image, + size_t size, const LaunchParameters ¶ms, + bool print_resource_usage); +#endif /// Return \p V aligned "upwards" according to \p Align. 
template <typename V, typename A> inline V align_up(V val, A align) { diff --git a/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp b/llvm/tools/llvm-gpu-loader/nvptx.cpp index 7d6c176..f749560 100644 --- a/libc/utils/gpu/loader/nvptx/nvptx-loader.cpp +++ b/llvm/tools/llvm-gpu-loader/nvptx.cpp @@ -13,7 +13,7 @@ // //===----------------------------------------------------------------------===// -#include "Loader.h" +#include "llvm-gpu-loader.h" #include "cuda.h" @@ -236,9 +236,9 @@ CUresult launch_kernel(CUmodule binary, CUstream stream, rpc::Server &server, return CUDA_SUCCESS; } -int load(int argc, const char **argv, const char **envp, void *image, - size_t size, const LaunchParameters ¶ms, - bool print_resource_usage) { +int load_nvptx(int argc, const char **argv, const char **envp, void *image, + size_t size, const LaunchParameters ¶ms, + bool print_resource_usage) { if (CUresult err = cuInit(0)) handle_error(err); // Obtain the first device found on the system. |