diff options
author | Fabian Mora <fmora.dev@gmail.com> | 2024-02-21 20:47:19 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-21 20:47:19 -0500 |
commit | f204aee1b9173ed9ae72017808f0a379c3a8de7a (patch) | |
tree | 788ed527b5779ed54f58086dc0c35326eb197ed6 /mlir | |
parent | 004c1972b4585fe8051814ceb6c6cdbf3cb62290 (diff) | |
download | llvm-f204aee1b9173ed9ae72017808f0a379c3a8de7a.zip llvm-f204aee1b9173ed9ae72017808f0a379c3a8de7a.tar.gz llvm-f204aee1b9173ed9ae72017808f0a379c3a8de7a.tar.bz2 |
[mlir][GPU] Remove the SerializeToCubin pass (#82486)
The `SerializeToCubin` pass was deprecated in September 2023 in favor of
GPU compilation attributes; see the [GPU
compilation](https://mlir.llvm.org/docs/Dialects/GPU/#gpu-compilation)
section in the `gpu` dialect MLIR docs.
This patch removes `SerializeToCubin` from the repo.
Diffstat (limited to 'mlir')
-rw-r--r-- | mlir/CMakeLists.txt | 1 | ||||
-rw-r--r-- | mlir/include/mlir/Dialect/GPU/Transforms/Passes.h | 14 | ||||
-rw-r--r-- | mlir/lib/Dialect/GPU/CMakeLists.txt | 52 | ||||
-rw-r--r-- | mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp | 180 |
4 files changed, 0 insertions, 247 deletions
diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt index 2d9f78e..16c898b 100644 --- a/mlir/CMakeLists.txt +++ b/mlir/CMakeLists.txt @@ -123,7 +123,6 @@ else() endif() add_definitions(-DMLIR_ROCM_CONVERSIONS_ENABLED=${MLIR_ENABLE_ROCM_CONVERSIONS}) -set(MLIR_ENABLE_DEPRECATED_GPU_SERIALIZATION 0 CACHE BOOL "Enable deprecated GPU serialization passes") set(MLIR_ENABLE_CUDA_RUNNER 0 CACHE BOOL "Enable building the mlir CUDA runner") set(MLIR_ENABLE_ROCM_RUNNER 0 CACHE BOOL "Enable building the mlir ROCm runner") set(MLIR_ENABLE_SYCL_RUNNER 0 CACHE BOOL "Enable building the mlir Sycl runner") diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h index 5885fac..8f7466a 100644 --- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h @@ -147,25 +147,11 @@ protected: // Registration //===----------------------------------------------------------------------===// -/// Register pass to serialize GPU kernel functions to a CUBIN binary -/// annotation. -LLVM_DEPRECATED("use Target attributes instead", "") -void registerGpuSerializeToCubinPass(); - /// Register pass to serialize GPU kernel functions to a HSAco binary /// annotation. LLVM_DEPRECATED("use Target attributes instead", "") void registerGpuSerializeToHsacoPass(); -/// Create an instance of the GPU kernel function to CUBIN binary serialization -/// pass with optLevel (default level 2). -LLVM_DEPRECATED("use Target attributes instead", "") -std::unique_ptr<Pass> createGpuSerializeToCubinPass(StringRef triple, - StringRef chip, - StringRef features, - int optLevel = 2, - bool dumpPtx = false); - /// Create an instance of the GPU kernel function to HSAco binary serialization /// pass. LLVM_DEPRECATED("use Target attributes instead", "") diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index e5776e1..51cfa22 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -1,11 +1,3 @@ -if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) - set(NVPTX_LIBS - NVPTXCodeGen - NVPTXDesc - NVPTXInfo - ) -endif() - if (MLIR_ENABLE_ROCM_CONVERSIONS) set(AMDGPU_LIBS IRReader @@ -60,7 +52,6 @@ add_mlir_dialect_library(MLIRGPUTransforms Transforms/ParallelLoopMapper.cpp Transforms/ROCDLAttachTarget.cpp Transforms/SerializeToBlob.cpp - Transforms/SerializeToCubin.cpp Transforms/SerializeToHsaco.cpp Transforms/ShuffleRewriter.cpp Transforms/SPIRVAttachTarget.cpp @@ -74,7 +65,6 @@ add_mlir_dialect_library(MLIRGPUTransforms Core MC Target - ${NVPTX_LIBS} ${AMDGPU_LIBS} DEPENDS @@ -110,48 +100,6 @@ add_mlir_dialect_library(MLIRGPUTransforms add_subdirectory(TransformOps) add_subdirectory(Pipelines) -if(MLIR_ENABLE_CUDA_RUNNER) - if(NOT MLIR_ENABLE_CUDA_CONVERSIONS) - message(SEND_ERROR - "Building mlir with cuda support requires the NVPTX backend") - endif() - - # Configure CUDA language support. Using check_language first allows us to - # give a custom error message. - include(CheckLanguage) - check_language(CUDA) - if (CMAKE_CUDA_COMPILER) - enable_language(CUDA) - else() - message(SEND_ERROR - "Building mlir with cuda support requires a working CUDA install") - endif() - - # Enable gpu-to-cubin pass. - target_compile_definitions(obj.MLIRGPUTransforms - PRIVATE - MLIR_GPU_TO_CUBIN_PASS_ENABLE=1 - ) - - # Add CUDA headers includes and the libcuda.so library. - target_include_directories(obj.MLIRGPUTransforms - PRIVATE - ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} - ) - - # Add link path for the cuda driver library. - find_library(CUDA_DRIVER_LIBRARY cuda HINTS ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES} REQUIRED) - get_filename_component(CUDA_DRIVER_LIBRARY_PATH "${CUDA_DRIVER_LIBRARY}" DIRECTORY) - target_link_directories(MLIRGPUTransforms PRIVATE ${CUDA_DRIVER_LIBRARY_PATH}) - - target_link_libraries(MLIRGPUTransforms - PRIVATE - MLIRNVVMToLLVMIRTranslation - cuda - ) - -endif() - if(MLIR_ENABLE_ROCM_CONVERSIONS) if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)) message(SEND_ERROR diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp deleted file mode 100644 index 34ad4e6..0000000 --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp +++ /dev/null @@ -1,180 +0,0 @@ -//===- LowerGPUToCUBIN.cpp - Convert GPU kernel to CUBIN blob -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements a pass that serializes a gpu module into CUBIN blob and -// adds that blob as a string attribute of the module. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/GPU/Transforms/Passes.h" -#include "mlir/Dialect/LLVMIR/NVVMDialect.h" -#include "llvm/Support/Debug.h" - -#if MLIR_GPU_TO_CUBIN_PASS_ENABLE -#include "mlir/Pass/Pass.h" -#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Export.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Threading.h" - -#include <cuda.h> - -using namespace mlir; - -static void emitCudaError(const llvm::Twine &expr, const char *buffer, - CUresult result, Location loc) { - const char *error = nullptr; - cuGetErrorString(result, &error); - emitError(loc, - expr.concat(error ? " failed with error code " + llvm::Twine{error} - : llvm::Twine(" failed with unknown error ")) - .concat("[") - .concat(buffer) - .concat("]")); -} - -#define RETURN_ON_CUDA_ERROR(expr) \ - do { \ - if (auto status = (expr)) { \ - emitCudaError(#expr, jitErrorBuffer, status, loc); \ - return {}; \ - } \ - } while (false) - -namespace { -class SerializeToCubinPass - : public PassWrapper<SerializeToCubinPass, gpu::SerializeToBlobPass> { - static llvm::once_flag initializeBackendOnce; - -public: - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToCubinPass) - - SerializeToCubinPass(StringRef triple = "nvptx64-nvidia-cuda", - StringRef chip = "sm_35", StringRef features = "+ptx60", - int optLevel = 2, bool dumpPtx = false); - - StringRef getArgument() const override { return "gpu-to-cubin"; } - StringRef getDescription() const override { - return "Lower GPU kernel function to CUBIN binary annotations"; - } - -private: - // Serializes PTX to CUBIN. - std::unique_ptr<std::vector<char>> - serializeISA(const std::string &isa) override; -}; -} // namespace - -// Sets the 'option' to 'value' unless it already has a value. -static void maybeSetOption(Pass::Option<std::string> &option, StringRef value) { - if (!option.hasValue()) - option = value.str(); -} - -llvm::once_flag SerializeToCubinPass::initializeBackendOnce; - -SerializeToCubinPass::SerializeToCubinPass(StringRef triple, StringRef chip, - StringRef features, int optLevel, - bool dumpPtx) { - // No matter how this pass is constructed, ensure that the NVPTX backend - // is initialized exactly once. - llvm::call_once(initializeBackendOnce, []() { - // Initialize LLVM NVPTX backend. -#if LLVM_HAS_NVPTX_TARGET - LLVMInitializeNVPTXTarget(); - LLVMInitializeNVPTXTargetInfo(); - LLVMInitializeNVPTXTargetMC(); - LLVMInitializeNVPTXAsmPrinter(); -#endif - }); - - maybeSetOption(this->triple, triple); - maybeSetOption(this->chip, chip); - maybeSetOption(this->features, features); - this->dumpPtx = dumpPtx; - if (this->optLevel.getNumOccurrences() == 0) - this->optLevel.setValue(optLevel); -} - -std::unique_ptr<std::vector<char>> -SerializeToCubinPass::serializeISA(const std::string &isa) { - Location loc = getOperation().getLoc(); - char jitErrorBuffer[4096] = {0}; - - RETURN_ON_CUDA_ERROR(cuInit(0)); - - // Linking requires a device context. - CUdevice device; - RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0)); - CUcontext context; - // Use the primary context. - RETURN_ON_CUDA_ERROR(cuDevicePrimaryCtxRetain(&context, device)); - // Push the primary context so that the next CUDA operations - // actually use it. - RETURN_ON_CUDA_ERROR(cuCtxPushCurrent(context)); - CUlinkState linkState; - - CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER, - CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}; - void *jitOptionsVals[] = {jitErrorBuffer, - reinterpret_cast<void *>(sizeof(jitErrorBuffer))}; - - RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */ - jitOptions, /* jit options */ - jitOptionsVals, /* jit option values */ - &linkState)); - - auto kernelName = getOperation().getName().str(); - if (dumpPtx) { - llvm::dbgs() << " Kernel Name : [" << kernelName << "]\n"; - llvm::dbgs() << isa << "\n"; - } - RETURN_ON_CUDA_ERROR(cuLinkAddData( - linkState, CUjitInputType::CU_JIT_INPUT_PTX, - const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(), - kernelName.c_str(), 0, /* number of jit options */ - nullptr, /* jit options */ - nullptr /* jit option values */ - )); - - void *cubinData; - size_t cubinSize; - RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize)); - - char *cubinAsChar = static_cast<char *>(cubinData); - auto result = - std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize); - - // This will also destroy the cubin data. - RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState)); - // Pop and release the primary context. - CUcontext poppedContext; - RETURN_ON_CUDA_ERROR(cuCtxPopCurrent(&poppedContext)); - RETURN_ON_CUDA_ERROR(cuDevicePrimaryCtxRelease(device)); - - return result; -} - -// Register pass to serialize GPU kernel functions to a CUBIN binary annotation. -void mlir::registerGpuSerializeToCubinPass() { - PassRegistration<SerializeToCubinPass> registerSerializeToCubin( - [] { return std::make_unique<SerializeToCubinPass>(); }); -} - -std::unique_ptr<Pass> mlir::createGpuSerializeToCubinPass(StringRef triple, - StringRef arch, - StringRef features, - int optLevel, - bool dumpPtx) { - return std::make_unique<SerializeToCubinPass>(triple, arch, features, - optLevel, dumpPtx); -} - -#else // MLIR_GPU_TO_CUBIN_PASS_ENABLE -void mlir::registerGpuSerializeToCubinPass() {} -#endif // MLIR_GPU_TO_CUBIN_PASS_ENABLE |