aboutsummaryrefslogtreecommitdiff
path: root/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp')
-rw-r--r--mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp180
1 file changed, 0 insertions, 180 deletions
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
deleted file mode 100644
index 34ad4e6..0000000
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
+++ /dev/null
@@ -1,180 +0,0 @@
-//===- LowerGPUToCUBIN.cpp - Convert GPU kernel to CUBIN blob -------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a pass that serializes a gpu module into CUBIN blob and
-// adds that blob as a string attribute of the module.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/GPU/Transforms/Passes.h"
-#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
-#include "llvm/Support/Debug.h"
-
-#if MLIR_GPU_TO_CUBIN_PASS_ENABLE
-#include "mlir/Pass/Pass.h"
-#include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-#include "llvm/Support/TargetSelect.h"
-#include "llvm/Support/Threading.h"
-
-#include <cuda.h>
-
-using namespace mlir;
-
-static void emitCudaError(const llvm::Twine &expr, const char *buffer,
- CUresult result, Location loc) {
- const char *error = nullptr;
- cuGetErrorString(result, &error);
- emitError(loc,
- expr.concat(error ? " failed with error code " + llvm::Twine{error}
- : llvm::Twine(" failed with unknown error "))
- .concat("[")
- .concat(buffer)
- .concat("]"));
-}
-
-#define RETURN_ON_CUDA_ERROR(expr) \
- do { \
- if (auto status = (expr)) { \
- emitCudaError(#expr, jitErrorBuffer, status, loc); \
- return {}; \
- } \
- } while (false)
-
-namespace {
-class SerializeToCubinPass
- : public PassWrapper<SerializeToCubinPass, gpu::SerializeToBlobPass> {
- static llvm::once_flag initializeBackendOnce;
-
-public:
- MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToCubinPass)
-
- SerializeToCubinPass(StringRef triple = "nvptx64-nvidia-cuda",
- StringRef chip = "sm_35", StringRef features = "+ptx60",
- int optLevel = 2, bool dumpPtx = false);
-
- StringRef getArgument() const override { return "gpu-to-cubin"; }
- StringRef getDescription() const override {
- return "Lower GPU kernel function to CUBIN binary annotations";
- }
-
-private:
- // Serializes PTX to CUBIN.
- std::unique_ptr<std::vector<char>>
- serializeISA(const std::string &isa) override;
-};
-} // namespace
-
-// Sets the 'option' to 'value' unless it already has a value.
-static void maybeSetOption(Pass::Option<std::string> &option, StringRef value) {
- if (!option.hasValue())
- option = value.str();
-}
-
-llvm::once_flag SerializeToCubinPass::initializeBackendOnce;
-
-SerializeToCubinPass::SerializeToCubinPass(StringRef triple, StringRef chip,
- StringRef features, int optLevel,
- bool dumpPtx) {
- // No matter how this pass is constructed, ensure that the NVPTX backend
- // is initialized exactly once.
- llvm::call_once(initializeBackendOnce, []() {
- // Initialize LLVM NVPTX backend.
-#if LLVM_HAS_NVPTX_TARGET
- LLVMInitializeNVPTXTarget();
- LLVMInitializeNVPTXTargetInfo();
- LLVMInitializeNVPTXTargetMC();
- LLVMInitializeNVPTXAsmPrinter();
-#endif
- });
-
- maybeSetOption(this->triple, triple);
- maybeSetOption(this->chip, chip);
- maybeSetOption(this->features, features);
- this->dumpPtx = dumpPtx;
- if (this->optLevel.getNumOccurrences() == 0)
- this->optLevel.setValue(optLevel);
-}
-
-std::unique_ptr<std::vector<char>>
-SerializeToCubinPass::serializeISA(const std::string &isa) {
- Location loc = getOperation().getLoc();
- char jitErrorBuffer[4096] = {0};
-
- RETURN_ON_CUDA_ERROR(cuInit(0));
-
- // Linking requires a device context.
- CUdevice device;
- RETURN_ON_CUDA_ERROR(cuDeviceGet(&device, 0));
- CUcontext context;
- // Use the primary context.
- RETURN_ON_CUDA_ERROR(cuDevicePrimaryCtxRetain(&context, device));
- // Push the primary context so that the next CUDA operations
- // actually use it.
- RETURN_ON_CUDA_ERROR(cuCtxPushCurrent(context));
- CUlinkState linkState;
-
- CUjit_option jitOptions[] = {CU_JIT_ERROR_LOG_BUFFER,
- CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES};
- void *jitOptionsVals[] = {jitErrorBuffer,
- reinterpret_cast<void *>(sizeof(jitErrorBuffer))};
-
- RETURN_ON_CUDA_ERROR(cuLinkCreate(2, /* number of jit options */
- jitOptions, /* jit options */
- jitOptionsVals, /* jit option values */
- &linkState));
-
- auto kernelName = getOperation().getName().str();
- if (dumpPtx) {
- llvm::dbgs() << " Kernel Name : [" << kernelName << "]\n";
- llvm::dbgs() << isa << "\n";
- }
- RETURN_ON_CUDA_ERROR(cuLinkAddData(
- linkState, CUjitInputType::CU_JIT_INPUT_PTX,
- const_cast<void *>(static_cast<const void *>(isa.c_str())), isa.length(),
- kernelName.c_str(), 0, /* number of jit options */
- nullptr, /* jit options */
- nullptr /* jit option values */
- ));
-
- void *cubinData;
- size_t cubinSize;
- RETURN_ON_CUDA_ERROR(cuLinkComplete(linkState, &cubinData, &cubinSize));
-
- char *cubinAsChar = static_cast<char *>(cubinData);
- auto result =
- std::make_unique<std::vector<char>>(cubinAsChar, cubinAsChar + cubinSize);
-
- // This will also destroy the cubin data.
- RETURN_ON_CUDA_ERROR(cuLinkDestroy(linkState));
- // Pop and release the primary context.
- CUcontext poppedContext;
- RETURN_ON_CUDA_ERROR(cuCtxPopCurrent(&poppedContext));
- RETURN_ON_CUDA_ERROR(cuDevicePrimaryCtxRelease(device));
-
- return result;
-}
-
-// Register pass to serialize GPU kernel functions to a CUBIN binary annotation.
-void mlir::registerGpuSerializeToCubinPass() {
- PassRegistration<SerializeToCubinPass> registerSerializeToCubin(
- [] { return std::make_unique<SerializeToCubinPass>(); });
-}
-
-std::unique_ptr<Pass> mlir::createGpuSerializeToCubinPass(StringRef triple,
- StringRef arch,
- StringRef features,
- int optLevel,
- bool dumpPtx) {
- return std::make_unique<SerializeToCubinPass>(triple, arch, features,
- optLevel, dumpPtx);
-}
-
-#else // MLIR_GPU_TO_CUBIN_PASS_ENABLE
-void mlir::registerGpuSerializeToCubinPass() {}
-#endif // MLIR_GPU_TO_CUBIN_PASS_ENABLE