diff options
author | Fabian Mora <fmora.dev@gmail.com> | 2024-06-20 08:01:40 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-20 08:01:40 -0500 |
commit | 9ddf3b835c4c0f1d73ace1e2af1e05ec4bce865d (patch) | |
tree | f60a93bcfdd5dce44627c649442f2017fc5b921f /mlir | |
parent | 4abbf99579633d70bdecb9876cbed319ce9f546a (diff) | |
download | llvm-9ddf3b835c4c0f1d73ace1e2af1e05ec4bce865d.zip llvm-9ddf3b835c4c0f1d73ace1e2af1e05ec4bce865d.tar.gz llvm-9ddf3b835c4c0f1d73ace1e2af1e05ec4bce865d.tar.bz2 |
[mlir][gpu] Remove old GPU serialization passes (#94998)
This patch removes the last vestiges of the old gpu serialization
pipeline. To compile GPU code use target attributes instead.
See [Compilation overview | 'gpu' Dialect - MLIR
docs](https://mlir.llvm.org/docs/Dialects/GPU/#compilation-overview) for
additional information on the target attributes compilation pipeline
that replaced the old serialization pipeline.
Diffstat (limited to 'mlir')
-rw-r--r-- | mlir/docs/Dialects/GPU.md | 3 | ||||
-rw-r--r-- | mlir/include/mlir/Config/mlir-config.h.cmake | 3 | ||||
-rw-r--r-- | mlir/include/mlir/Conversion/Passes.td | 2 | ||||
-rw-r--r-- | mlir/include/mlir/Dialect/GPU/Transforms/Passes.h | 63 | ||||
-rw-r--r-- | mlir/include/mlir/Dialect/GPU/Transforms/Utils.h | 3 | ||||
-rw-r--r-- | mlir/lib/Dialect/GPU/CMakeLists.txt | 43 | ||||
-rw-r--r-- | mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp | 153 | ||||
-rw-r--r-- | mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp | 458 | ||||
-rw-r--r-- | mlir/tools/mlir-opt/mlir-opt.cpp | 4 |
9 files changed, 1 insertions, 731 deletions
diff --git a/mlir/docs/Dialects/GPU.md b/mlir/docs/Dialects/GPU.md index 8a3acc3..8a0f117 100644 --- a/mlir/docs/Dialects/GPU.md +++ b/mlir/docs/Dialects/GPU.md @@ -37,9 +37,6 @@ complex lifetime analysis following the principles of MLIR that promote structure and representing analysis results in the IR. ## GPU Compilation -### Deprecation notice -The `--gpu-to-(cubin|hsaco)` passes will be deprecated in a future release. - ### Compilation overview The compilation process in the GPU dialect has two main stages: GPU module serialization and offloading operations translation. Together these stages can diff --git a/mlir/include/mlir/Config/mlir-config.h.cmake b/mlir/include/mlir/Config/mlir-config.h.cmake index abd6f41..dc55794 100644 --- a/mlir/include/mlir/Config/mlir-config.h.cmake +++ b/mlir/include/mlir/Config/mlir-config.h.cmake @@ -20,9 +20,6 @@ #ifndef MLIR_CONFIG_H #define MLIR_CONFIG_H -/* If set, enable deprecated serialization passes. */ -#cmakedefine01 MLIR_DEPRECATED_GPU_SERIALIZATION_ENABLE - /* Enable expensive checks to detect invalid pattern API usage. Failed checks manifest as fatal errors or invalid memory accesses (e.g., accessing deallocated memory) that cause a crash. Running with ASAN is recommended for diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index 9ab5faf..2315686 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -478,7 +478,7 @@ def GpuToLLVMConversionPass : Pass<"gpu-to-llvm", "ModuleOp"> { /*default=*/"false", "Use bare pointers to pass memref arguments to kernels. " "The kernel must use the same setting for this option." - > + > ]; let dependentDialects = [ diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h index 8f7466a..a20bae86 100644 --- a/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/GPU/Transforms/Passes.h @@ -91,75 +91,12 @@ namespace gpu { LogicalResult transformGpuModulesToBinaries( Operation *op, OffloadingLLVMTranslationAttrInterface handler = nullptr, const gpu::TargetOptions &options = {}); - -/// Base pass class to serialize kernel functions through LLVM into -/// user-specified IR and add the resulting blob as module attribute. -class SerializeToBlobPass : public OperationPass<gpu::GPUModuleOp> { -public: - SerializeToBlobPass(TypeID passID); - SerializeToBlobPass(const SerializeToBlobPass &other); - - void runOnOperation() final; - -protected: - /// Hook allowing the application of optimizations before codegen - /// By default, does nothing - virtual LogicalResult optimizeLlvm(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine); - - /// Translates the 'getOperation()' result to an LLVM module. - virtual std::unique_ptr<llvm::Module> - translateToLLVMIR(llvm::LLVMContext &llvmContext); - -private: - /// Creates the LLVM target machine to generate the ISA. - std::unique_ptr<llvm::TargetMachine> createTargetMachine(); - - /// Translates the module to ISA - std::optional<std::string> translateToISA(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine); - - /// Serializes the target ISA to binary form. - virtual std::unique_ptr<std::vector<char>> - serializeISA(const std::string &isa) = 0; - -protected: - Option<std::string> triple{*this, "triple", - ::llvm::cl::desc("Target triple")}; - Option<std::string> chip{*this, "chip", - ::llvm::cl::desc("Target architecture")}; - Option<std::string> features{*this, "features", - ::llvm::cl::desc("Target features")}; - Option<int> optLevel{*this, "opt-level", - llvm::cl::desc("Optimization level for compilation"), - llvm::cl::init(2)}; - Option<std::string> gpuBinaryAnnotation{ - *this, "gpu-binary-annotation", - llvm::cl::desc("Annotation attribute string for GPU binary"), - llvm::cl::init(getDefaultGpuBinaryAnnotation())}; - Option<bool> dumpPtx{*this, "dump-ptx", - ::llvm::cl::desc("Dump generated PTX"), - llvm::cl::init(false)}; -}; } // namespace gpu //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// -/// Register pass to serialize GPU kernel functions to a HSAco binary -/// annotation. -LLVM_DEPRECATED("use Target attributes instead", "") -void registerGpuSerializeToHsacoPass(); - -/// Create an instance of the GPU kernel function to HSAco binary serialization -/// pass. -LLVM_DEPRECATED("use Target attributes instead", "") -std::unique_ptr<Pass> createGpuSerializeToHsacoPass(StringRef triple, - StringRef arch, - StringRef features, - int optLevel); - /// Collect a set of patterns to decompose memrefs ops. void populateGpuDecomposeMemrefsPatterns(RewritePatternSet &patterns); diff --git a/mlir/include/mlir/Dialect/GPU/Transforms/Utils.h b/mlir/include/mlir/Dialect/GPU/Transforms/Utils.h index f25c506..f8c018e 100644 --- a/mlir/include/mlir/Dialect/GPU/Transforms/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/Transforms/Utils.h @@ -28,9 +28,6 @@ namespace gpu { class GPUFuncOp; class LaunchOp; -/// Returns the default annotation name for GPU binary blobs. -std::string getDefaultGpuBinaryAnnotation(); - /// Returns the matching vector combining kind. vector::CombiningKind convertReductionKind(gpu::AllReduceOperation mode); } // namespace gpu diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index 8c64c20..8e4cef5 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -1,17 +1,3 @@ -if (MLIR_ENABLE_ROCM_CONVERSIONS) - set(AMDGPU_LIBS - IRReader - IPO - linker - MCParser - AMDGPUAsmParser - AMDGPUCodeGen - AMDGPUDesc - AMDGPUInfo - target - ) -endif() - add_mlir_dialect_library(MLIRGPUDialect IR/GPUDialect.cpp IR/InferIntRangeInterfaceImpls.cpp @@ -51,8 +37,6 @@ add_mlir_dialect_library(MLIRGPUTransforms Transforms/NVVMAttachTarget.cpp Transforms/ParallelLoopMapper.cpp Transforms/ROCDLAttachTarget.cpp - Transforms/SerializeToBlob.cpp - Transforms/SerializeToHsaco.cpp Transforms/ShuffleRewriter.cpp Transforms/SPIRVAttachTarget.cpp Transforms/SubgroupReduceLowering.cpp @@ -63,12 +47,6 @@ add_mlir_dialect_library(MLIRGPUTransforms ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU - LINK_COMPONENTS - Core - MC - Target - ${AMDGPU_LIBS} - DEPENDS MLIRGPUPassIncGen MLIRParallelLoopMapperEnumsGen @@ -78,15 +56,12 @@ add_mlir_dialect_library(MLIRGPUTransforms MLIRArithDialect MLIRAsyncDialect MLIRBufferizationDialect - MLIRBuiltinToLLVMIRTranslation MLIRDataLayoutInterfaces MLIRExecutionEngineUtils MLIRGPUDialect MLIRIR MLIRIndexDialect MLIRLLVMDialect - MLIRGPUToLLVMIRTranslation - MLIRLLVMToLLVMIRTranslation MLIRMemRefDialect MLIRNVVMTarget MLIRPass @@ -101,21 +76,3 @@ add_mlir_dialect_library(MLIRGPUTransforms add_subdirectory(TransformOps) add_subdirectory(Pipelines) - -if(MLIR_ENABLE_ROCM_CONVERSIONS) - if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)) - message(SEND_ERROR - "Building mlir with ROCm support requires the AMDGPU backend") - endif() - - set(DEFAULT_ROCM_PATH "/opt/rocm" CACHE PATH "Fallback path to search for ROCm installs") - target_compile_definitions(obj.MLIRGPUTransforms - PRIVATE - __DEFAULT_ROCM_PATH__="${DEFAULT_ROCM_PATH}" - ) - - target_link_libraries(MLIRGPUTransforms - PRIVATE - MLIRROCDLToLLVMIRTranslation - ) -endif() diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp deleted file mode 100644 index 1fdfe97..0000000 --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp +++ /dev/null @@ -1,153 +0,0 @@ -//===- SerializeToBlob.cpp - MLIR GPU lowering pass -----------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements a base class for a pass to serialize a gpu module -// into a binary blob that can be executed on a GPU. The binary blob is added -// as a string attribute to the gpu module. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Dialect/GPU/IR/GPUDialect.h" -#include "mlir/Dialect/GPU/Transforms/Passes.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" -#include "mlir/ExecutionEngine/OptUtils.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Export.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/MC/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Target/TargetMachine.h" - -#include <optional> -#include <string> - -#define DEBUG_TYPE "serialize-to-blob" - -using namespace mlir; - -std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; } - -gpu::SerializeToBlobPass::SerializeToBlobPass(TypeID passID) - : OperationPass<gpu::GPUModuleOp>(passID) {} - -gpu::SerializeToBlobPass::SerializeToBlobPass(const SerializeToBlobPass &other) - : OperationPass<gpu::GPUModuleOp>(other) {} - -std::optional<std::string> -gpu::SerializeToBlobPass::translateToISA(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine) { - llvmModule.setDataLayout(targetMachine.createDataLayout()); - - if (failed(optimizeLlvm(llvmModule, targetMachine))) - return std::nullopt; - - std::string targetISA; - llvm::raw_string_ostream stream(targetISA); - - { // Drop pstream after this to prevent the ISA from being stuck buffering - llvm::buffer_ostream pstream(stream); - llvm::legacy::PassManager codegenPasses; - - if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr, - llvm::CodeGenFileType::AssemblyFile)) - return std::nullopt; - - codegenPasses.run(llvmModule); - } - return stream.str(); -} - -void gpu::SerializeToBlobPass::runOnOperation() { - // Lower the module to an LLVM IR module using a separate context to enable - // multi-threaded processing. - llvm::LLVMContext llvmContext; - std::unique_ptr<llvm::Module> llvmModule = translateToLLVMIR(llvmContext); - if (!llvmModule) - return signalPassFailure(); - - // Lower the LLVM IR module to target ISA. - std::unique_ptr<llvm::TargetMachine> targetMachine = createTargetMachine(); - if (!targetMachine) - return signalPassFailure(); - - std::optional<std::string> maybeTargetISA = - translateToISA(*llvmModule, *targetMachine); - - if (!maybeTargetISA.has_value()) - return signalPassFailure(); - - std::string targetISA = std::move(*maybeTargetISA); - - LLVM_DEBUG({ - llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n"; - llvm::dbgs() << targetISA << "\n"; - llvm::dbgs().flush(); - }); - - // Serialize the target ISA. - std::unique_ptr<std::vector<char>> blob = serializeISA(targetISA); - if (!blob) - return signalPassFailure(); - - // Add the blob as module attribute. - auto attr = - StringAttr::get(&getContext(), StringRef(blob->data(), blob->size())); - getOperation()->setAttr(gpuBinaryAnnotation, attr); -} - -LogicalResult -gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule, - llvm::TargetMachine &targetMachine) { - int optLevel = this->optLevel.getValue(); - if (optLevel < 0 || optLevel > 3) - return getOperation().emitError() - << "invalid optimization level " << optLevel; - - targetMachine.setOptLevel(static_cast<llvm::CodeGenOptLevel>(optLevel)); - - auto transformer = - makeOptimizingTransformer(optLevel, /*sizeLevel=*/0, &targetMachine); - auto error = transformer(&llvmModule); - if (error) { - InFlightDiagnostic mlirError = getOperation()->emitError(); - llvm::handleAllErrors( - std::move(error), [&mlirError](const llvm::ErrorInfoBase &ei) { - mlirError << "could not optimize LLVM IR: " << ei.message(); - }); - return mlirError; - } - return success(); -} - -std::unique_ptr<llvm::TargetMachine> -gpu::SerializeToBlobPass::createTargetMachine() { - Location loc = getOperation().getLoc(); - std::string error; - const llvm::Target *target = - llvm::TargetRegistry::lookupTarget(triple, error); - if (!target) { - emitError(loc, Twine("failed to lookup target: ") + error); - return {}; - } - llvm::TargetMachine *machine = - target->createTargetMachine(triple, chip, features, {}, {}); - if (!machine) { - emitError(loc, "failed to create target machine"); - return {}; - } - - return std::unique_ptr<llvm::TargetMachine>{machine}; -} - -std::unique_ptr<llvm::Module> -gpu::SerializeToBlobPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) { - return translateModuleToLLVMIR(getOperation(), llvmContext, - "LLVMDialectModule"); -} diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp deleted file mode 100644 index a4f1998..0000000 --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp +++ /dev/null @@ -1,458 +0,0 @@ -//===- LowerGPUToHSACO.cpp - Convert GPU kernel to HSACO blob -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements a pass that serializes a gpu module into HSAco blob and -// adds that blob as a string attribute of the module. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Config/mlir-config.h" -#include "mlir/Dialect/GPU/Transforms/Passes.h" -#include "mlir/IR/Location.h" -#include "mlir/IR/MLIRContext.h" - -#if MLIR_ENABLE_ROCM_CONVERSIONS -#include "mlir/ExecutionEngine/OptUtils.h" -#include "mlir/Pass/Pass.h" -#include "mlir/Support/FileUtilities.h" -#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h" -#include "mlir/Target/LLVMIR/Export.h" - -#include "llvm/IR/Constants.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Module.h" -#include "llvm/IRReader/IRReader.h" -#include "llvm/Linker/Linker.h" - -#include "llvm/MC/MCAsmBackend.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCCodeEmitter.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/MCParser/MCTargetAsmParser.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/MC/TargetRegistry.h" - -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/FileUtilities.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Program.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/TargetSelect.h" -#include "llvm/Support/Threading.h" -#include "llvm/Support/WithColor.h" - -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" - -#include "llvm/Transforms/IPO/Internalize.h" - -#include <optional> - -using namespace mlir; - -namespace { -class SerializeToHsacoPass - : public PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass> { - static llvm::once_flag initializeBackendOnce; - -public: - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(SerializeToHsacoPass) - - SerializeToHsacoPass(StringRef triple, StringRef arch, StringRef features, - int optLevel); - SerializeToHsacoPass(const SerializeToHsacoPass &other); - StringRef getArgument() const override { return "gpu-to-hsaco"; } - StringRef getDescription() const override { - return "Lower GPU kernel function to HSACO binary annotations"; - } - -protected: - Option<std::string> rocmPath{*this, "rocm-path", - llvm::cl::desc("Path to ROCm install")}; - - // Overload to allow linking in device libs - std::unique_ptr<llvm::Module> - translateToLLVMIR(llvm::LLVMContext &llvmContext) override; - -private: - // Loads LLVM bitcode libraries - std::optional<SmallVector<std::unique_ptr<llvm::Module>, 3>> - loadLibraries(SmallVectorImpl<char> &path, - SmallVectorImpl<StringRef> &libraries, - llvm::LLVMContext &context); - - // Serializes ROCDL to HSACO. - std::unique_ptr<std::vector<char>> - serializeISA(const std::string &isa) override; - - LogicalResult assembleIsa(const std::string &isa, - SmallVectorImpl<char> &result); - std::unique_ptr<std::vector<char>> createHsaco(ArrayRef<char> isaBinary); - - std::string getRocmPath(); -}; -} // namespace - -SerializeToHsacoPass::SerializeToHsacoPass(const SerializeToHsacoPass &other) - : PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass>(other) {} - -/// Get a user-specified path to ROCm -// Tries, in order, the --rocm-path option, the ROCM_PATH environment variable -// and a compile-time default -std::string SerializeToHsacoPass::getRocmPath() { - if (rocmPath.getNumOccurrences() > 0) - return rocmPath.getValue(); - - return __DEFAULT_ROCM_PATH__; -} - -// Sets the 'option' to 'value' unless it already has a value. -static void maybeSetOption(Pass::Option<std::string> &option, - function_ref<std::string()> getValue) { - if (!option.hasValue()) - option = getValue(); -} - -llvm::once_flag SerializeToHsacoPass::initializeBackendOnce; - -SerializeToHsacoPass::SerializeToHsacoPass(StringRef triple, StringRef arch, - StringRef features, int optLevel) { - // No matter how this pass is constructed, ensure that the AMDGPU backend - // is initialized exactly once. - llvm::call_once(initializeBackendOnce, []() { - // Initialize LLVM AMDGPU backend. - LLVMInitializeAMDGPUAsmParser(); - LLVMInitializeAMDGPUAsmPrinter(); - LLVMInitializeAMDGPUTarget(); - LLVMInitializeAMDGPUTargetInfo(); - LLVMInitializeAMDGPUTargetMC(); - }); - maybeSetOption(this->triple, [&triple] { return triple.str(); }); - maybeSetOption(this->chip, [&arch] { return arch.str(); }); - maybeSetOption(this->features, [&features] { return features.str(); }); - if (this->optLevel.getNumOccurrences() == 0) - this->optLevel.setValue(optLevel); -} - -std::optional<SmallVector<std::unique_ptr<llvm::Module>, 3>> -SerializeToHsacoPass::loadLibraries(SmallVectorImpl<char> &path, - SmallVectorImpl<StringRef> &libraries, - llvm::LLVMContext &context) { - SmallVector<std::unique_ptr<llvm::Module>, 3> ret; - size_t dirLength = path.size(); - - if (!llvm::sys::fs::is_directory(path)) { - getOperation().emitRemark() << "Bitcode path: " << path - << " does not exist or is not a directory\n"; - return std::nullopt; - } - - for (const StringRef file : libraries) { - llvm::SMDiagnostic error; - llvm::sys::path::append(path, file); - llvm::StringRef pathRef(path.data(), path.size()); - std::unique_ptr<llvm::Module> library = - llvm::getLazyIRFileModule(pathRef, error, context); - path.truncate(dirLength); - if (!library) { - getOperation().emitError() << "Failed to load library " << file - << " from " << path << error.getMessage(); - return std::nullopt; - } - // Some ROCM builds don't strip this like they should - if (auto *openclVersion = library->getNamedMetadata("opencl.ocl.version")) - library->eraseNamedMetadata(openclVersion); - // Stop spamming us with clang version numbers - if (auto *ident = library->getNamedMetadata("llvm.ident")) - library->eraseNamedMetadata(ident); - ret.push_back(std::move(library)); - } - - return std::move(ret); -} - -std::unique_ptr<llvm::Module> -SerializeToHsacoPass::translateToLLVMIR(llvm::LLVMContext &llvmContext) { - // MLIR -> LLVM translation - std::unique_ptr<llvm::Module> ret = - gpu::SerializeToBlobPass::translateToLLVMIR(llvmContext); - - if (!ret) { - getOperation().emitOpError("Module lowering failed"); - return ret; - } - // Walk the LLVM module in order to determine if we need to link in device - // libs - bool needOpenCl = false; - bool needOckl = false; - bool needOcml = false; - for (llvm::Function &f : ret->functions()) { - if (f.hasExternalLinkage() && f.hasName() && !f.hasExactDefinition()) { - StringRef funcName = f.getName(); - if ("printf" == funcName) - needOpenCl = true; - if (funcName.starts_with("__ockl_")) - needOckl = true; - if (funcName.starts_with("__ocml_")) - needOcml = true; - } - } - - if (needOpenCl) - needOcml = needOckl = true; - - // No libraries needed (the typical case) - if (!(needOpenCl || needOcml || needOckl)) - return ret; - - // Define one of the control constants the ROCm device libraries expect to be - // present These constants can either be defined in the module or can be - // imported by linking in bitcode that defines the constant. To simplify our - // logic, we define the constants into the module we are compiling - auto addControlConstant = [&module = *ret](StringRef name, uint32_t value, - uint32_t bitwidth) { - using llvm::GlobalVariable; - if (module.getNamedGlobal(name)) { - return; - } - llvm::IntegerType *type = - llvm::IntegerType::getIntNTy(module.getContext(), bitwidth); - auto *initializer = llvm::ConstantInt::get(type, value, /*isSigned=*/false); - auto *constant = new GlobalVariable( - module, type, - /*isConstant=*/true, GlobalVariable::LinkageTypes::LinkOnceODRLinkage, - initializer, name, - /*before=*/nullptr, - /*threadLocalMode=*/GlobalVariable::ThreadLocalMode::NotThreadLocal, - /*addressSpace=*/4); - constant->setUnnamedAddr(GlobalVariable::UnnamedAddr::Local); - constant->setVisibility( - GlobalVariable::VisibilityTypes::ProtectedVisibility); - constant->setAlignment(llvm::MaybeAlign(bitwidth / 8)); - }; - - // Set up control variables in the module instead of linking in tiny bitcode - if (needOcml) { - // TODO(kdrewnia): Enable math optimizations once we have support for - // `-ffast-math`-like options - addControlConstant("__oclc_finite_only_opt", 0, 8); - addControlConstant("__oclc_daz_opt", 0, 8); - addControlConstant("__oclc_correctly_rounded_sqrt32", 1, 8); - addControlConstant("__oclc_unsafe_math_opt", 0, 8); - } - if (needOcml || needOckl) { - addControlConstant("__oclc_wavefrontsize64", 1, 8); - StringRef chipSet = this->chip.getValue(); - if (chipSet.starts_with("gfx")) - chipSet = chipSet.substr(3); - uint32_t minor = - llvm::APInt(32, chipSet.substr(chipSet.size() - 2), 16).getZExtValue(); - uint32_t major = llvm::APInt(32, chipSet.substr(0, chipSet.size() - 2), 10) - .getZExtValue(); - uint32_t isaNumber = minor + 1000 * major; - addControlConstant("__oclc_ISA_version", isaNumber, 32); - - // This constant must always match the default code object ABI version - // of the AMDGPU backend. - addControlConstant("__oclc_ABI_version", 500, 32); - } - - // Determine libraries we need to link - order matters due to dependencies - llvm::SmallVector<StringRef, 4> libraries; - if (needOpenCl) - libraries.push_back("opencl.bc"); - if (needOcml) - libraries.push_back("ocml.bc"); - if (needOckl) - libraries.push_back("ockl.bc"); - - std::optional<SmallVector<std::unique_ptr<llvm::Module>, 3>> mbModules; - std::string theRocmPath = getRocmPath(); - llvm::SmallString<32> bitcodePath(theRocmPath); - llvm::sys::path::append(bitcodePath, "amdgcn", "bitcode"); - mbModules = loadLibraries(bitcodePath, libraries, llvmContext); - - if (!mbModules) { - getOperation() - .emitWarning("Could not load required device libraries") - .attachNote() - << "This will probably cause link-time or run-time failures"; - return ret; // We can still abort here - } - - llvm::Linker linker(*ret); - for (std::unique_ptr<llvm::Module> &libModule : *mbModules) { - // This bitcode linking code is substantially similar to what is used in - // hip-clang It imports the library functions into the module, allowing LLVM - // optimization passes (which must run after linking) to optimize across the - // libraries and the module's code. We also only import symbols if they are - // referenced by the module or a previous library since there will be no - // other source of references to those symbols in this compilation and since - // we don't want to bloat the resulting code object. - bool err = linker.linkInModule( - std::move(libModule), llvm::Linker::Flags::LinkOnlyNeeded, - [](llvm::Module &m, const StringSet<> &gvs) { - llvm::internalizeModule(m, [&gvs](const llvm::GlobalValue &gv) { - return !gv.hasName() || (gvs.count(gv.getName()) == 0); - }); - }); - // True is linker failure - if (err) { - getOperation().emitError( - "Unrecoverable failure during device library linking."); - // We have no guaranties about the state of `ret`, so bail - return nullptr; - } - } - - return ret; -} - -LogicalResult SerializeToHsacoPass::assembleIsa(const std::string &isa, - SmallVectorImpl<char> &result) { - auto loc = getOperation().getLoc(); - - llvm::raw_svector_ostream os(result); - - llvm::Triple triple(llvm::Triple::normalize(this->triple)); - std::string error; - const llvm::Target *target = - llvm::TargetRegistry::lookupTarget(triple.normalize(), error); - if (!target) - return emitError(loc, Twine("failed to lookup target: ") + error); - - llvm::SourceMgr srcMgr; - srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), SMLoc()); - - const llvm::MCTargetOptions mcOptions; - std::unique_ptr<llvm::MCRegisterInfo> mri( - target->createMCRegInfo(this->triple)); - std::unique_ptr<llvm::MCAsmInfo> mai( - target->createMCAsmInfo(*mri, this->triple, mcOptions)); - std::unique_ptr<llvm::MCSubtargetInfo> sti( - target->createMCSubtargetInfo(this->triple, this->chip, this->features)); - - llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr, - &mcOptions); - std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo( - ctx, /*PIC=*/false, /*LargeCodeModel=*/false)); - ctx.setObjectFileInfo(mofi.get()); - - SmallString<128> cwd; - if (!llvm::sys::fs::current_path(cwd)) - ctx.setCompilationDir(cwd); - - std::unique_ptr<llvm::MCStreamer> mcStreamer; - std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo()); - - llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, ctx); - llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions); - mcStreamer.reset(target->createMCObjectStreamer( - triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab), - mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce), - *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible, - /*DWARFMustBeAtTheEnd*/ false)); - - std::unique_ptr<llvm::MCAsmParser> parser( - createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai)); - std::unique_ptr<llvm::MCTargetAsmParser> tap( - target->createMCAsmParser(*sti, *parser, *mcii, mcOptions)); - - if (!tap) - return emitError(loc, "assembler initialization error"); - - parser->setTargetParser(*tap); - parser->Run(false); - - return success(); -} - -std::unique_ptr<std::vector<char>> -SerializeToHsacoPass::createHsaco(ArrayRef<char> isaBinary) { - auto loc = getOperation().getLoc(); - - // Save the ISA binary to a temp file. - int tempIsaBinaryFd = -1; - SmallString<128> tempIsaBinaryFilename; - if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd, - tempIsaBinaryFilename)) { - emitError(loc, "temporary file for ISA binary creation error"); - return {}; - } - llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename); - llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true); - tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size()); - tempIsaBinaryOs.close(); - - // Create a temp file for HSA code object. - SmallString<128> tempHsacoFilename; - if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco", - tempHsacoFilename)) { - emitError(loc, "temporary file for HSA code object creation error"); - return {}; - } - llvm::FileRemover cleanupHsaco(tempHsacoFilename); - - std::string theRocmPath = getRocmPath(); - llvm::SmallString<32> lldPath(theRocmPath); - llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld"); - int lldResult = llvm::sys::ExecuteAndWait( - lldPath, - {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename}); - if (lldResult != 0) { - emitError(loc, "lld invocation error"); - return {}; - } - - // Load the HSA code object. - auto hsacoFile = - llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false); - if (!hsacoFile) { - emitError(loc, "read HSA code object from temp file error"); - return {}; - } - - StringRef buffer = (*hsacoFile)->getBuffer(); - return std::make_unique<std::vector<char>>(buffer.begin(), buffer.end()); -} - -std::unique_ptr<std::vector<char>> -SerializeToHsacoPass::serializeISA(const std::string &isa) { - SmallVector<char, 0> isaBinary; - if (failed(assembleIsa(isa, isaBinary))) - return {}; - return createHsaco(isaBinary); -} - -// Register pass to serialize GPU kernel functions to a HSACO binary annotation. -void mlir::registerGpuSerializeToHsacoPass() { - PassRegistration<SerializeToHsacoPass> registerSerializeToHSACO([] { - return std::make_unique<SerializeToHsacoPass>("amdgcn-amd-amdhsa", "", "", - 2); - }); -} - -/// Create an instance of the GPU kernel function to HSAco binary serialization -/// pass. -std::unique_ptr<Pass> mlir::createGpuSerializeToHsacoPass(StringRef triple, - StringRef arch, - StringRef features, - int optLevel) { - return std::make_unique<SerializeToHsacoPass>(triple, arch, features, - optLevel); -} - -#else // MLIR_ENABLE_ROCM_CONVERSIONS -void mlir::registerGpuSerializeToHsacoPass() {} -#endif // MLIR_ENABLE_ROCM_CONVERSIONS diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index d0de74d..e4fbd03 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -288,10 +288,6 @@ void registerTestPasses() { int main(int argc, char **argv) { registerAllPasses(); -#if MLIR_DEPRECATED_GPU_SERIALIZATION_ENABLE - registerGpuSerializeToCubinPass(); - registerGpuSerializeToHsacoPass(); -#endif #ifdef MLIR_INCLUDE_TESTS registerTestPasses(); #endif |