aboutsummaryrefslogtreecommitdiff
path: root/mlir
diff options
context:
space:
mode:
authorChristian Sigg <csigg@google.com>2021-03-19 00:22:50 -0700
committerChristian Sigg <csigg@google.com>2021-03-19 00:24:10 -0700
commita825fb2c07337cc2c84783558e91416e07adcf42 (patch)
tree2ab4d4673c0df3b5ece19a061361d749646a65b9 /mlir
parent4ee4f9bf4ae49df25b46351a0bfca3a36e7bf82d (diff)
downloadllvm-a825fb2c07337cc2c84783558e91416e07adcf42.zip
llvm-a825fb2c07337cc2c84783558e91416e07adcf42.tar.gz
llvm-a825fb2c07337cc2c84783558e91416e07adcf42.tar.bz2
[mlir] Remove mlir-rocm-runner
This change combines for ROCm what was done for CUDA in D97463, D98203, D98360, and D98396. I did not try to compile SerializeToHsaco.cpp or test mlir/test/Integration/GPU/ROCM because I don't have an AMD card. I fixed the things that had obvious bit-rot though. Reviewed By: whchung Differential Revision: https://reviews.llvm.org/D98447
Diffstat (limited to 'mlir')
-rw-r--r--mlir/include/mlir/Dialect/GPU/Passes.h4
-rw-r--r--mlir/include/mlir/InitAllPasses.h1
-rw-r--r--mlir/lib/Dialect/GPU/CMakeLists.txt67
-rw-r--r--mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp284
-rw-r--r--mlir/lib/ExecutionEngine/CMakeLists.txt49
-rw-r--r--mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp (renamed from mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp)30
-rw-r--r--mlir/test/CMakeLists.txt13
-rw-r--r--mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir7
-rw-r--r--mlir/test/Integration/GPU/CUDA/lit.local.cfg2
-rw-r--r--mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir (renamed from mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir)8
-rw-r--r--mlir/test/Integration/GPU/ROCM/lit.local.cfg (renamed from mlir/test/mlir-rocm-runner/lit.local.cfg)0
-rw-r--r--mlir/test/Integration/GPU/ROCM/two-modules.mlir (renamed from mlir/test/mlir-rocm-runner/two-modules.mlir)12
-rw-r--r--mlir/test/Integration/GPU/ROCM/vecadd.mlir (renamed from mlir/test/mlir-rocm-runner/vecadd.mlir)9
-rw-r--r--mlir/test/Integration/GPU/ROCM/vector-transferops.mlir (renamed from mlir/test/mlir-rocm-runner/vector-transferops.mlir)9
-rw-r--r--mlir/test/lib/Transforms/TestConvertGPUKernelToHsaco.cpp60
-rw-r--r--mlir/test/lit.cfg.py1
-rw-r--r--mlir/test/lit.site.cfg.py.in1
-rw-r--r--mlir/tools/CMakeLists.txt1
-rw-r--r--mlir/tools/mlir-opt/mlir-opt.cpp4
-rw-r--r--mlir/tools/mlir-rocm-runner/CMakeLists.txt127
-rw-r--r--mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp349
21 files changed, 494 insertions, 544 deletions
diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h
index 6a6a2c0..bfb5626 100644
--- a/mlir/include/mlir/Dialect/GPU/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Passes.h
@@ -90,6 +90,10 @@ protected:
/// annotation.
void registerGpuSerializeToCubinPass();
+/// Register pass to serialize GPU kernel functions to a HSAco binary
+/// annotation.
+void registerGpuSerializeToHsacoPass();
+
/// Generate the code for registering passes.
#define GEN_PASS_REGISTRATION
#include "mlir/Dialect/GPU/Passes.h.inc"
diff --git a/mlir/include/mlir/InitAllPasses.h b/mlir/include/mlir/InitAllPasses.h
index 029df07..ab9629a 100644
--- a/mlir/include/mlir/InitAllPasses.h
+++ b/mlir/include/mlir/InitAllPasses.h
@@ -52,6 +52,7 @@ inline void registerAllPasses() {
registerAsyncPasses();
registerGPUPasses();
registerGpuSerializeToCubinPass();
+ registerGpuSerializeToHsacoPass();
registerLinalgPasses();
LLVM::registerLLVMPasses();
quant::registerQuantPasses();
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index d7fbfe0..ea70029 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -6,6 +6,16 @@ if (MLIR_CUDA_CONVERSIONS_ENABLED)
)
endif()
+if (MLIR_ROCM_CONVERSIONS_ENABLED)
+ set(AMDGPU_LIBS
+ MCParser
+ AMDGPUAsmParser
+ AMDGPUCodeGen
+ AMDGPUDesc
+ AMDGPUInfo
+ )
+endif()
+
add_mlir_dialect_library(MLIRGPU
IR/GPUDialect.cpp
Transforms/AllReduceLowering.cpp
@@ -15,6 +25,7 @@ add_mlir_dialect_library(MLIRGPU
Transforms/ParallelLoopMapper.cpp
Transforms/SerializeToBlob.cpp
Transforms/SerializeToCubin.cpp
+ Transforms/SerializeToHsaco.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/GPU
@@ -23,6 +34,7 @@ add_mlir_dialect_library(MLIRGPU
Core
MC
${NVPTX_LIBS}
+ ${AMDGPU_LIBS}
DEPENDS
MLIRGPUOpsIncGen
@@ -84,3 +96,58 @@ if(MLIR_CUDA_RUNNER_ENABLED)
)
endif()
+
+if(MLIR_ROCM_RUNNER_ENABLED)
+ if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD))
+ message(SEND_ERROR
+ "Building mlir with ROCm support requires the AMDGPU backend")
+ endif()
+
+ # Ensure lld is enabled.
+ if (NOT "lld" IN_LIST LLVM_ENABLE_PROJECTS)
+ message(SEND_ERROR "lld is not enabled. Please revise LLVM_ENABLE_PROJECTS")
+ endif()
+
+ # Configure ROCm support.
+  # Resolve the ROCm install prefix: an already defined ROCM_PATH wins, then
+  # the ROCM_PATH environment variable, then /opt/rocm.
+  if (NOT DEFINED ROCM_PATH)
+    if (NOT DEFINED ENV{ROCM_PATH})
+      set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed")
+    else()
+      set(ROCM_PATH "$ENV{ROCM_PATH}" CACHE PATH "Path to which ROCm has been installed")
+    endif()
+  endif()
+  # Derive HIP_PATH independently of how ROCM_PATH was obtained. Previously it
+  # was only set when ROCM_PATH had been undefined, so configuring with
+  # -DROCM_PATH=... left HIP_PATH empty and "${HIP_PATH}/cmake" became "/cmake".
+  if (NOT DEFINED HIP_PATH)
+    set(HIP_PATH "${ROCM_PATH}/hip" CACHE PATH "Path to which HIP has been installed")
+  endif()
+ set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
+ find_package(HIP)
+ if (NOT HIP_FOUND)
+ message(SEND_ERROR "Building mlir with ROCm support requires a working ROCm and HIP install")
+ else()
+ message(STATUS "ROCm HIP version: ${HIP_VERSION}")
+ endif()
+
+ target_compile_definitions(obj.MLIRGPU
+ PRIVATE
+ __HIP_PLATFORM_HCC__
+ __ROCM_PATH__="${ROCM_PATH}"
+ MLIR_GPU_TO_HSACO_PASS_ENABLE=1
+ )
+
+ target_include_directories(obj.MLIRGPU
+ PRIVATE
+ ${MLIR_SOURCE_DIR}/../lld/include
+ ${HIP_PATH}/include
+ ${ROCM_PATH}/include
+ )
+
+ target_link_libraries(MLIRGPU
+ PRIVATE
+ lldELF
+ MLIRROCDLToLLVMIRTranslation
+ )
+
+ # Link lldELF also to libmlir.so. Create an alias that starts with LLVM
+ # because LINK_COMPONENTS elements are implicitly prefixed with LLVM.
+ add_library(LLVMAliasTolldELF ALIAS lldELF)
+ set_property(GLOBAL APPEND PROPERTY MLIR_LLVM_LINK_COMPONENTS AliasTolldELF)
+
+endif()
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
new file mode 100644
index 0000000..1369c1e
--- /dev/null
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
@@ -0,0 +1,284 @@
+//===- SerializeToHsaco.cpp - Convert GPU kernel to HSACO blob ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that serializes a gpu module into an HSAco blob
+// and adds that blob as a string attribute of the module.
+//
+//===----------------------------------------------------------------------===//
+#include "mlir/Dialect/GPU/Passes.h"
+
+#if MLIR_GPU_TO_HSACO_PASS_ENABLE
+#include "mlir/Pass/Pass.h"
+#include "mlir/Support/FileUtilities.h"
+#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Export.h"
+
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/LineIterator.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Target/TargetOptions.h"
+
+#include "lld/Common/Driver.h"
+
+#include "hip/hip_version.h"
+
+#include <mutex>
+
+using namespace mlir;
+
+namespace {
+// Pass that turns the ISA of a GPU module into a HSACO binary blob. It plugs
+// into gpu::SerializeToBlobPass by overriding serializeISA().
+class SerializeToHsacoPass
+    : public PassWrapper<SerializeToHsacoPass, gpu::SerializeToBlobPass> {
+public:
+  // Fills in the 'triple' and 'chip' options when they have no value yet.
+  SerializeToHsacoPass();
+
+private:
+  // Registers the ROCDL translation in addition to the base pass' dialects.
+  void getDependentDialects(DialectRegistry &registry) const override;
+
+  // Serializes ROCDL to HSACO: assembles 'isa' and links the result.
+  std::unique_ptr<std::vector<char>>
+  serializeISA(const std::string &isa) override;
+
+  // Links an assembled ISA object into a HSA code object using lld.
+  std::unique_ptr<std::vector<char>>
+  createHsaco(const SmallVectorImpl<char> &isaBinary);
+
+  // Assembles textual ISA into an in-memory object file.
+  std::unique_ptr<SmallVectorImpl<char>> assembleIsa(const std::string &isa);
+};
+} // namespace
+
+// Returns the chip name to target when none was specified.
+//
+// Runs `rocm_agent_enumerator` (looked up under `__ROCM_PATH__/bin`) with its
+// stdout redirected to a temporary file and returns the first output line that
+// is not "gfx000". If the tool cannot be found, the temporary file cannot be
+// created, the tool exits with a non-zero status, its output cannot be read,
+// or no usable line is present, "gfx900" is returned instead.
+static std::string getDefaultChip() {
+  const char kDefaultChip[] = "gfx900";
+
+  // Locate rocm_agent_enumerator.
+  const char kRocmAgentEnumerator[] = "rocm_agent_enumerator";
+  llvm::ErrorOr<std::string> rocmAgentEnumerator = llvm::sys::findProgramByName(
+      kRocmAgentEnumerator, {__ROCM_PATH__ "/bin"});
+  if (!rocmAgentEnumerator) {
+    llvm::WithColor::warning(llvm::errs())
+        << kRocmAgentEnumerator << " couldn't be located under "
+        << __ROCM_PATH__ << "/bin\n";
+    return kDefaultChip;
+  }
+
+  // Prepare temp file to hold the outputs. Only the path is needed (the tool
+  // writes to it through a redirect), so use the overload that does not hand
+  // back an open file descriptor; the descriptor was previously never closed.
+  SmallString<128> tempFilename;
+  if (llvm::sys::fs::createTemporaryFile("rocm_agent", "txt", tempFilename)) {
+    llvm::WithColor::warning(llvm::errs())
+        << "temporary file for " << kRocmAgentEnumerator << " creation error\n";
+    return kDefaultChip;
+  }
+  llvm::FileRemover cleanup(tempFilename);
+
+  // Invoke rocm_agent_enumerator with stdout redirected to the temp file.
+  // NOTE(review): llvm::sys::ExecuteAndWait documents that the first element
+  // of the argument list should be the program name; here "-t" occupies that
+  // slot. Left unchanged -- confirm against the tool's argument handling.
+  std::string errorMessage;
+  SmallVector<StringRef, 2> args{"-t", "GPU"};
+  Optional<StringRef> redirects[3] = {{""}, tempFilename.str(), {""}};
+  int result =
+      llvm::sys::ExecuteAndWait(rocmAgentEnumerator.get(), args, llvm::None,
+                                redirects, 0, 0, &errorMessage);
+  if (result) {
+    llvm::WithColor::warning(llvm::errs())
+        << kRocmAgentEnumerator << " invocation error: " << errorMessage
+        << "\n";
+    return kDefaultChip;
+  }
+
+  // Load and parse the result.
+  auto gfxIsaList = openInputFile(tempFilename);
+  if (!gfxIsaList) {
+    llvm::WithColor::error(llvm::errs())
+        << "read ROCm agent list temp file error\n";
+    return kDefaultChip;
+  }
+  for (llvm::line_iterator lines(*gfxIsaList); !lines.is_at_end(); ++lines) {
+    // Skip the line with content "gfx000".
+    if (*lines == "gfx000")
+      continue;
+    // Use the first ISA version found.
+    return lines->str();
+  }
+
+  return kDefaultChip;
+}
+
+// Assigns the result of 'getValue' to 'option', but only when 'option' does
+// not hold a value yet; otherwise 'getValue' is not invoked.
+static void maybeSetOption(Pass::Option<std::string> &option,
+                           function_ref<std::string()> getValue) {
+  const bool alreadySet = option.hasValue();
+  if (alreadySet)
+    return;
+  option = getValue();
+}
+
+// Provides fallbacks for the 'triple' and 'chip' options; values that are
+// already set are left untouched.
+SerializeToHsacoPass::SerializeToHsacoPass() {
+  auto defaultTriple = []() -> std::string { return "amdgcn-amd-amdhsa"; };
+  auto defaultChip = []() -> std::string {
+    // Computed on first use only and then reused for later pass instances.
+    static const std::string cachedChip = getDefaultChip();
+    return cachedChip;
+  };
+
+  maybeSetOption(this->triple, defaultTriple);
+  maybeSetOption(this->chip, defaultChip);
+}
+
+// Adds the ROCDL dialect translation to 'registry', then lets the
+// blob-serialization base pass add its own dependencies.
+void SerializeToHsacoPass::getDependentDialects(
+    DialectRegistry &registry) const {
+  using BasePass = gpu::SerializeToBlobPass;
+  registerROCDLDialectTranslation(registry);
+  BasePass::getDependentDialects(registry);
+}
+
+// Assembles the textual ISA in 'isa' into an in-memory object file using the
+// MC layer of the target selected by the 'triple', 'chip' and 'features'
+// options.
+//
+// Returns the object bytes, or a null pointer after emitting an error at the
+// operation's location when the target cannot be looked up, the target
+// assembly parser cannot be created, or the assembler reports an error.
+std::unique_ptr<SmallVectorImpl<char>>
+SerializeToHsacoPass::assembleIsa(const std::string &isa) {
+  auto loc = getOperation().getLoc();
+
+  // The object writer streams directly into 'result'.
+  SmallVector<char, 0> result;
+  llvm::raw_svector_ostream os(result);
+
+  llvm::Triple triple(llvm::Triple::normalize(this->triple));
+  std::string error;
+  const llvm::Target *target =
+      llvm::TargetRegistry::lookupTarget(triple.normalize(), error);
+  if (!target) {
+    emitError(loc, Twine("failed to lookup target: ") + error);
+    return {};
+  }
+
+  llvm::SourceMgr srcMgr;
+  srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa),
+                            llvm::SMLoc());
+
+  const llvm::MCTargetOptions mcOptions;
+  std::unique_ptr<llvm::MCRegisterInfo> mri(
+      target->createMCRegInfo(this->triple));
+  std::unique_ptr<llvm::MCAsmInfo> mai(
+      target->createMCAsmInfo(*mri, this->triple, mcOptions));
+  mai->setRelaxELFRelocations(true);
+
+  llvm::MCObjectFileInfo mofi;
+  llvm::MCContext ctx(mai.get(), mri.get(), &mofi, &srcMgr, &mcOptions);
+  mofi.InitMCObjectFileInfo(triple, false, ctx, false);
+
+  // Record the working directory as compilation dir when it can be queried.
+  SmallString<128> cwd;
+  if (!llvm::sys::fs::current_path(cwd))
+    ctx.setCompilationDir(cwd);
+
+  std::unique_ptr<llvm::MCStreamer> mcStreamer;
+  std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo());
+  std::unique_ptr<llvm::MCSubtargetInfo> sti(
+      target->createMCSubtargetInfo(this->triple, this->chip, this->features));
+
+  // The raw pointers are wrapped in unique_ptrs right in the call below.
+  llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, *mri, ctx);
+  llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions);
+  mcStreamer.reset(target->createMCObjectStreamer(
+      triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
+      mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
+      *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible,
+      /*DWARFMustBeAtTheEnd*/ false));
+  mcStreamer->setUseAssemblerInfoForParsing(true);
+
+  std::unique_ptr<llvm::MCAsmParser> parser(
+      createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
+  std::unique_ptr<llvm::MCTargetAsmParser> tap(
+      target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
+
+  if (!tap) {
+    emitError(loc, "assembler initialization error");
+    return {};
+  }
+
+  parser->setTargetParser(*tap);
+  // MCAsmParser::Run returns true on failure. Do not hand a possibly
+  // incomplete object to the linker in that case.
+  if (parser->Run(false)) {
+    emitError(loc, "failed to assemble ISA");
+    return {};
+  }
+
+  return std::make_unique<SmallVector<char, 0>>(std::move(result));
+}
+
+// Links the assembled object in 'isaBinary' into a HSA code object.
+//
+// The object is written to a temporary file, `ld.lld -shared` is run on it
+// in-process, and the linked output is read back from a second temporary
+// file. Both files are removed when the function returns. Returns the code
+// object bytes, or a null pointer after emitting an error at the operation's
+// location if a temporary file cannot be created, lld fails, or the output
+// cannot be read.
+std::unique_ptr<std::vector<char>>
+SerializeToHsacoPass::createHsaco(const SmallVectorImpl<char> &isaBinary) {
+  auto loc = getOperation().getLoc();
+
+  // Save the ISA binary to a temp file.
+  int tempIsaBinaryFd = -1;
+  SmallString<128> tempIsaBinaryFilename;
+  if (llvm::sys::fs::createTemporaryFile("kernel", "o", tempIsaBinaryFd,
+                                         tempIsaBinaryFilename)) {
+    emitError(loc, "temporary file for ISA binary creation error");
+    return {};
+  }
+  llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
+  // The stream takes ownership of the descriptor and closes it.
+  llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true);
+  tempIsaBinaryOs << StringRef(isaBinary.data(), isaBinary.size());
+  tempIsaBinaryOs.close();
+
+  // Create a temp file for HSA code object. lld only needs the path, so use
+  // the overload that does not return an open descriptor; the descriptor was
+  // previously never closed and leaked on every call.
+  SmallString<128> tempHsacoFilename;
+  if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco",
+                                         tempHsacoFilename)) {
+    emitError(loc, "temporary file for HSA code object creation error");
+    return {};
+  }
+  llvm::FileRemover cleanupHsaco(tempHsacoFilename);
+
+  {
+    // Calls into lld are serialized across all instances of this pass.
+    static std::mutex mutex;
+    const std::lock_guard<std::mutex> lock(mutex);
+    // Invoke lld. Expect a true return value from lld.
+    if (!lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(),
+                         "-o", tempHsacoFilename.c_str()},
+                        /*canEarlyExit=*/false, llvm::outs(), llvm::errs())) {
+      emitError(loc, "lld invocation error");
+      return {};
+    }
+  }
+
+  // Load the HSA code object.
+  auto hsacoFile = openInputFile(tempHsacoFilename);
+  if (!hsacoFile) {
+    emitError(loc, "read HSA code object from temp file error");
+    return {};
+  }
+
+  StringRef buffer = hsacoFile->getBuffer();
+  return std::make_unique<std::vector<char>>(buffer.begin(), buffer.end());
+}
+
+// Turns textual ISA into a HSACO blob: assemble first, then link. Yields a
+// null pointer when the assembly step produced nothing.
+std::unique_ptr<std::vector<char>>
+SerializeToHsacoPass::serializeISA(const std::string &isa) {
+  if (std::unique_ptr<SmallVectorImpl<char>> object = assembleIsa(isa))
+    return createHsaco(*object);
+  return {};
+}
+
+// Registers the "gpu-to-hsaco" pass, which serializes GPU kernel functions to
+// a HSACO binary annotation.
+void mlir::registerGpuSerializeToHsacoPass() {
+  // Pass factory handed to the registration: it initializes the LLVM AMDGPU
+  // backend components before constructing the pass.
+  auto createPass = [] {
+    LLVMInitializeAMDGPUAsmParser();
+    LLVMInitializeAMDGPUAsmPrinter();
+    LLVMInitializeAMDGPUTarget();
+    LLVMInitializeAMDGPUTargetInfo();
+    LLVMInitializeAMDGPUTargetMC();
+
+    return std::make_unique<SerializeToHsacoPass>();
+  };
+
+  PassRegistration<SerializeToHsacoPass> registerSerializeToHSACO(
+      "gpu-to-hsaco", "Lower GPU kernel function to HSACO binary annotations",
+      createPass);
+}
+#else // MLIR_GPU_TO_HSACO_PASS_ENABLE
+void mlir::registerGpuSerializeToHsacoPass() {}
+#endif // MLIR_GPU_TO_HSACO_PASS_ENABLE
diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt
index b9176cf..978bf1a 100644
--- a/mlir/lib/ExecutionEngine/CMakeLists.txt
+++ b/mlir/lib/ExecutionEngine/CMakeLists.txt
@@ -7,6 +7,7 @@ set(LLVM_OPTIONAL_SOURCES
CudaRuntimeWrappers.cpp
SparseUtils.cpp
ExecutionEngine.cpp
+ RocmRuntimeWrappers.cpp
RunnerUtils.cpp
OptUtils.cpp
JitRunner.cpp
@@ -136,3 +137,51 @@ if(MLIR_CUDA_RUNNER_ENABLED)
${CUDA_RUNTIME_LIBRARY}
)
endif()
+
+if(MLIR_ROCM_RUNNER_ENABLED)
+ # Configure ROCm support.
+  # Resolve the ROCm install prefix: an already defined ROCM_PATH wins, then
+  # the ROCM_PATH environment variable, then /opt/rocm.
+  if (NOT DEFINED ROCM_PATH)
+    if (NOT DEFINED ENV{ROCM_PATH})
+      set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed")
+    else()
+      set(ROCM_PATH "$ENV{ROCM_PATH}" CACHE PATH "Path to which ROCm has been installed")
+    endif()
+  endif()
+  # Derive HIP_PATH independently of how ROCM_PATH was obtained. Previously it
+  # was only set when ROCM_PATH had been undefined, so configuring with
+  # -DROCM_PATH=... left HIP_PATH empty and "${HIP_PATH}/cmake" became "/cmake".
+  if (NOT DEFINED HIP_PATH)
+    set(HIP_PATH "${ROCM_PATH}/hip" CACHE PATH "Path to which HIP has been installed")
+  endif()
+ set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
+ find_package(HIP)
+ if (NOT HIP_FOUND)
+ message(SEND_ERROR "Building mlir with ROCm support requires a working ROCm and HIP install")
+ else()
+ message(STATUS "ROCm HIP version: ${HIP_VERSION}")
+ endif()
+
+ # Locate HIP runtime library.
+ find_library(ROCM_RUNTIME_LIBRARY amdhip64
+ PATHS "${HIP_PATH}/lib")
+ if (NOT ROCM_RUNTIME_LIBRARY)
+ message(SEND_ERROR "Could not locate ROCm HIP runtime library")
+ else()
+ message(STATUS "ROCm HIP runtime lib: ${ROCM_RUNTIME_LIBRARY}")
+ endif()
+
+ add_mlir_library(mlir_rocm_runtime
+ SHARED
+ RocmRuntimeWrappers.cpp
+
+ EXCLUDE_FROM_LIBMLIR
+ )
+ target_compile_definitions(mlir_rocm_runtime
+ PRIVATE
+ __HIP_PLATFORM_HCC__
+ )
+ target_include_directories(mlir_rocm_runtime
+ PRIVATE
+ ${HIP_PATH}/include
+ ${ROCM_PATH}/include
+ )
+ target_link_libraries(mlir_rocm_runtime
+ PRIVATE
+ ${ROCM_RUNTIME_LIBRARY}
+ )
+endif()
diff --git a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp b/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp
index 361ba8f..399a373 100644
--- a/mlir/tools/mlir-rocm-runner/rocm-runtime-wrappers.cpp
+++ b/mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp
@@ -1,4 +1,4 @@
-//===- rocm-runtime-wrappers.cpp - MLIR ROCM runner wrapper library -------===//
+//===- RocmRuntimeWrappers.cpp - MLIR ROCM runtime wrapper library --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -30,29 +30,25 @@
fprintf(stderr, "'%s' failed with '%s'\n", #expr, name); \
}(expr)
-// Static reference to HIP primary context for device ordinal 0.
-static hipCtx_t Context = [] {
- HIP_REPORT_IF_ERROR(hipInit(/*flags=*/0));
- hipDevice_t device;
- HIP_REPORT_IF_ERROR(hipDeviceGet(&device, /*ordinal=*/0));
- hipCtx_t context;
- HIP_REPORT_IF_ERROR(hipDevicePrimaryCtxRetain(&context, device));
- return context;
-}();
-
// Sets the `Context` for the duration of the instance and restores the previous
// context on destruction.
class ScopedContext {
public:
ScopedContext() {
- HIP_REPORT_IF_ERROR(hipCtxGetCurrent(&previous));
- HIP_REPORT_IF_ERROR(hipCtxSetCurrent(Context));
+ // Static reference to HIP primary context for device ordinal 0.
+ static hipCtx_t context = [] {
+ HIP_REPORT_IF_ERROR(hipInit(/*flags=*/0));
+ hipDevice_t device;
+ HIP_REPORT_IF_ERROR(hipDeviceGet(&device, /*ordinal=*/0));
+ hipCtx_t ctx;
+ HIP_REPORT_IF_ERROR(hipDevicePrimaryCtxRetain(&ctx, device));
+ return ctx;
+ }();
+
+ HIP_REPORT_IF_ERROR(hipCtxPushCurrent(context));
}
- ~ScopedContext() { HIP_REPORT_IF_ERROR(hipCtxSetCurrent(previous)); }
-
-private:
- hipCtx_t previous;
+ ~ScopedContext() { HIP_REPORT_IF_ERROR(hipCtxPopCurrent(nullptr)); }
};
extern "C" hipModule_t mgpuModuleLoad(void *data) {
diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt
index 69d123d..775a462 100644
--- a/mlir/test/CMakeLists.txt
+++ b/mlir/test/CMakeLists.txt
@@ -21,8 +21,7 @@ set(MLIR_DIALECT_LINALG_INTEGRATION_TEST_LIB_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTOR
set(MLIR_RUNNER_UTILS_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
# Passed to lit.site.cfg.py.in to set up the path where to find the libraries
-# for the mlir rocm / spirv / vulkan runner tests.
-set(MLIR_ROCM_WRAPPER_LIBRARY_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
+# for the mlir spirv / vulkan runner tests.
set(MLIR_SPIRV_WRAPPER_LIBRARY_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
set(MLIR_VULKAN_WRAPPER_LIBRARY_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
@@ -75,6 +74,10 @@ if(MLIR_CUDA_RUNNER_ENABLED)
list(APPEND MLIR_TEST_DEPENDS mlir_cuda_runtime)
endif()
+if(MLIR_ROCM_RUNNER_ENABLED)
+ list(APPEND MLIR_TEST_DEPENDS mlir_rocm_runtime)
+endif()
+
list(APPEND MLIR_TEST_DEPENDS MLIRUnitTests)
if(LLVM_BUILD_EXAMPLES)
@@ -89,12 +92,6 @@ if(LLVM_BUILD_EXAMPLES)
)
endif()
-if(MLIR_ROCM_RUNNER_ENABLED)
- list(APPEND MLIR_TEST_DEPENDS
- mlir-rocm-runner
- )
-endif()
-
if(MLIR_SPIRV_CPU_RUNNER_ENABLED)
add_subdirectory(mlir-spirv-cpu-runner)
list(APPEND MLIR_TEST_DEPENDS
diff --git a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir b/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir
index 3d7deb9..fb19ac6 100644
--- a/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir
+++ b/mlir/test/Conversion/GPUToROCm/lower-rocdl-kernel-to-hsaco.mlir
@@ -1,6 +1,6 @@
-// RUN: mlir-opt %s --test-kernel-to-hsaco -split-input-file | FileCheck %s
+// RUN: mlir-opt %s --test-gpu-to-hsaco | FileCheck %s
-// CHECK: attributes {rocdl.hsaco = "HSACO"}
+// CHECK: gpu.module @foo attributes {gpu.binary = "HSACO"}
gpu.module @foo {
llvm.func @kernel(%arg0 : f32, %arg1 : !llvm.ptr<f32>)
// CHECK: attributes {gpu.kernel}
@@ -9,8 +9,7 @@ gpu.module @foo {
}
}
-// -----
-
+// CHECK: gpu.module @bar attributes {gpu.binary = "HSACO"}
gpu.module @bar {
// CHECK: func @kernel_a
llvm.func @kernel_a()
diff --git a/mlir/test/Integration/GPU/CUDA/lit.local.cfg b/mlir/test/Integration/GPU/CUDA/lit.local.cfg
index b063ddd..0bdebfe 100644
--- a/mlir/test/Integration/GPU/CUDA/lit.local.cfg
+++ b/mlir/test/Integration/GPU/CUDA/lit.local.cfg
@@ -1,2 +1,2 @@
if not config.enable_cuda_runner:
- config.unsupported = True \ No newline at end of file
+ config.unsupported = True
diff --git a/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir
index 3f2d44f..fdc525b 100644
--- a/mlir/test/mlir-rocm-runner/gpu-to-hsaco.mlir
+++ b/mlir/test/Integration/GPU/ROCM/gpu-to-hsaco.mlir
@@ -1,5 +1,9 @@
-// RUN: mlir-rocm-runner %s \
-// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
+// RUN: mlir-opt %s \
+// RUN: -gpu-kernel-outlining \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco)' \
+// RUN: -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
+// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
// RUN: | FileCheck %s
diff --git a/mlir/test/mlir-rocm-runner/lit.local.cfg b/mlir/test/Integration/GPU/ROCM/lit.local.cfg
index 0ced069..0ced069 100644
--- a/mlir/test/mlir-rocm-runner/lit.local.cfg
+++ b/mlir/test/Integration/GPU/ROCM/lit.local.cfg
diff --git a/mlir/test/mlir-rocm-runner/two-modules.mlir b/mlir/test/Integration/GPU/ROCM/two-modules.mlir
index 7c0faae..3c6c56b 100644
--- a/mlir/test/mlir-rocm-runner/two-modules.mlir
+++ b/mlir/test/Integration/GPU/ROCM/two-modules.mlir
@@ -1,5 +1,9 @@
-// RUN: mlir-rocm-runner %s \
-// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
+// RUN: mlir-opt %s \
+// RUN: -gpu-kernel-outlining \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco)' \
+// RUN: -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
+// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
// RUN: | FileCheck %s
@@ -30,5 +34,5 @@ func @main() {
return
}
-func @mgpuMemGetDeviceMemRef1dInt32(%ptr : memref<?xi32>) -> (memref<?xi32>)
-func @print_memref_i32(%ptr : memref<*xi32>)
+func private @mgpuMemGetDeviceMemRef1dInt32(%ptr : memref<?xi32>) -> (memref<?xi32>)
+func private @print_memref_i32(%ptr : memref<*xi32>)
diff --git a/mlir/test/mlir-rocm-runner/vecadd.mlir b/mlir/test/Integration/GPU/ROCM/vecadd.mlir
index d4dc862..917be3c 100644
--- a/mlir/test/mlir-rocm-runner/vecadd.mlir
+++ b/mlir/test/Integration/GPU/ROCM/vecadd.mlir
@@ -1,5 +1,10 @@
-// RUN: mlir-rocm-runner %s \
-// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
+// RUN: mlir-opt %s \
+// RUN: -convert-scf-to-std \
+// RUN: -gpu-kernel-outlining \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco)' \
+// RUN: -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
+// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
// RUN: | FileCheck %s
diff --git a/mlir/test/mlir-rocm-runner/vector-transferops.mlir b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
index eda541a..c2807b6 100644
--- a/mlir/test/mlir-rocm-runner/vector-transferops.mlir
+++ b/mlir/test/Integration/GPU/ROCM/vector-transferops.mlir
@@ -1,5 +1,10 @@
-// RUN: mlir-rocm-runner %s \
-// RUN: --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext \
+// RUN: mlir-opt %s \
+// RUN: -convert-scf-to-std \
+// RUN: -gpu-kernel-outlining \
+// RUN: -pass-pipeline='gpu.module(strip-debuginfo,convert-gpu-to-rocdl,gpu-to-hsaco)' \
+// RUN: -gpu-to-llvm \
+// RUN: | mlir-cpu-runner \
+// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_rocm_runtime%shlibext \
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void \
// RUN: | FileCheck %s
diff --git a/mlir/test/lib/Transforms/TestConvertGPUKernelToHsaco.cpp b/mlir/test/lib/Transforms/TestConvertGPUKernelToHsaco.cpp
index 58e890b..5a3cb33 100644
--- a/mlir/test/lib/Transforms/TestConvertGPUKernelToHsaco.cpp
+++ b/mlir/test/lib/Transforms/TestConvertGPUKernelToHsaco.cpp
@@ -6,11 +6,9 @@
//
//===----------------------------------------------------------------------===//
-#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
-#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
+#include "mlir/Dialect/GPU/Passes.h"
+
#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "llvm/Support/TargetSelect.h"
@@ -18,38 +16,54 @@
using namespace mlir;
#if MLIR_ROCM_CONVERSIONS_ENABLED
-static OwnedBlob compileIsaToHsacoForTesting(const std::string &, Location,
- StringRef) {
- const char data[] = "HSACO";
- return std::make_unique<std::vector<char>>(data, data + sizeof(data) - 1);
+namespace {
+class TestSerializeToHsacoPass
+ : public PassWrapper<TestSerializeToHsacoPass, gpu::SerializeToBlobPass> {
+public:
+ TestSerializeToHsacoPass();
+
+private:
+ void getDependentDialects(DialectRegistry &registry) const override;
+
+ // Serializes ROCDL IR to HSACO.
+ std::unique_ptr<std::vector<char>>
+ serializeISA(const std::string &isa) override;
+};
+} // namespace
+
+TestSerializeToHsacoPass::TestSerializeToHsacoPass() {
+ this->triple = "amdgcn-amd-amdhsa";
+ this->chip = "gfx900";
+}
+
+void TestSerializeToHsacoPass::getDependentDialects(
+ DialectRegistry &registry) const {
+ registerROCDLDialectTranslation(registry);
+ gpu::SerializeToBlobPass::getDependentDialects(registry);
}
-static std::unique_ptr<llvm::Module>
-translateModuleToROCDL(Operation *m, llvm::LLVMContext &llvmContext,
- StringRef moduleName) {
- registerLLVMDialectTranslation(*m->getContext());
- registerROCDLDialectTranslation(*m->getContext());
- return translateModuleToLLVMIR(m, llvmContext, moduleName);
+std::unique_ptr<std::vector<char>>
+TestSerializeToHsacoPass::serializeISA(const std::string &) {
+ std::string data = "HSACO";
+ return std::make_unique<std::vector<char>>(data.begin(), data.end());
}
namespace mlir {
namespace test {
-void registerTestConvertGPUKernelToHsacoPass() {
- PassPipelineRegistration<>(
- "test-kernel-to-hsaco",
- "Convert all kernel functions to ROCm hsaco blobs",
- [](OpPassManager &pm) {
+// Register test pass to serialize GPU module to a HSAco binary annotation.
+void registerTestGpuSerializeToHsacoPass() {
+ PassRegistration<TestSerializeToHsacoPass> registerSerializeToHsaco(
+ "test-gpu-to-hsaco",
+ "Lower GPU kernel function to HSAco binary annotations", [] {
// Initialize LLVM AMDGPU backend.
LLVMInitializeAMDGPUTarget();
LLVMInitializeAMDGPUTargetInfo();
LLVMInitializeAMDGPUTargetMC();
LLVMInitializeAMDGPUAsmPrinter();
- pm.addPass(createConvertGPUKernelToBlobPass(
- translateModuleToROCDL, compileIsaToHsacoForTesting,
- "amdgcn-amd-amdhsa", "gfx900", "-code-object-v3", "rocdl.hsaco"));
+ return std::make_unique<TestSerializeToHsacoPass>();
});
}
} // namespace test
} // namespace mlir
-#endif
+#endif // MLIR_ROCM_CONVERSIONS_ENABLED
diff --git a/mlir/test/lit.cfg.py b/mlir/test/lit.cfg.py
index 4ba3620..199d722 100644
--- a/mlir/test/lit.cfg.py
+++ b/mlir/test/lit.cfg.py
@@ -77,7 +77,6 @@ tools.extend([
ToolSubst('toy-ch5', unresolved='ignore'),
ToolSubst('%linalg_test_lib_dir', config.linalg_test_lib_dir, unresolved='ignore'),
ToolSubst('%mlir_runner_utils_dir', config.mlir_runner_utils_dir, unresolved='ignore'),
- ToolSubst('%rocm_wrapper_library_dir', config.rocm_wrapper_library_dir, unresolved='ignore'),
ToolSubst('%spirv_wrapper_library_dir', config.spirv_wrapper_library_dir, unresolved='ignore'),
ToolSubst('%vulkan_wrapper_library_dir', config.vulkan_wrapper_library_dir, unresolved='ignore'),
ToolSubst('%mlir_integration_test_dir', config.mlir_integration_test_dir, unresolved='ignore'),
diff --git a/mlir/test/lit.site.cfg.py.in b/mlir/test/lit.site.cfg.py.in
index 0015c13..dbc8460 100644
--- a/mlir/test/lit.site.cfg.py.in
+++ b/mlir/test/lit.site.cfg.py.in
@@ -39,7 +39,6 @@ config.build_examples = @LLVM_BUILD_EXAMPLES@
config.run_cuda_tests = @MLIR_CUDA_CONVERSIONS_ENABLED@
config.enable_cuda_runner = @MLIR_CUDA_RUNNER_ENABLED@
config.run_rocm_tests = @MLIR_ROCM_CONVERSIONS_ENABLED@
-config.rocm_wrapper_library_dir = "@MLIR_ROCM_WRAPPER_LIBRARY_DIR@"
config.enable_rocm_runner = @MLIR_ROCM_RUNNER_ENABLED@
config.spirv_wrapper_library_dir = "@MLIR_SPIRV_WRAPPER_LIBRARY_DIR@"
config.enable_spirv_cpu_runner = @MLIR_SPIRV_CPU_RUNNER_ENABLED@
diff --git a/mlir/tools/CMakeLists.txt b/mlir/tools/CMakeLists.txt
index 37793ce..ac9ca81 100644
--- a/mlir/tools/CMakeLists.txt
+++ b/mlir/tools/CMakeLists.txt
@@ -1,7 +1,6 @@
add_subdirectory(mlir-cpu-runner)
add_subdirectory(mlir-opt)
add_subdirectory(mlir-reduce)
-add_subdirectory(mlir-rocm-runner)
add_subdirectory(mlir-shlib)
add_subdirectory(mlir-spirv-cpu-runner)
add_subdirectory(mlir-translate)
diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp
index 241cee5..428b3d5 100644
--- a/mlir/tools/mlir-opt/mlir-opt.cpp
+++ b/mlir/tools/mlir-opt/mlir-opt.cpp
@@ -65,7 +65,7 @@ void registerTestCallGraphPass();
void registerTestConstantFold();
void registerTestConvVectorization();
void registerTestGpuSerializeToCubinPass();
-void registerTestConvertGPUKernelToHsacoPass();
+void registerTestGpuSerializeToHsacoPass();
void registerTestDataLayoutQuery();
void registerTestDecomposeCallGraphTypes();
void registerTestDialect(DialectRegistry &);
@@ -140,7 +140,7 @@ void registerTestPasses() {
test::registerTestGpuSerializeToCubinPass();
#endif
#if MLIR_ROCM_CONVERSIONS_ENABLED
- test::registerTestConvertGPUKernelToHsacoPass();
+ test::registerTestGpuSerializeToHsacoPass();
#endif
test::registerTestConvVectorization();
test::registerTestDecomposeCallGraphTypes();
diff --git a/mlir/tools/mlir-rocm-runner/CMakeLists.txt b/mlir/tools/mlir-rocm-runner/CMakeLists.txt
deleted file mode 100644
index d238141..0000000
--- a/mlir/tools/mlir-rocm-runner/CMakeLists.txt
+++ /dev/null
@@ -1,127 +0,0 @@
-set(LLVM_OPTIONAL_SOURCES
- rocm-runtime-wrappers.cpp
- mlir-rocm-runner.cpp
- )
-
-if(MLIR_ROCM_RUNNER_ENABLED)
- if (NOT ("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD))
- message(SEND_ERROR
- "Building the mlir rocm runner requires the AMDGPU backend")
- endif()
-
- # Ensure lld is enabled.
- if (NOT "lld" IN_LIST LLVM_ENABLE_PROJECTS)
- message(SEND_ERROR "lld is not enabled. Please revise LLVM_ENABLE_PROJECTS")
- endif()
-
- # lld header files.
- include_directories(${MLIR_SOURCE_DIR}/../lld/include)
-
- # Configure ROCm support.
- if (NOT DEFINED ROCM_PATH)
- if (NOT DEFINED ENV{ROCM_PATH})
- set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to which ROCm has been installed")
- else()
- set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed")
- endif()
- set(HIP_PATH "${ROCM_PATH}/hip" CACHE PATH " Path to which HIP has been installed")
- endif()
- set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
- find_package(HIP)
- if (NOT HIP_FOUND)
- message(SEND_ERROR "Build the mlir rocm runner requires a working ROCm and HIP install")
- else()
- message(STATUS "ROCm HIP version: ${HIP_VERSION}")
- endif()
-
- # Set compile-time flags for ROCm path.
- add_definitions(-D__ROCM_PATH__="${ROCM_PATH}")
-
- # Locate HIP runtime library.
- find_library(ROCM_RUNTIME_LIBRARY amdhip64
- PATHS "${HIP_PATH}/lib")
- if (NOT ROCM_RUNTIME_LIBRARY)
- message(SEND_ERROR "Could not locate ROCm HIP runtime library")
- else()
- message(STATUS "ROCm HIP runtime lib: ${ROCM_RUNTIME_LIBRARY}")
- endif()
-
- # Set HIP compile-time flags.
- add_definitions(-D__HIP_PLATFORM_HCC__)
-
- add_mlir_library(rocm-runtime-wrappers
- SHARED
- rocm-runtime-wrappers.cpp
-
- EXCLUDE_FROM_LIBMLIR
- )
- target_include_directories(rocm-runtime-wrappers
- PRIVATE
- "${HIP_PATH}/../include"
- "${HIP_PATH}/include"
- )
- target_link_libraries(rocm-runtime-wrappers
- PRIVATE
- ${ROCM_RUNTIME_LIBRARY}
- )
-
- get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
- set(LIBS
- ${conversion_libs}
- lldCommon
- lldDriver
- lldELF
- MLIRJitRunner
- MLIRAnalysis
- MLIREDSC
- MLIRExecutionEngine
- MLIRGPU
- MLIRIR
- MLIRLLVMIR
- MLIRLLVMToLLVMIRTranslation
- MLIRParser
- MLIRROCDLIR
- MLIRStandard
- MLIRSupport
- MLIRTargetLLVMIRExport
- MLIRROCDLToLLVMIRTranslation
- MLIRTransforms
- MLIRTranslation
- ${ROCM_RUNTIME_LIBRARY}
- )
-
- # Manually expand the target library, since our MLIR libraries
- # aren't plugged into the LLVM dependency tracking. If we don't
- # do this then we can't insert the CodeGen library after ourselves
- llvm_expand_pseudo_components(TARGET_LIBS AllTargetsCodeGens AllTargetsAsmParsers)
- # Prepend LLVM in front of every target, this is how the library
- # are named with CMake
- SET(targets_to_link)
- FOREACH(t ${TARGET_LIBS})
- LIST(APPEND targets_to_link "LLVM${t}")
- ENDFOREACH(t)
-
- add_llvm_tool(mlir-rocm-runner
- mlir-rocm-runner.cpp
-
- DEPENDS
- rocm-runtime-wrappers
-
- LINK_COMPONENTS
-
- Core
- LTO
- MC
- MCParser
- Option
- Support
- )
- llvm_update_compile_flags(mlir-rocm-runner)
- target_include_directories(mlir-rocm-runner
- PRIVATE
- "${HIP_PATH}/../include"
- "${HIP_PATH}/include"
- )
- target_link_libraries(mlir-rocm-runner PRIVATE ${LIBS} ${targets_to_link})
-
-endif()
diff --git a/mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp b/mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp
deleted file mode 100644
index c2f9abb..0000000
--- a/mlir/tools/mlir-rocm-runner/mlir-rocm-runner.cpp
+++ /dev/null
@@ -1,349 +0,0 @@
-//===- mlir-rocm-runner.cpp - MLIR ROCM Execution Driver-------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This is a command line utility that executes an MLIR file on the GPU by
-// translating MLIR to ROCDL/LLVM IR before JIT-compiling and executing the
-// latter.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/STLExtras.h"
-
-#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
-#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
-#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
-#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
-#include "mlir/Dialect/GPU/GPUDialect.h"
-#include "mlir/Dialect/GPU/Passes.h"
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/ExecutionEngine/JitRunner.h"
-#include "mlir/ExecutionEngine/OptUtils.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Support/FileUtilities.h"
-#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Export.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/Passes.h"
-#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/FileUtilities.h"
-#include "llvm/Support/InitLLVM.h"
-#include "llvm/Support/LineIterator.h"
-#include "llvm/Support/Program.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/TargetSelect.h"
-
-// MC headers.
-#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCObjectFileInfo.h"
-#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCParser/AsmLexer.h"
-#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCTargetOptionsCommandFlags.h"
-
-// lld headers.
-#include "lld/Common/Driver.h"
-
-// HIP headers.
-#include "hip/hip_version.h"
-
-#include <mutex>
-
-using namespace mlir;
-using namespace llvm;
-
-using Blob = SmallVector<char, 0>;
-
-static cl::opt<std::string> tripleName("triple", cl::desc("target triple"),
- cl::value_desc("triple string"),
- cl::init("amdgcn-amd-amdhsa"));
-
-static cl::opt<std::string> targetChip("target", cl::desc("target chip"),
- cl::value_desc("AMDGPU ISA version"),
- cl::init(""));
-
-static cl::opt<std::string> features("feature", cl::desc("target features"),
- cl::value_desc("AMDGPU target features"),
- cl::init(""));
-
-static constexpr const char kRunnerProgram[] = "mlir-rocm-runner";
-static constexpr const char kRocmAgentEnumerator[] = "rocm_agent_enumerator";
-static constexpr const char kDefaultTargetChip[] = "gfx900";
-
-static LogicalResult assembleIsa(const std::string isa, StringRef name,
- Blob &result) {
- raw_svector_ostream os(result);
-
- std::string error;
- Triple theTriple(Triple::normalize(tripleName));
- const Target *theTarget =
- TargetRegistry::lookupTarget(theTriple.normalize(), error);
- if (!theTarget) {
- WithColor::error(errs(), name) << error;
- return failure();
- }
-
- SourceMgr srcMgr;
- srcMgr.AddNewSourceBuffer(MemoryBuffer::getMemBuffer(isa), SMLoc());
-
- const MCTargetOptions mcOptions;
- std::unique_ptr<MCRegisterInfo> mri(theTarget->createMCRegInfo(tripleName));
- std::unique_ptr<MCAsmInfo> mai(
- theTarget->createMCAsmInfo(*mri, tripleName, mcOptions));
- mai->setRelaxELFRelocations(true);
-
- MCObjectFileInfo mofi;
- MCContext ctx(mai.get(), mri.get(), &mofi, &srcMgr, &mcOptions);
- mofi.InitMCObjectFileInfo(theTriple, false, ctx, false);
-
- SmallString<128> cwd;
- if (!sys::fs::current_path(cwd))
- ctx.setCompilationDir(cwd);
-
- std::unique_ptr<MCStreamer> mcStreamer;
- std::unique_ptr<MCInstrInfo> mcii(theTarget->createMCInstrInfo());
- std::unique_ptr<MCSubtargetInfo> sti(
- theTarget->createMCSubtargetInfo(tripleName, targetChip, features));
-
- MCCodeEmitter *ce = theTarget->createMCCodeEmitter(*mcii, *mri, ctx);
- MCAsmBackend *mab = theTarget->createMCAsmBackend(*sti, *mri, mcOptions);
- mcStreamer.reset(theTarget->createMCObjectStreamer(
- theTriple, ctx, std::unique_ptr<MCAsmBackend>(mab),
- mab->createObjectWriter(os), std::unique_ptr<MCCodeEmitter>(ce), *sti,
- mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible,
- /*DWARFMustBeAtTheEnd*/ false));
- mcStreamer->setUseAssemblerInfoForParsing(true);
-
- std::unique_ptr<MCAsmParser> parser(
- createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
- std::unique_ptr<MCTargetAsmParser> tap(
- theTarget->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
-
- if (!tap) {
- WithColor::error(errs(), name) << "assembler initialization error.\n";
- return failure();
- }
-
- parser->setTargetParser(*tap);
- parser->Run(false);
-
- return success();
-}
-
-static std::mutex mutex;
-static LogicalResult createHsaco(const Blob &isaBlob, StringRef name,
- Blob &hsacoBlob) {
- // Save the ISA binary to a temp file.
- int tempIsaBinaryFd = -1;
- SmallString<128> tempIsaBinaryFilename;
- std::error_code ec = sys::fs::createTemporaryFile(
- "kernel", "o", tempIsaBinaryFd, tempIsaBinaryFilename);
- if (ec) {
- WithColor::error(errs(), name)
- << "temporary file for ISA binary creation error.\n";
- return failure();
- }
- FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
- raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true);
- tempIsaBinaryOs << isaBlob;
- tempIsaBinaryOs.close();
-
- // Create a temp file for HSA code object.
- int tempHsacoFD = -1;
- SmallString<128> tempHsacoFilename;
- ec = sys::fs::createTemporaryFile("kernel", "hsaco", tempHsacoFD,
- tempHsacoFilename);
- if (ec) {
- WithColor::error(errs(), name)
- << "temporary file for HSA code object creation error.\n";
- return failure();
- }
- FileRemover cleanupHsaco(tempHsacoFilename);
-
- const std::lock_guard<std::mutex> lock(mutex);
- // Invoke lld. Expect a true return value from lld.
- bool ret = lld::elf::link({"ld.lld", "-shared", tempIsaBinaryFilename.c_str(),
- "-o", tempHsacoFilename.c_str()},
- /*canEarlyExit=*/false, llvm::outs(), llvm::errs());
- if (!ret) {
- WithColor::error(errs(), name) << "lld invocation error.\n";
- return failure();
- }
-
- // Load the HSA code object.
- auto hsacoFile = mlir::openInputFile(tempHsacoFilename);
- if (!hsacoFile) {
- WithColor::error(errs(), name)
- << "read HSA code object from temp file error.\n";
- return failure();
- }
- hsacoBlob.assign(hsacoFile->getBuffer().begin(),
- hsacoFile->getBuffer().end());
-
- return success();
-}
-
-static std::unique_ptr<llvm::Module>
-compileModuleToROCDLIR(Operation *m, llvm::LLVMContext &llvmContext,
- StringRef name) {
- auto llvmModule = translateModuleToROCDLIR(m, llvmContext, name);
- // TODO: Link with ROCm-Device-Libs in case needed (ex: the Module
- // depends on math functions).
- return llvmModule;
-}
-
-static OwnedBlob compileISAToHsaco(const std::string isa, Location loc,
- StringRef name) {
- // ISA -> ISA in binary form via MC.
- // Use lld to create HSA code object.
- Blob isaBlob;
- Blob hsacoBlob;
-
- if (succeeded(assembleIsa(isa, name, isaBlob)) &&
- succeeded(createHsaco(isaBlob, name, hsacoBlob)))
- return std::make_unique<std::vector<char>>(hsacoBlob.begin(),
- hsacoBlob.end());
-
- WithColor::error(errs(), name) << "producing HSA code object error.\n";
- return {};
-}
-
-static void configTargetChip() {
- // Set targetChip to default value first.
- targetChip = kDefaultTargetChip;
-
- // Locate rocm_agent_enumerator.
- llvm::ErrorOr<std::string> rocmAgentEnumerator = llvm::sys::findProgramByName(
- kRocmAgentEnumerator, {__ROCM_PATH__ "/bin"});
- std::error_code ec;
- if ((ec = rocmAgentEnumerator.getError())) {
- WithColor::warning(errs(), kRunnerProgram)
- << kRocmAgentEnumerator << " couldn't be located under "
- << __ROCM_PATH__ << ", set target as " << kDefaultTargetChip << "\n";
- return;
- }
-
- // Prepare temp file to hold the outputs.
- int tempFd = -1;
- SmallString<128> tempFilename;
- ec = sys::fs::createTemporaryFile("rocm_agent", "txt", tempFd, tempFilename);
- if (ec) {
- WithColor::warning(errs(), kRunnerProgram)
- << "temporary file for " << kRocmAgentEnumerator
- << " creation error, set target as " << kDefaultTargetChip << "\n";
- return;
- }
- FileRemover cleanup(tempFilename);
-
- // Invoke rocm_agent_enumerator.
- std::string errorMessage;
- SmallVector<StringRef, 2> args{"-t", "GPU"};
- Optional<StringRef> redirects[3] = {{""}, tempFilename.str(), {""}};
- int result =
- llvm::sys::ExecuteAndWait(rocmAgentEnumerator.get(), args, llvm::None,
- redirects, 0, 0, &errorMessage);
- if (result) {
- WithColor::warning(errs(), kRunnerProgram)
- << kRocmAgentEnumerator << " invocation error: " << errorMessage
- << ", set target as " << kDefaultTargetChip << "\n";
- return;
- }
-
- // Load and parse the result.
- auto gfxIsaList = mlir::openInputFile(tempFilename);
- if (!gfxIsaList) {
- WithColor::error(errs(), kRunnerProgram)
- << "read ROCm agent list temp file error, set target as "
- << kDefaultTargetChip << "\n";
- return;
- }
- for (line_iterator lines(*gfxIsaList); !lines.is_at_end(); ++lines) {
- // Skip the line with content "gfx000".
- if (*lines == "gfx000")
- continue;
- // Use the first ISA version found.
- targetChip = lines->str();
- break;
- }
-}
-
-static void configTargetFeatures() {
- if (features.size() > 0)
- features += ",";
- // After ROCm 3.5, adopt HSA code object V3.
- if (HIP_VERSION_MAJOR >= 3 && HIP_VERSION_MINOR >= 5)
- features += "+code-object-v3";
- else
- features += "-code-object-v3";
-}
-
-static LogicalResult runMLIRPasses(ModuleOp m) {
- PassManager pm(m.getContext());
- applyPassManagerCLOptions(pm);
-
- // Configure target chip ISA version if it has not been specified.
- if (!targetChip.size())
- configTargetChip();
-
- // Configure target features per ROCm / HIP version.
- configTargetFeatures();
-
- const char gpuBinaryAnnotation[] = "rocdl.hsaco";
- pm.addPass(createLowerToCFGPass());
- pm.addPass(createGpuKernelOutliningPass());
- auto &kernelPm = pm.nest<gpu::GPUModuleOp>();
- kernelPm.addPass(createStripDebugInfoPass());
- kernelPm.addPass(createLowerGpuOpsToROCDLOpsPass());
- kernelPm.addPass(createConvertGPUKernelToBlobPass(
- compileModuleToROCDLIR, compileISAToHsaco, tripleName, targetChip,
- features, gpuBinaryAnnotation));
- pm.addPass(createGpuToLLVMConversionPass(gpuBinaryAnnotation));
-
- return pm.run(m);
-}
-
-int main(int argc, char **argv) {
- registerPassManagerCLOptions();
- llvm::InitLLVM y(argc, argv);
- llvm::InitializeAllTargetInfos();
- llvm::InitializeAllTargetMCs();
- llvm::InitializeAllAsmParsers();
-
- // Initialize LLVM AMDGPU backend.
- LLVMInitializeAMDGPUTarget();
- LLVMInitializeAMDGPUTargetInfo();
- LLVMInitializeAMDGPUTargetMC();
- LLVMInitializeAMDGPUAsmPrinter();
-
- mlir::initializeLLVMPasses();
-
- mlir::JitRunnerConfig jitRunnerConfig;
- jitRunnerConfig.mlirTransformer = runMLIRPasses;
-
- mlir::DialectRegistry registry;
- registry.insert<mlir::LLVM::LLVMDialect, mlir::gpu::GPUDialect,
- mlir::ROCDL::ROCDLDialect, mlir::StandardOpsDialect>();
- mlir::registerLLVMDialectTranslation(registry);
- mlir::registerROCDLDialectTranslation(registry);
-
- return mlir::JitRunnerMain(argc, argv, registry, jitRunnerConfig);
-}