diff options
Diffstat (limited to 'mlir/lib/Dialect/GPU/Pipelines/GPUToXeVMPipeline.cpp')
-rw-r--r-- | mlir/lib/Dialect/GPU/Pipelines/GPUToXeVMPipeline.cpp | 139 |
1 files changed, 139 insertions, 0 deletions
diff --git a/mlir/lib/Dialect/GPU/Pipelines/GPUToXeVMPipeline.cpp b/mlir/lib/Dialect/GPU/Pipelines/GPUToXeVMPipeline.cpp new file mode 100644 index 0000000..1a1485b --- /dev/null +++ b/mlir/lib/Dialect/GPU/Pipelines/GPUToXeVMPipeline.cpp @@ -0,0 +1,139 @@ +//===- GPUToXeVMPipeline.cpp - Lowering pipeline to XeVM/LLVM -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass for testing the lowering to XeVM as a generally +// usable sink pass. If XeGPU ops are used, it expects the MLIR code to have +// XeGPU ops already embedded in gpu code. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Conversion/AffineToStandard/AffineToStandard.h" +#include "mlir/Conversion/MathToXeVM/MathToXeVM.h" +#include "mlir/Conversion/Passes.h" +#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h" +#include "mlir/Conversion/VectorToSCF/VectorToSCF.h" +#include "mlir/Conversion/XeGPUToXeVM/XeGPUToXeVM.h" +#include "mlir/Conversion/XeVMToLLVM/XeVMToLLVM.h" +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Dialect/GPU/Pipelines/Passes.h" +#include "mlir/Dialect/GPU/Transforms/Passes.h" +#include "mlir/Dialect/LLVMIR/Transforms/RequestCWrappers.h" +#include "mlir/Dialect/MemRef/Transforms/Passes.h" +#include "mlir/Dialect/XeGPU/Transforms/Passes.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Pass/PassOptions.h" +#include "mlir/Target/LLVM/XeVM/Target.h" +#include "mlir/Transforms/Passes.h" + +using namespace mlir; + +namespace { +//===----------------------------------------------------------------------===// +// Pre-GPU common pipeline for both Host and GPU. +//===----------------------------------------------------------------------===// +void buildPreGPUCommonPassPipeline( + OpPassManager &pm, const mlir::gpu::GPUToXeVMPipelineOptions &options) { + // builtin.module scope passes. + pm.addPass(createCSEPass()); + pm.addPass(createConvertVectorToSCFPass()); + { + GpuXeVMAttachTargetOptions xevmTargetOptions; + xevmTargetOptions.moduleMatcher = options.xevmModuleMatcher; + xevmTargetOptions.triple = options.zebinTriple; + xevmTargetOptions.chip = options.zebinChip; + xevmTargetOptions.optLevel = options.optLevel; + xevmTargetOptions.cmdOptions = options.cmdOptions; + pm.addPass(createGpuXeVMAttachTarget(xevmTargetOptions)); + } + pm.addPass(createLowerAffinePass()); + pm.addNestedPass<func::FuncOp>(createGpuAsyncRegionPass()); +} + +//===----------------------------------------------------------------------===// +// GPUModule-specific stuff. +//===----------------------------------------------------------------------===// +void buildGPUPassPipeline(OpPassManager &pm, + const mlir::gpu::GPUToXeVMPipelineOptions &options) { + if (options.xegpuOpLevel == "workgroup") { + pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUWgToSgDistribute()); + pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass()); + pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUBlocking()); + pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass()); + pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass()); + } + if (options.xegpuOpLevel == "subgroup" || + options.xegpuOpLevel == "workgroup") { + pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUPropagateLayout()); + pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUSubgroupDistribute()); + pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass()); + pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass()); + pm.addNestedPass<gpu::GPUModuleOp>(createLoopInvariantCodeMotionPass()); + pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass()); + pm.addNestedPass<gpu::GPUModuleOp>(xegpu::createXeGPUVectorLinearize()); + } + pm.addNestedPass<gpu::GPUModuleOp>(createConvertMathToXeVM()); + pm.addNestedPass<gpu::GPUModuleOp>(createConvertXeGPUToXeVMPass()); + { + ConvertGpuOpsToLLVMSPVOpsOptions gpuToLLVMSPVOptions; + gpuToLLVMSPVOptions.use64bitIndex = options.use64bitIndex; + pm.addNestedPass<gpu::GPUModuleOp>( + createConvertGpuOpsToLLVMSPVOps(gpuToLLVMSPVOptions)); + } + pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass()); + pm.addNestedPass<gpu::GPUModuleOp>(createReconcileUnrealizedCastsPass()); +} + +//===----------------------------------------------------------------------===// +// Post-GPU pipeline for both Host and GPU. +//===----------------------------------------------------------------------===// +void buildPostGPUCommonPassPipeline( + OpPassManager &pm, const mlir::gpu::GPUToXeVMPipelineOptions &options) { + // builtin.module scope passes. + pm.addPass(createSCFToControlFlowPass()); + pm.addPass(memref::createExpandStridedMetadataPass()); + { + GpuToLLVMConversionPassOptions gpuToLLVMOptions; + gpuToLLVMOptions.hostBarePtrCallConv = options.hostBarePtrCallConv; + gpuToLLVMOptions.kernelBarePtrCallConv = options.kernelBarePtrCallConv; + pm.addPass(createGpuToLLVMConversionPass(gpuToLLVMOptions)); + } + pm.addPass(createLowerAffinePass()); + pm.addPass(createConvertToLLVMPass()); + pm.addPass(createReconcileUnrealizedCastsPass()); + // gpu-module-to-binary + { + GpuModuleToBinaryPassOptions gpuToModuleBinOptions; + gpuToModuleBinOptions.compilationTarget = options.binaryFormat; + gpuToModuleBinOptions.cmdOptions = options.cmdOptions; + pm.addPass(createGpuModuleToBinaryPass(gpuToModuleBinOptions)); + } +} +} // namespace + +void mlir::gpu::buildLowerToXeVMPassPipeline( + OpPassManager &pm, const GPUToXeVMPipelineOptions &options) { + // Pre-GPU common pipelines. + buildPreGPUCommonPassPipeline(pm, options); + + // GPUModule-specific stuff. + buildGPUPassPipeline(pm, options); + + // Post-GPU pipeline for both Host and GPU. + buildPostGPUCommonPassPipeline(pm, options); +} + +void mlir::gpu::registerGPUToXeVMPipeline() { + PassPipelineRegistration<GPUToXeVMPipelineOptions>( + "gpu-lower-to-xevm-pipeline", + "The default GPU to XeVM lowering pipeline. It starts by lowering GPU " + "code to the " + "specified compilation target (default is fatbin) then lowers the host " + "code.", + buildLowerToXeVMPassPipeline); +} |