//===- GPUOpsLowering.h - GPU FuncOp / ReturnOp lowering -------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
#define MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_

#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"

namespace mlir {

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

/// Find or create an external function declaration in the given module.
LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc,
                                     OpBuilder &b, StringRef name,
                                     LLVM::LLVMFunctionType type);

/// Create a global that contains the given string. If a global with the same
/// string already exists in the module, return that global.
LLVM::GlobalOp getOrCreateStringConstant(OpBuilder &b, Location loc,
                                         gpu::GPUModuleOp moduleOp, Type llvmI8,
                                         StringRef namePrefix, StringRef str,
                                         uint64_t alignment = 0,
                                         unsigned addrSpace = 0);
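// A usage sketch for the helpers above (illustrative only, not part of this
// header's API; assumes an OpBuilder `b`, a Location `loc`, and a
// gpu::GPUModuleOp `moduleOp` are in scope, e.g. inside a conversion
// pattern's matchAndRewrite):
//
//   Type llvmI8 = IntegerType::get(b.getContext(), 8);
//   LLVM::GlobalOp formatGlobal = getOrCreateStringConstant(
//       b, loc, moduleOp, llvmI8, /*namePrefix=*/"printfFormat_",
//       /*str=*/"Hello %d\n");
//   // Take the address of the global to pass it to a printf-like callee.
//   Value formatPtr = b.create<LLVM::AddressOfOp>(loc, formatGlobal);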
//===----------------------------------------------------------------------===//
// Lowering Patterns
//===----------------------------------------------------------------------===//

/// Lowering for gpu.dynamic.shared.memory to the LLVM dialect. The pattern
/// first creates a 0-sized global array symbol, as LLVM expects for dynamic
/// shared memory. It then constructs a memref descriptor from that symbol and
/// returns it.
struct GPUDynamicSharedMemoryOpLowering
    : public ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp> {
  using ConvertOpToLLVMPattern<
      gpu::DynamicSharedMemoryOp>::ConvertOpToLLVMPattern;
  GPUDynamicSharedMemoryOpLowering(const LLVMTypeConverter &converter,
                                   unsigned alignmentBit = 0,
                                   PatternBenefit benefit = 1)
      : ConvertOpToLLVMPattern(converter, benefit),
        alignmentBit(alignmentBit) {}

  LogicalResult
  matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  /// The alignment of the generated global symbol, in bits.
  unsigned alignmentBit;
};

struct GPUFuncOpLoweringOptions {
  /// The address space to use for `alloca`s in private memory.
  unsigned allocaAddrSpace;
  /// The address space to use for declaring workgroup memory.
  unsigned workgroupAddrSpace;

  /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
  /// should be used.
  StringAttr kernelAttributeName;
  /// The attribute name to use to set the block size. Null if no attribute
  /// should be used.
  StringAttr kernelBlockSizeAttributeName;

  /// The calling convention to use for kernel functions.
  LLVM::CConv kernelCallingConvention = LLVM::CConv::C;
  /// The calling convention to use for non-kernel functions.
  LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C;

  /// Whether to encode workgroup attributions as additional arguments instead
  /// of a global variable.
  bool encodeWorkgroupAttributionsAsArguments = false;
};

struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
  GPUFuncOpLowering(const LLVMTypeConverter &converter,
                    const GPUFuncOpLoweringOptions &options,
                    PatternBenefit benefit = 1)
      : ConvertOpToLLVMPattern(converter, benefit),
        allocaAddrSpace(options.allocaAddrSpace),
        workgroupAddrSpace(options.workgroupAddrSpace),
        kernelAttributeName(options.kernelAttributeName),
        kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName),
        kernelCallingConvention(options.kernelCallingConvention),
        nonKernelCallingConvention(options.nonKernelCallingConvention),
        encodeWorkgroupAttributionsAsArguments(
            options.encodeWorkgroupAttributionsAsArguments) {}

  LogicalResult
  matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  /// The address space to use for `alloca`s in private memory.
  unsigned allocaAddrSpace;
  /// The address space to use for declaring workgroup memory.
  unsigned workgroupAddrSpace;

  /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
  /// should be used.
  StringAttr kernelAttributeName;
  /// The attribute name to use to set the block size. Null if no attribute
  /// should be used.
  StringAttr kernelBlockSizeAttributeName;

  /// The calling convention to use for kernel functions.
  LLVM::CConv kernelCallingConvention;
  /// The calling convention to use for non-kernel functions.
  LLVM::CConv nonKernelCallingConvention;

  /// Whether to encode workgroup attributions as additional arguments instead
  /// of a global variable.
  bool encodeWorkgroupAttributionsAsArguments;
};

/// The lowering of gpu.printf to calls to the HIP hostcall runtime.
///
/// Simplifies the scheme of llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp,
/// as we don't have to deal with %s (even if there were first-class strings
/// in MLIR, they're not legal input to gpu.printf) or with non-constant
/// format strings.
struct GPUPrintfOpToHIPLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};

/// The lowering of gpu.printf to a call to an external printf() function.
///
/// This pattern adds a declaration of printf() to the GPU module if needed
/// and separates the format strings out into global constants. For some
/// runtimes, such as OpenCL on AMD, this is sufficient setup, as the compiler
/// will lower printf calls to appropriate device-side code.
struct GPUPrintfOpToLLVMCallLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  GPUPrintfOpToLLVMCallLowering(const LLVMTypeConverter &converter,
                                int addressSpace = 0)
      : ConvertOpToLLVMPattern(converter), addressSpace(addressSpace) {}

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;

private:
  int addressSpace;
};

/// Lowering of gpu.printf to a call to the vprintf standard library function.
struct GPUPrintfOpToVPrintfLowering
    : public ConvertOpToLLVMPattern<gpu::PrintfOp> {
  using ConvertOpToLLVMPattern<gpu::PrintfOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::PrintfOp gpuPrintfOp, gpu::PrintfOpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};

struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
  using ConvertOpToLLVMPattern<gpu::ReturnOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override;
};
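// A registration sketch for the patterns above (illustrative only; the
// address spaces and the kernel attribute name are NVVM-flavored example
// values, not fixed requirements; assumes an LLVMTypeConverter `converter`
// and a RewritePatternSet `patterns` are in scope):
//
//   GPUFuncOpLoweringOptions options;
//   options.allocaAddrSpace = 0;    // Private allocas in the generic space.
//   options.workgroupAddrSpace = 3; // Shared/workgroup memory space.
//   options.kernelAttributeName =
//       StringAttr::get(&converter.getContext(), "nvvm.kernel");
//   patterns.add<GPUFuncOpLowering>(converter, options);
//   patterns.add<GPUReturnOpLowering, GPUPrintfOpToVPrintfLowering>(
//       converter);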
namespace impl {
/// Unrolls op to array/vector elements.
LogicalResult scalarizeVectorOp(Operation *op, ValueRange operands,
                                ConversionPatternRewriter &rewriter,
                                const LLVMTypeConverter &converter);
} // namespace impl

/// Unrolls SourceOp to array/vector elements.
template <typename SourceOp>
struct ScalarizeVectorOpLowering : public ConvertOpToLLVMPattern<SourceOp> {
public:
  using ConvertOpToLLVMPattern<SourceOp>::ConvertOpToLLVMPattern;

  LogicalResult
  matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor,
                  ConversionPatternRewriter &rewriter) const override {
    return impl::scalarizeVectorOp(op, adaptor.getOperands(), rewriter,
                                   *this->getTypeConverter());
  }
};

} // namespace mlir

#endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_