author     Matthias Springer <me@m-sp.org>   2024-06-23 09:51:12 +0200
committer  GitHub <noreply@github.com>       2024-06-23 09:51:12 +0200
commit     3f33d2f3ca570f1e4e016a07f049724fdff6dad9 (patch)
tree       b36ac566914199d1cb73f165cb969ce7348d04a2
parent     346c4a88afedcef3da40f68c83f0a5b3e0ac61ea (diff)
[mlir][GPUToNVVM] Fix memref function args/results (#96392)
The `gpu.func` op lowering accounts for memref arguments/results (both the "normal" and the bare-pointer calling conventions are supported), but the `gpu.return` op lowering did not. The latter produced invalid IR that failed to verify. This commit uses the same lowering strategy as for `func.return` in the `gpu.return` lowering. (The C++ implementation is copied; we may want to share some code between the `func` and `gpu` lowerings in the future.)
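As an illustration, here is a sketch of the lowering, distilled from the test case added in this commit (the input function is the new test; the lowered result types are taken from its CHECK lines):

    gpu.func @memref_signature(%m: memref<2xf32>, %f: f32) -> (memref<2xf32>, f32) {
      gpu.return %m, %f : memref<2xf32>, f32
    }

    // Default calling convention: multiple results are packed into an LLVM
    // struct, with the memref result lowered to a full memref descriptor:
    //   -> !llvm.struct<(struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>, f32)>
    //
    // Bare-pointer calling convention (use-bare-ptr-memref-call-conv=1): the
    // memref result is reduced to its pointer:
    //   -> !llvm.struct<(ptr, f32)>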
-rw-r--r--  mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp  56
-rw-r--r--  mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h     5
-rw-r--r--  mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir   29
3 files changed, 82 insertions, 8 deletions
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
index 052a48c..3e6fcc0 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
@@ -684,6 +684,62 @@ LogicalResult GPUDynamicSharedMemoryOpLowering::matchAndRewrite(
   return success();
 }
 
+LogicalResult GPUReturnOpLowering::matchAndRewrite(
+    gpu::ReturnOp op, OpAdaptor adaptor,
+    ConversionPatternRewriter &rewriter) const {
+  Location loc = op.getLoc();
+  unsigned numArguments = op.getNumOperands();
+  SmallVector<Value, 4> updatedOperands;
+
+  bool useBarePtrCallConv = getTypeConverter()->getOptions().useBarePtrCallConv;
+  if (useBarePtrCallConv) {
+    // For the bare-ptr calling convention, extract the aligned pointer to
+    // be returned from the memref descriptor.
+    for (auto it : llvm::zip(op->getOperands(), adaptor.getOperands())) {
+      Type oldTy = std::get<0>(it).getType();
+      Value newOperand = std::get<1>(it);
+      if (isa<MemRefType>(oldTy) && getTypeConverter()->canConvertToBarePtr(
+                                        cast<BaseMemRefType>(oldTy))) {
+        MemRefDescriptor memrefDesc(newOperand);
+        newOperand = memrefDesc.allocatedPtr(rewriter, loc);
+      } else if (isa<UnrankedMemRefType>(oldTy)) {
+        // Unranked memref is not supported in the bare pointer calling
+        // convention.
+        return failure();
+      }
+      updatedOperands.push_back(newOperand);
+    }
+  } else {
+    updatedOperands = llvm::to_vector<4>(adaptor.getOperands());
+    (void)copyUnrankedDescriptors(rewriter, loc, op.getOperands().getTypes(),
+                                  updatedOperands,
+                                  /*toDynamic=*/true);
+  }
+
+  // If ReturnOp has 0 or 1 operand, create it and return immediately.
+  if (numArguments <= 1) {
+    rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(
+        op, TypeRange(), updatedOperands, op->getAttrs());
+    return success();
+  }
+
+  // Otherwise, we need to pack the arguments into an LLVM struct type before
+  // returning.
+  auto packedType = getTypeConverter()->packFunctionResults(
+      op.getOperandTypes(), useBarePtrCallConv);
+  if (!packedType) {
+    return rewriter.notifyMatchFailure(op, "could not convert result types");
+  }
+
+  Value packed = rewriter.create<LLVM::UndefOp>(loc, packedType);
+  for (auto [idx, operand] : llvm::enumerate(updatedOperands)) {
+    packed = rewriter.create<LLVM::InsertValueOp>(loc, packed, operand, idx);
+  }
+  rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, TypeRange(), packed,
+                                              op->getAttrs());
+  return success();
+}
+
 void mlir::populateGpuMemorySpaceAttributeConversions(
     TypeConverter &typeConverter, const MemorySpaceMapping &mapping) {
   typeConverter.addTypeAttributeConversion(
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
index 0ec260b..92e69ba 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
@@ -112,10 +112,7 @@ struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> {
   LogicalResult
   matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const override {
-    rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, adaptor.getOperands());
-    return success();
-  }
+                  ConversionPatternRewriter &rewriter) const override;
 };
 
 namespace impl {
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index c57cfd2..d914790 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -1,4 +1,5 @@
 // RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1' -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1 use-bare-ptr-memref-call-conv=1' -split-input-file | FileCheck %s --check-prefix=CHECK-BARE
 // RUN: mlir-opt %s -transform-interpreter | FileCheck %s
 
 gpu.module @test_module_0 {
@@ -670,7 +671,7 @@ gpu.module @test_module_32 {
   }
 }
 
-gpu.module @gpumodule {
+gpu.module @test_module_33 {
   // CHECK-LABEL: func @kernel_with_block_size()
   // CHECK: attributes {gpu.kernel, gpu.known_block_size = array<i32: 128, 1, 1>, nvvm.kernel, nvvm.maxntid = array<i32: 128, 1, 1>}
   gpu.func @kernel_with_block_size() kernel attributes {known_block_size = array<i32: 128, 1, 1>} {
@@ -679,6 +680,28 @@ gpu.module @gpumodule {
 }
+gpu.module @test_module_34 {
+  // CHECK-LABEL: llvm.func @memref_signature(
+  // CHECK-SAME: %{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: f32) -> !llvm.struct<(struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>, f32)>
+  // CHECK: llvm.mlir.undef
+  // CHECK: llvm.insertvalue
+  // CHECK: llvm.insertvalue
+  // CHECK: llvm.insertvalue
+  // CHECK: llvm.insertvalue
+  // CHECK: llvm.insertvalue
+  // CHECK: llvm.mlir.undef
+  // CHECK: llvm.insertvalue
+  // CHECK: llvm.insertvalue
+  // CHECK: llvm.return
+
+  // CHECK-BARE-LABEL: llvm.func @memref_signature(
+  // CHECK-BARE-SAME: %{{.*}}: !llvm.ptr, %{{.*}}: f32) -> !llvm.struct<(ptr, f32)>
+  gpu.func @memref_signature(%m: memref<2xf32>, %f: f32) -> (memref<2xf32>, f32) {
+    gpu.return %m, %f : memref<2xf32>, f32
+  }
+}
+
+
 module attributes {transform.with_named_sequence} {
   transform.named_sequence @__transform_main(%toplevel_module: !transform.any_op {transform.readonly}) {
     %gpu_module = transform.structured.match ops{["gpu.module"]} in %toplevel_module
@@ -701,9 +724,7 @@ module attributes {transform.with_named_sequence} {
     } with type_converter {
       transform.apply_conversion_patterns.memref.memref_to_llvm_type_converter
         {index_bitwidth = 64,
-         use_bare_ptr = true,
-         use_bare_ptr_memref_call_conv = true,
-         use_opaque_pointers = true}
+         use_bare_ptr_call_conv = false}
     } {
       legal_dialects = ["llvm", "memref", "nvvm", "test"],
       legal_ops = ["func.func", "gpu.module", "gpu.module_end", "gpu.yield"],