diff options
Diffstat (limited to 'mlir')
-rw-r--r-- | mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp | 56
-rw-r--r-- | mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h | 5
-rw-r--r-- | mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 29
3 files changed, 82 insertions, 8 deletions
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index 052a48c..3e6fcc0 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -684,6 +684,62 @@ LogicalResult GPUDynamicSharedMemoryOpLowering::matchAndRewrite( return success(); } +LogicalResult GPUReturnOpLowering::matchAndRewrite( + gpu::ReturnOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const { + Location loc = op.getLoc(); + unsigned numArguments = op.getNumOperands(); + SmallVector<Value, 4> updatedOperands; + + bool useBarePtrCallConv = getTypeConverter()->getOptions().useBarePtrCallConv; + if (useBarePtrCallConv) { + // For the bare-ptr calling convention, extract the aligned pointer to + // be returned from the memref descriptor. + for (auto it : llvm::zip(op->getOperands(), adaptor.getOperands())) { + Type oldTy = std::get<0>(it).getType(); + Value newOperand = std::get<1>(it); + if (isa<MemRefType>(oldTy) && getTypeConverter()->canConvertToBarePtr( + cast<BaseMemRefType>(oldTy))) { + MemRefDescriptor memrefDesc(newOperand); + newOperand = memrefDesc.allocatedPtr(rewriter, loc); + } else if (isa<UnrankedMemRefType>(oldTy)) { + // Unranked memref is not supported in the bare pointer calling + // convention. + return failure(); + } + updatedOperands.push_back(newOperand); + } + } else { + updatedOperands = llvm::to_vector<4>(adaptor.getOperands()); + (void)copyUnrankedDescriptors(rewriter, loc, op.getOperands().getTypes(), + updatedOperands, + /*toDynamic=*/true); + } + + // If ReturnOp has 0 or 1 operand, create it and return immediately. + if (numArguments <= 1) { + rewriter.replaceOpWithNewOp<LLVM::ReturnOp>( + op, TypeRange(), updatedOperands, op->getAttrs()); + return success(); + } + + // Otherwise, we need to pack the arguments into an LLVM struct type before + // returning. 
+ auto packedType = getTypeConverter()->packFunctionResults( + op.getOperandTypes(), useBarePtrCallConv); + if (!packedType) { + return rewriter.notifyMatchFailure(op, "could not convert result types"); + } + + Value packed = rewriter.create<LLVM::UndefOp>(loc, packedType); + for (auto [idx, operand] : llvm::enumerate(updatedOperands)) { + packed = rewriter.create<LLVM::InsertValueOp>(loc, packed, operand, idx); + } + rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, TypeRange(), packed, + op->getAttrs()); + return success(); +} + void mlir::populateGpuMemorySpaceAttributeConversions( TypeConverter &typeConverter, const MemorySpaceMapping &mapping) { typeConverter.addTypeAttributeConversion( diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h index 0ec260b..92e69ba 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h @@ -112,10 +112,7 @@ struct GPUReturnOpLowering : public ConvertOpToLLVMPattern<gpu::ReturnOp> { LogicalResult matchAndRewrite(gpu::ReturnOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const override { - rewriter.replaceOpWithNewOp<LLVM::ReturnOp>(op, adaptor.getOperands()); - return success(); - } + ConversionPatternRewriter &rewriter) const override; }; namespace impl { diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index c57cfd2..d914790 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -1,4 +1,5 @@ // RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1' -split-input-file | FileCheck %s +// RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1 use-bare-ptr-memref-call-conv=1' -split-input-file | FileCheck %s --check-prefix=CHECK-BARE // RUN: mlir-opt %s -transform-interpreter | FileCheck %s gpu.module @test_module_0 { @@ -670,7 +671,7 @@ gpu.module @test_module_32 { } } -gpu.module @gpumodule 
{ +gpu.module @test_module_33 { // CHECK-LABEL: func @kernel_with_block_size() // CHECK: attributes {gpu.kernel, gpu.known_block_size = array<i32: 128, 1, 1>, nvvm.kernel, nvvm.maxntid = array<i32: 128, 1, 1>} gpu.func @kernel_with_block_size() kernel attributes {known_block_size = array<i32: 128, 1, 1>} { @@ -679,6 +680,28 @@ gpu.module @gpumodule { } +gpu.module @test_module_34 { + // CHECK-LABEL: llvm.func @memref_signature( + // CHECK-SAME: %{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: i64, %{{.*}}: f32) -> !llvm.struct<(struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>, f32)> + // CHECK: llvm.mlir.undef + // CHECK: llvm.insertvalue + // CHECK: llvm.insertvalue + // CHECK: llvm.insertvalue + // CHECK: llvm.insertvalue + // CHECK: llvm.insertvalue + // CHECK: llvm.mlir.undef + // CHECK: llvm.insertvalue + // CHECK: llvm.insertvalue + // CHECK: llvm.return + + // CHECK-BARE-LABEL: llvm.func @memref_signature( + // CHECK-BARE-SAME: %{{.*}}: !llvm.ptr, %{{.*}}: f32) -> !llvm.struct<(ptr, f32)> + gpu.func @memref_signature(%m: memref<2xf32>, %f: f32) -> (memref<2xf32>, f32) { + gpu.return %m, %f : memref<2xf32>, f32 + } +} + + module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%toplevel_module: !transform.any_op {transform.readonly}) { %gpu_module = transform.structured.match ops{["gpu.module"]} in %toplevel_module @@ -701,9 +724,7 @@ module attributes {transform.with_named_sequence} { } with type_converter { transform.apply_conversion_patterns.memref.memref_to_llvm_type_converter {index_bitwidth = 64, - use_bare_ptr = true, - use_bare_ptr_memref_call_conv = true, - use_opaque_pointers = true} + use_bare_ptr_call_conv = false} } { legal_dialects = ["llvm", "memref", "nvvm", "test"], legal_ops = ["func.func", "gpu.module", "gpu.module_end", "gpu.yield"], |