diff options
author | Valentin Clement (バレンタイン クレメン) <clementval@gmail.com> | 2025-01-29 08:04:22 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-01-29 08:04:22 -0800 |
commit | 382d3599c203573388b82717dc17e3db4039916a (patch) | |
tree | ffbf39257fb3edf100557450fe7b0e0a705e71a6 | |
parent | 29441e4f5fa5f5c7709f7cf180815ba97f611297 (diff) | |
download | llvm-382d3599c203573388b82717dc17e3db4039916a.zip llvm-382d3599c203573388b82717dc17e3db4039916a.tar.gz llvm-382d3599c203573388b82717dc17e3db4039916a.tar.bz2 |
[flang][cuda] Propagate the data attribute on the converted calls (#124877)
-rw-r--r-- | flang/lib/Optimizer/Transforms/CUFOpConversion.cpp | 14 | ||||
-rw-r--r-- | flang/test/Fir/CUDA/cuda-alloc-free.fir | 12 | ||||
-rw-r--r-- | flang/test/Fir/CUDA/cuda-allocate.fir | 4 | ||||
-rw-r--r-- | flang/test/Fir/CUDA/cuda-data-transfer.fir | 2 |
4 files changed, 19 insertions, 13 deletions
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index cc525d7..710aed5 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -294,19 +294,22 @@ struct CUFAllocOpConversion : public mlir::OpRewritePattern<cuf::AllocOp> { matchAndRewrite(cuf::AllocOp op, mlir::PatternRewriter &rewriter) const override { + mlir::Location loc = op.getLoc(); + if (inDeviceContext(op.getOperation())) { // In device context just replace the cuf.alloc operation with a fir.alloc // the cuf.free will be removed. - rewriter.replaceOpWithNewOp<fir::AllocaOp>( - op, op.getInType(), op.getUniqName() ? *op.getUniqName() : "", + auto allocaOp = rewriter.create<fir::AllocaOp>( + loc, op.getInType(), op.getUniqName() ? *op.getUniqName() : "", op.getBindcName() ? *op.getBindcName() : "", op.getTypeparams(), op.getShape()); + allocaOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); + rewriter.replaceOp(op, allocaOp); return mlir::success(); } auto mod = op->getParentOfType<mlir::ModuleOp>(); fir::FirOpBuilder builder(rewriter, mod); - mlir::Location loc = op.getLoc(); mlir::Value sourceFile = fir::factory::locationToFilename(builder, loc); if (!mlir::dyn_cast_or_null<fir::BaseBoxType>(op.getInType())) { @@ -359,6 +362,7 @@ struct CUFAllocOpConversion : public mlir::OpRewritePattern<cuf::AllocOp> { llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments( builder, loc, fTy, bytes, memTy, sourceFile, sourceLine)}; auto callOp = builder.create<fir::CallOp>(loc, func, args); + callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); auto convOp = builder.createConvert(loc, op.getResult().getType(), callOp.getResult(0)); rewriter.replaceOp(op, convOp); @@ -381,6 +385,7 @@ struct CUFAllocOpConversion : public mlir::OpRewritePattern<cuf::AllocOp> { llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments( builder, loc, fTy, sizeInBytes, sourceFile, sourceLine)}; auto callOp = builder.create<fir::CallOp>(loc, func, args); + callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); auto convOp = builder.createConvert(loc, op.getResult().getType(), callOp.getResult(0)); rewriter.replaceOp(op, convOp); @@ -508,7 +513,8 @@ struct CUFFreeOpConversion : public mlir::OpRewritePattern<cuf::FreeOp> { fir::factory::locationToLineNo(builder, loc, fTy.getInput(2)); llvm::SmallVector<mlir::Value> args{fir::runtime::createArguments( builder, loc, fTy, op.getDevptr(), sourceFile, sourceLine)}; - builder.create<fir::CallOp>(loc, func, args); + auto callOp = builder.create<fir::CallOp>(loc, func, args); + callOp->setAttr(cuf::getDataAttrName(), op.getDataAttrAttr()); rewriter.eraseOp(op); return mlir::success(); } diff --git a/flang/test/Fir/CUDA/cuda-alloc-free.fir b/flang/test/Fir/CUDA/cuda-alloc-free.fir index 6194f00..31f2ed0 100644 --- a/flang/test/Fir/CUDA/cuda-alloc-free.fir +++ b/flang/test/Fir/CUDA/cuda-alloc-free.fir @@ -11,7 +11,7 @@ func.func @_QPsub1() { // CHECK-LABEL: func.func @_QPsub1() // CHECK: %[[BYTES:.*]] = fir.convert %c4{{.*}} : (index) -> i64 -// CHECK: %[[ALLOC:.*]] = fir.call @_FortranACUFMemAlloc(%[[BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> +// CHECK: %[[ALLOC:.*]] = fir.call @_FortranACUFMemAlloc(%[[BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda<device>} : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> // CHECK: %[[CONV:.*]] = fir.convert %3 : (!fir.llvm_ptr<i8>) -> !fir.ref<i32> // CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[CONV]] {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub1Eidev"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) // CHECK: %[[DEVPTR:.*]] = fir.convert %[[DECL]]#1 : (!fir.ref<i32>) -> !fir.llvm_ptr<i8> @@ -26,7 +26,7 @@ func.func @_QPsub2() { // CHECK-LABEL: func.func @_QPsub2() // CHECK: %[[BYTES:.*]] = arith.muli %c10{{.*}}, %c4{{.*}} : index // CHECK: %[[CONV_BYTES:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: %{{.*}} = fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> +// CHECK: %{{.*}} = fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda<device>} : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> // CHECK: fir.call @_FortranACUFMemFree func.func @_QPsub3(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref<i32> {fir.bindc_name = "m"}) { @@ -58,7 +58,7 @@ func.func @_QPsub3(%arg0: !fir.ref<i32> {fir.bindc_name = "n"}, %arg1: !fir.ref< // CHECK: %[[NBELEM:.*]] = arith.muli %[[N]], %[[M]] : index // CHECK: %[[BYTES:.*]] = arith.muli %[[NBELEM]], %c4{{.*}} : index // CHECK: %[[CONV_BYTES:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: %{{.*}} = fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> +// CHECK: %{{.*}} = fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda<device>} : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> // CHECK: fir.call @_FortranACUFMemFree func.func @_QPtest_type() { @@ -71,7 +71,7 @@ func.func @_QPtest_type() { // CHECK-LABEL: func.func @_QPtest_type() // CHECK: %[[BYTES:.*]] = arith.constant 12 : index // CHECK: %[[CONV_BYTES:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> +// CHECK: fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda<device>} : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> gpu.module @cuda_device_mod { gpu.func @_QMalloc() kernel { @@ -81,7 +81,7 @@ gpu.module @cuda_device_mod { } // CHECK-LABEL: gpu.func @_QMalloc() kernel -// CHECK: fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QMallocEa"} +// CHECK: fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", cuf.data_attr = #cuf.cuda<device>, uniq_name = "_QMallocEa"} func.func @_QQalloc_char() attributes {fir.bindc_name = "alloc_char"} { %c1 = arith.constant 1 : index @@ -92,6 +92,6 @@ func.func @_QQalloc_char() attributes {fir.bindc_name = "alloc_char"} { // CHECK-LABEL: func.func @_QQalloc_char() // CHECK: %[[BYTES:.*]] = arith.muli %c10{{.*}}, %c1{{.*}} : index // CHECK: %[[BYTES_CONV:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFMemAlloc(%[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> +// CHECK: fir.call @_FortranACUFMemAlloc(%[[BYTES_CONV]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda<device>} : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> } // end module diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir index b8457b8..0857311 100644 --- a/flang/test/Fir/CUDA/cuda-allocate.fir +++ b/flang/test/Fir/CUDA/cuda-allocate.fir @@ -15,7 +15,7 @@ func.func @_QPsub1() { } // CHECK-LABEL: func.func @_QPsub1() -// CHECK: %[[DESC_RT_CALL:.*]] = fir.call @_FortranACUFAllocDescriptor(%{{.*}}, %{{.*}}, %{{.*}}) : (i64, !fir.ref<i8>, i32) -> !fir.ref<!fir.box<none>> +// CHECK: %[[DESC_RT_CALL:.*]] = fir.call @_FortranACUFAllocDescriptor(%{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda<device>} : (i64, !fir.ref<i8>, i32) -> !fir.ref<!fir.box<none>> // CHECK: %[[DESC:.*]] = fir.convert %[[DESC_RT_CALL]] : (!fir.ref<!fir.box<none>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> // CHECK: %[[DECL_DESC:.*]]:2 = hlfir.declare %[[DESC]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub1Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>> @@ -24,7 +24,7 @@ func.func @_QPsub1() { // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>> // CHECK: %{{.*}} = fir.call @_FortranAAllocatableDeallocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, i1, !fir.box<none>, !fir.ref<i8>, i32) -> i32 // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<none>> -// CHECK: fir.call @_FortranACUFFreeDescriptor(%[[BOX_NONE]], %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<i8>, i32) -> () +// CHECK: fir.call @_FortranACUFFreeDescriptor(%[[BOX_NONE]], %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda<device>} : (!fir.ref<!fir.box<none>>, !fir.ref<i8>, i32) -> () fir.global @_QMmod1Ea {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?xf32>>> { %0 = fir.zero_bits !fir.heap<!fir.array<?xf32>> diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir index 415d001..b62c500 100644 --- a/flang/test/Fir/CUDA/cuda-data-transfer.fir +++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir @@ -329,7 +329,7 @@ func.func @_QPtest_array_type() { // CHECK-LABEL: func.func @_QPtest_array_type() // CHECK: %[[BYTES:.*]] = arith.muli %c10{{.*}}, %c12 : index // CHECK: %[[CONV_BYTES:.*]] = fir.convert %[[BYTES]] : (index) -> i64 -// CHECK: fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> +// CHECK: fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) {cuf.data_attr = #cuf.cuda<device>} : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> // CHECK: %[[BYTES:.*]] = arith.muli %c10{{.*}}, %c12{{.*}} : i64 // CHECK: fir.call @_FortranACUFDataTransferPtrPtr(%{{.*}}, %{{.*}}, %[[BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (!fir.llvm_ptr<i8>, !fir.llvm_ptr<i8>, i64, i32, !fir.ref<i8>, i32) -> () |