diff options
author | Valentin Clement (バレンタイン クレメン) <clementval@gmail.com> | 2024-03-25 11:53:39 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-25 11:53:39 -0700 |
commit | 4e6745cc4db309c0e1b5e41d4598f67763f4c096 (patch) | |
tree | 8f503d95341bd94bffe8406328221bb72c81904e /flang/lib/Lower/Bridge.cpp | |
parent | 8b9c3b57b158beeedf69810c2a546ab350a91e57 (diff) | |
download | llvm-4e6745cc4db309c0e1b5e41d4598f67763f4c096.zip llvm-4e6745cc4db309c0e1b5e41d4598f67763f4c096.tar.gz llvm-4e6745cc4db309c0e1b5e41d4598f67763f4c096.tar.bz2 |
[flang][cuda] Lower simple host to device data transfer (#85960)
In CUDA Fortran, data transfer can be done via assignment statements
between host and device variables.
This patch introduces a `fir.cuda_data_transfer` operation that
materializes the data transfer between two memory references.
Simple transfers from host to device that do not involve descriptors are
also lowered in this patch. When the rhs is an expression that requires
evaluation, a temporary is created. The evaluation is done on the host
and then the transfer is initiated.
Implicit transfer when device symbols are present on the rhs is not part
of this patch. Transfer from device to host is not part of this patch.
Diffstat (limited to 'flang/lib/Lower/Bridge.cpp')
-rw-r--r-- | flang/lib/Lower/Bridge.cpp | 40 |
1 files changed, 34 insertions, 6 deletions
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 0b54ee8..48830dc 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -3706,15 +3706,39 @@ private: return false; } + static void genCUDADataTransfer(fir::FirOpBuilder &builder, + mlir::Location loc, bool lhsIsDevice, + hlfir::Entity &lhs, bool rhsIsDevice, + hlfir::Entity &rhs) { + if (rhs.isBoxAddressOrValue() || lhs.isBoxAddressOrValue()) + TODO(loc, "CUDA data transfler with descriptors"); + if (lhsIsDevice && !rhsIsDevice) { + auto transferKindAttr = fir::CUDADataTransferKindAttr::get( + builder.getContext(), fir::CUDADataTransferKind::HostDevice); + // device = host + if (!rhs.isVariable()) { + auto associate = hlfir::genAssociateExpr( + loc, builder, rhs, rhs.getType(), ".cuf_host_tmp"); + builder.create<fir::CUDADataTransferOp>(loc, associate.getBase(), lhs, + transferKindAttr); + builder.create<hlfir::EndAssociateOp>(loc, associate); + } else { + builder.create<fir::CUDADataTransferOp>(loc, rhs, lhs, + transferKindAttr); + } + return; + } + TODO(loc, "Assignement with CUDA Fortran variables"); + } + void genDataAssignment( const Fortran::evaluate::Assignment &assign, const Fortran::evaluate::ProcedureRef *userDefinedAssignment) { mlir::Location loc = getCurrentLocation(); fir::FirOpBuilder &builder = getFirOpBuilder(); - if (Fortran::evaluate::HasCUDAAttrs(assign.lhs) || - Fortran::evaluate::HasCUDAAttrs(assign.rhs)) - TODO(loc, "Assignement with CUDA Fortran variables"); + bool lhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.lhs); + bool rhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.rhs); // Gather some information about the assignment that will impact how it is // lowered. 
@@ -3772,9 +3796,13 @@ private: Fortran::lower::StatementContext localStmtCtx; hlfir::Entity rhs = evaluateRhs(localStmtCtx); hlfir::Entity lhs = evaluateLhs(localStmtCtx); - builder.create<hlfir::AssignOp>(loc, rhs, lhs, - isWholeAllocatableAssignment, - keepLhsLengthInAllocatableAssignment); + if (lhsIsDevice || rhsIsDevice) { + genCUDADataTransfer(builder, loc, lhsIsDevice, lhs, rhsIsDevice, rhs); + } else { + builder.create<hlfir::AssignOp>(loc, rhs, lhs, + isWholeAllocatableAssignment, + keepLhsLengthInAllocatableAssignment); + } return; } // Assignments inside Forall, Where, or assignments to a vector subscripted |