aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorValentin Clement (バレンタイン クレメン) <clementval@gmail.com>2024-04-26 13:31:34 -0700
committerGitHub <noreply@github.com>2024-04-26 13:31:34 -0700
commiteb5907d06f2ffc3ed7fb8d4772bd3f8eab89c726 (patch)
tree313ca035b89098bc77a4a2fbecb6bdeaeac4e840
parent9ee8e38cdcc6925a4127d44a0360dc8de23dfb5f (diff)
downloadllvm-eb5907d06f2ffc3ed7fb8d4772bd3f8eab89c726.zip
llvm-eb5907d06f2ffc3ed7fb8d4772bd3f8eab89c726.tar.gz
llvm-eb5907d06f2ffc3ed7fb8d4772bd3f8eab89c726.tar.bz2
[flang][cuda] Avoid to issue data transfer in device context (#90247)
Data transfer should not be issued in device function.
-rw-r--r--flang/lib/Lower/Bridge.cpp28
-rw-r--r--flang/test/Lower/CUDA/cuda-data-transfer.cuf18
2 files changed, 41 insertions, 5 deletions
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index f66607d..92a701a 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -3806,16 +3806,34 @@ private:
return temps;
}
+ // Check if the insertion point is currently in a device context. HostDevice
+ // subprogram are not considered fully device context so it will return false
+ // for it.
+ static bool isDeviceContext(fir::FirOpBuilder &builder) {
+ if (builder.getRegion().getParentOfType<fir::CUDAKernelOp>())
+ return true;
+ if (auto funcOp =
+ builder.getRegion().getParentOfType<mlir::func::FuncOp>()) {
+ if (auto cudaProcAttr =
+ funcOp.getOperation()->getAttrOfType<fir::CUDAProcAttributeAttr>(
+ fir::getCUDAAttrName())) {
+ return cudaProcAttr.getValue() != fir::CUDAProcAttribute::Host &&
+ cudaProcAttr.getValue() != fir::CUDAProcAttribute::HostDevice;
+ }
+ }
+ return false;
+ }
+
void genDataAssignment(
const Fortran::evaluate::Assignment &assign,
const Fortran::evaluate::ProcedureRef *userDefinedAssignment) {
mlir::Location loc = getCurrentLocation();
fir::FirOpBuilder &builder = getFirOpBuilder();
- bool isInDeviceContext =
- builder.getRegion().getParentOfType<fir::CUDAKernelOp>();
- bool isCUDATransfer = Fortran::evaluate::HasCUDAAttrs(assign.lhs) ||
- Fortran::evaluate::HasCUDAAttrs(assign.rhs);
+ bool isInDeviceContext = isDeviceContext(builder);
+ bool isCUDATransfer = (Fortran::evaluate::HasCUDAAttrs(assign.lhs) ||
+ Fortran::evaluate::HasCUDAAttrs(assign.rhs)) &&
+ !isInDeviceContext;
bool hasCUDAImplicitTransfer =
Fortran::evaluate::HasCUDAImplicitTransfer(assign.rhs);
llvm::SmallVector<mlir::Value> implicitTemps;
@@ -3878,7 +3896,7 @@ private:
Fortran::lower::StatementContext localStmtCtx;
hlfir::Entity rhs = evaluateRhs(localStmtCtx);
hlfir::Entity lhs = evaluateLhs(localStmtCtx);
- if (isCUDATransfer && !hasCUDAImplicitTransfer && !isInDeviceContext)
+ if (isCUDATransfer && !hasCUDAImplicitTransfer)
genCUDADataTransfer(builder, loc, assign, lhs, rhs);
else
builder.create<hlfir::AssignOp>(loc, rhs, lhs,
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 7048368..0a26086 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -141,3 +141,21 @@ end subroutine
! CHECK: fir.cuda_kernel<<<*, *>>>
! CHECK-NOT: fir.cuda_data_transfer
! CHECK: hlfir.assign
+
+attributes(global) subroutine sub5(a)
+ integer, device :: a
+ integer :: i
+ a = i
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub5
+! CHECK-NOT: fir.cuda_data_transfer
+
+attributes(host,device) subroutine sub6(a)
+ integer, device :: a
+ integer :: i
+ a = i
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub6
+! CHECK: fir.cuda_data_transfer