aboutsummaryrefslogtreecommitdiff
path: root/flang
diff options
context:
space:
mode:
authorValentin Clement (バレンタイン クレメン) <clementval@gmail.com>2024-06-19 13:35:02 -0700
committerGitHub <noreply@github.com>2024-06-19 13:35:02 -0700
commit8e8dccdecd4a5302fcfad33b4ee1282ae808b106 (patch)
treeee5533c44697e708367a74fde88591ea59a82581 /flang
parentf991ebbb465301a1382d6ce15a346b4edc3cfce2 (diff)
downloadllvm-8e8dccdecd4a5302fcfad33b4ee1282ae808b106.zip
llvm-8e8dccdecd4a5302fcfad33b4ee1282ae808b106.tar.gz
llvm-8e8dccdecd4a5302fcfad33b4ee1282ae808b106.tar.bz2
[flang][cuda] Do not consider PINNED as device attribute (#95988)
PINNED is a CUDA data attribute meant for host variables. Do not consider it when counting the device variables in an assignment for the CUDA data transfer.
Diffstat (limited to 'flang')
-rw-r--r--flang/include/flang/Evaluate/tools.h15
-rw-r--r--flang/lib/Lower/Bridge.cpp11
-rw-r--r--flang/lib/Semantics/check-cuda.cpp4
-rw-r--r--flang/test/Lower/CUDA/cuda-data-transfer.cuf29
4 files changed, 45 insertions, 14 deletions
diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h
index 9c3dfb7..340325b 100644
--- a/flang/include/flang/Evaluate/tools.h
+++ b/flang/include/flang/Evaluate/tools.h
@@ -1231,12 +1231,13 @@ bool CheckForCoindexedObject(parser::ContextualMessages &,
const std::string &argName);
// Get the number of distinct symbols with CUDA attribute in the expression.
-template <typename A> inline int GetNbOfCUDASymbols(const A &expr) {
+template <typename A> inline int GetNbOfCUDADeviceSymbols(const A &expr) {
semantics::UnorderedSymbolSet symbols;
for (const Symbol &sym : CollectSymbols(expr)) {
if (const auto *details =
sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
- if (details->cudaDataAttr()) {
+ if (details->cudaDataAttr() &&
+ *details->cudaDataAttr() != common::CUDADataAttr::Pinned) {
symbols.insert(sym);
}
}
@@ -1246,8 +1247,8 @@ template <typename A> inline int GetNbOfCUDASymbols(const A &expr) {
// Check if any of the symbols part of the expression has a CUDA data
// attribute.
-template <typename A> inline bool HasCUDAAttrs(const A &expr) {
- return GetNbOfCUDASymbols(expr) > 0;
+template <typename A> inline bool HasCUDADeviceAttrs(const A &expr) {
+ return GetNbOfCUDADeviceSymbols(expr) > 0;
}
/// Check if the expression is a mix of host and device variables that require
@@ -1258,7 +1259,8 @@ inline bool HasCUDAImplicitTransfer(const Expr<SomeType> &expr) {
for (const Symbol &sym : CollectSymbols(expr)) {
if (const auto *details =
sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
- if (details->cudaDataAttr()) {
+ if (details->cudaDataAttr() &&
+ *details->cudaDataAttr() != common::CUDADataAttr::Pinned) {
++deviceSymbols;
} else {
if (sym.owner().IsDerivedType()) {
@@ -1267,7 +1269,8 @@ inline bool HasCUDAImplicitTransfer(const Expr<SomeType> &expr) {
.GetSymbol()
->GetUltimate()
.detailsIf<semantics::ObjectEntityDetails>()) {
- if (details->cudaDataAttr()) {
+ if (details->cudaDataAttr() &&
+ *details->cudaDataAttr() != common::CUDADataAttr::Pinned) {
++deviceSymbols;
}
}
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index a3088b5..404d1f6 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -4107,8 +4107,8 @@ private:
void genCUDADataTransfer(fir::FirOpBuilder &builder, mlir::Location loc,
const Fortran::evaluate::Assignment &assign,
hlfir::Entity &lhs, hlfir::Entity &rhs) {
- bool lhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.lhs);
- bool rhsIsDevice = Fortran::evaluate::HasCUDAAttrs(assign.rhs);
+ bool lhsIsDevice = Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs);
+ bool rhsIsDevice = Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs);
auto getRefIfLoaded = [](mlir::Value val) -> mlir::Value {
if (auto loadOp =
@@ -4177,7 +4177,8 @@ private:
if (const auto *details =
sym.GetUltimate()
.detailsIf<Fortran::semantics::ObjectEntityDetails>()) {
- if (details->cudaDataAttr()) {
+ if (details->cudaDataAttr() &&
+ *details->cudaDataAttr() != Fortran::common::CUDADataAttr::Pinned) {
if (sym.owner().IsDerivedType() && IsAllocatable(sym.GetUltimate()))
TODO(loc, "Device resident allocatable derived-type component");
// TODO: This should probably being checked in semantic and give a
@@ -4229,8 +4230,8 @@ private:
fir::FirOpBuilder &builder = getFirOpBuilder();
bool isInDeviceContext = isDeviceContext(builder);
- bool isCUDATransfer = (Fortran::evaluate::HasCUDAAttrs(assign.lhs) ||
- Fortran::evaluate::HasCUDAAttrs(assign.rhs)) &&
+ bool isCUDATransfer = (Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs) ||
+ Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs)) &&
!isInDeviceContext;
bool hasCUDAImplicitTransfer =
Fortran::evaluate::HasCUDAImplicitTransfer(assign.rhs);
diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp
index 8af50ca..5b3ea21 100644
--- a/flang/lib/Semantics/check-cuda.cpp
+++ b/flang/lib/Semantics/check-cuda.cpp
@@ -548,8 +548,8 @@ void CUDAChecker::Enter(const parser::AssignmentStmt &x) {
return;
}
- int nbLhs{evaluate::GetNbOfCUDASymbols(assign->lhs)};
- int nbRhs{evaluate::GetNbOfCUDASymbols(assign->rhs)};
+ int nbLhs{evaluate::GetNbOfCUDADeviceSymbols(assign->lhs)};
+ int nbRhs{evaluate::GetNbOfCUDADeviceSymbols(assign->rhs)};
// device to host transfer with more than one device object on the rhs is not
// legal.
diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
index 3b407b9..5dbf39c 100644
--- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf
+++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf
@@ -180,7 +180,6 @@ end subroutine
! CHECK: cuf.data_transfer %[[B]]#0 to %[[A]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
! CHECK: cuf.data_transfer %[[A]]#0 to %[[C]]#0 {transfer_kind = #cuf.cuda_transfer<device_device>} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
-
subroutine sub8(a, b, n)
integer :: n
integer, device :: a(n)
@@ -195,3 +194,31 @@ end subroutine
! CHECK: %[[A:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) dummy_scope %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub8Ea"} : (!fir.ref<!fir.array<?xi32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<?xi32>>)
! CHECK: cuf.data_transfer %[[A]]#0 to %[[B]]#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.box<!fir.array<?xi32>>, !fir.ref<!fir.array<10xi32>>
! CHECK: cuf.data_transfer %[[B]]#0 to %[[A]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<10xi32>>, !fir.box<!fir.array<?xi32>>
+
+subroutine sub9(a)
+ integer, pinned, allocatable :: a(:)
+ do concurrent (i = 1 : 10)
+ a(i) = 10 + a(i)
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsub9
+! CHECK-NOT: cuf.data_transfer
+
+subroutine sub10(a, b)
+ integer, device :: a
+ integer, allocatable, pinned :: b
+ integer :: res
+
+ res = a + b
+end subroutine
+
+
+
+! CHECK-LABEL: func.func @_QPsub10(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}
+
+! CHECK: %[[A:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %1 {data_attr = #cuf.cuda<device>, uniq_name = "_QFsub10Ea"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: cuf.data_transfer %[[A]]#1 to %{{.*}}#0 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.ref<i32>, !fir.ref<i32>
+! CHECK-NOT: cuf.data_transfer
+