aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorValentin Clement (バレンタイン クレメン) <clementval@gmail.com>2025-08-04 16:51:11 -0700
committerGitHub <noreply@github.com>2025-08-04 16:51:11 -0700
commit9b195dc3ef66de2c1ff0048822b24a322ec3c52a (patch)
tree8d9abfe38f449d4d897991387cd7d6ca48f9691d
parent951f40ac388fc389b958af027df96356a4c51e33 (diff)
downloadllvm-9b195dc3ef66de2c1ff0048822b24a322ec3c52a.zip
llvm-9b195dc3ef66de2c1ff0048822b24a322ec3c52a.tar.gz
llvm-9b195dc3ef66de2c1ff0048822b24a322ec3c52a.tar.bz2
[flang][cuda] Generate cuf.allocate for descriptor with CUDA components (#152041)
Descriptors for derived types with CUDA components are allocated in managed memory. For an allocate statement on such an object, the lowering was calling the standard runtime where it should have generated a `cuf.allocate` operation.
-rw-r--r--flang/include/flang/Semantics/tools.h2
-rw-r--r--flang/lib/Lower/Allocatable.cpp8
-rw-r--r--flang/lib/Lower/ConvertVariable.cpp6
-rw-r--r--flang/lib/Semantics/tools.cpp15
-rw-r--r--flang/test/Lower/CUDA/cuda-allocatable.cuf13
5 files changed, 39 insertions, 5 deletions
diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h
index 317b935..966a30f 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -223,6 +223,8 @@ inline bool HasCUDAAttr(const Symbol &sym) {
return false;
}
+bool HasCUDAComponent(const Symbol &sym);
+
inline bool IsCUDAShared(const Symbol &sym) {
if (const auto *details{sym.GetUltimate().detailsIf<ObjectEntityDetails>()}) {
if (details->cudaDataAttr() &&
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index ef16b0c..219f920 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -466,7 +466,9 @@ private:
void genSimpleAllocation(const Allocation &alloc,
const fir::MutableBoxValue &box) {
- bool isCudaSymbol = Fortran::semantics::HasCUDAAttr(alloc.getSymbol());
+ bool isCudaAllocate =
+ Fortran::semantics::HasCUDAAttr(alloc.getSymbol()) ||
+ Fortran::semantics::HasCUDAComponent(alloc.getSymbol());
bool isCudaDeviceContext = cuf::isCUDADeviceContext(builder.getRegion());
bool inlineAllocation = !box.isDerived() && !errorManager.hasStatSpec() &&
!alloc.type.IsPolymorphic() &&
@@ -475,7 +477,7 @@ private:
unsigned allocatorIdx = Fortran::lower::getAllocatorIdx(alloc.getSymbol());
if (inlineAllocation &&
- ((isCudaSymbol && isCudaDeviceContext) || !isCudaSymbol)) {
+ ((isCudaAllocate && isCudaDeviceContext) || !isCudaAllocate)) {
// Pointers must use PointerAllocate so that their deallocations
// can be validated.
genInlinedAllocation(alloc, box);
@@ -494,7 +496,7 @@ private:
genSetDeferredLengthParameters(alloc, box);
genAllocateObjectBounds(alloc, box);
mlir::Value stat;
- if (!isCudaSymbol) {
+ if (!isCudaAllocate) {
stat = genRuntimeAllocate(builder, loc, box, errorManager);
setPinnedToFalse();
} else {
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 88d17ac..a4a8a69 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -814,8 +814,10 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter,
baseTy = boxTy.getEleTy();
baseTy = fir::unwrapRefType(baseTy);
- if (mlir::isa<fir::SequenceType>(baseTy))
- TODO(loc, "array of derived-type with device component");
+ if (mlir::isa<fir::SequenceType>(baseTy) &&
+ (fir::isAllocatableType(fir::getBase(exv).getType()) ||
+ fir::isPointerType(fir::getBase(exv).getType())))
+ return; // Allocator index need to be set after allocation.
auto recTy =
mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy));
diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp
index 5a5b02e..913bf08 100644
--- a/flang/lib/Semantics/tools.cpp
+++ b/flang/lib/Semantics/tools.cpp
@@ -1094,6 +1094,21 @@ bool IsDeviceAllocatable(const Symbol &symbol) {
return false;
}
+bool HasCUDAComponent(const Symbol &symbol) {
+ if (const auto *details{symbol.GetUltimate()
+ .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
+ const Fortran::semantics::DeclTypeSpec *type{details->type()};
+ const Fortran::semantics::DerivedTypeSpec *derived{
+ type ? type->AsDerived() : nullptr};
+ if (derived) {
+ if (FindCUDADeviceAllocatableUltimateComponent(*derived)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
UltimateComponentIterator::const_iterator
FindCUDADeviceAllocatableUltimateComponent(const DerivedTypeSpec &derived) {
UltimateComponentIterator ultimates{derived};
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 36e768b..2cf8c7d 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -6,6 +6,10 @@ module globals
real, device, allocatable :: a_device(:)
real, managed, allocatable :: a_managed(:)
real, pinned, allocatable :: a_pinned(:)
+ type :: t1
+ integer :: a
+ real, dimension(:), allocatable, device :: b
+ end type
end module
! CHECK-LABEL: fir.global @_QMglobalsEa_device {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?xf32>>>
@@ -222,3 +226,12 @@ end
! CHECK: %[[FALSE:.*]] = arith.constant false
! CHECK: %[[FLASE_CONV:.*]] = fir.convert %[[FALSE]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[FLASE_CONV]] to %[[PLOG_DECL]]#0 : !fir.ref<!fir.logical<4>>
+
+subroutine cuda_component()
+ use globals
+ type(t1), pointer, dimension(:) :: d
+ allocate(d(10))
+end subroutine
+
+! CHECK-LABEL: func.func @_QPcuda_component()
+! CHECK: cuf.allocate