diff options
author | Valentin Clement (バレンタイン クレメン) <clementval@gmail.com> | 2025-08-04 16:51:11 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-08-04 16:51:11 -0700 |
commit | 9b195dc3ef66de2c1ff0048822b24a322ec3c52a (patch) | |
tree | 8d9abfe38f449d4d897991387cd7d6ca48f9691d | |
parent | 951f40ac388fc389b958af027df96356a4c51e33 (diff) | |
download | llvm-9b195dc3ef66de2c1ff0048822b24a322ec3c52a.zip llvm-9b195dc3ef66de2c1ff0048822b24a322ec3c52a.tar.gz llvm-9b195dc3ef66de2c1ff0048822b24a322ec3c52a.tar.bz2 |
[flang][cuda] Generate cuf.allocate for descriptor with CUDA components (#152041)
The descriptors for derived types with CUDA components are allocated in
managed memory. The lowering was calling the standard runtime on an
allocate statement where it should generate a `cuf.allocate` operation.
-rw-r--r-- | flang/include/flang/Semantics/tools.h | 2 | ||||
-rw-r--r-- | flang/lib/Lower/Allocatable.cpp | 8 | ||||
-rw-r--r-- | flang/lib/Lower/ConvertVariable.cpp | 6 | ||||
-rw-r--r-- | flang/lib/Semantics/tools.cpp | 15 | ||||
-rw-r--r-- | flang/test/Lower/CUDA/cuda-allocatable.cuf | 13 |
5 files changed, 39 insertions, 5 deletions
diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h index 317b935..966a30f 100644 --- a/flang/include/flang/Semantics/tools.h +++ b/flang/include/flang/Semantics/tools.h @@ -223,6 +223,8 @@ inline bool HasCUDAAttr(const Symbol &sym) { return false; } +bool HasCUDAComponent(const Symbol &sym); + inline bool IsCUDAShared(const Symbol &sym) { if (const auto *details{sym.GetUltimate().detailsIf<ObjectEntityDetails>()}) { if (details->cudaDataAttr() && diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index ef16b0c..219f920 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -466,7 +466,9 @@ private: void genSimpleAllocation(const Allocation &alloc, const fir::MutableBoxValue &box) { - bool isCudaSymbol = Fortran::semantics::HasCUDAAttr(alloc.getSymbol()); + bool isCudaAllocate = + Fortran::semantics::HasCUDAAttr(alloc.getSymbol()) || + Fortran::semantics::HasCUDAComponent(alloc.getSymbol()); bool isCudaDeviceContext = cuf::isCUDADeviceContext(builder.getRegion()); bool inlineAllocation = !box.isDerived() && !errorManager.hasStatSpec() && !alloc.type.IsPolymorphic() && @@ -475,7 +477,7 @@ private: unsigned allocatorIdx = Fortran::lower::getAllocatorIdx(alloc.getSymbol()); if (inlineAllocation && - ((isCudaSymbol && isCudaDeviceContext) || !isCudaSymbol)) { + ((isCudaAllocate && isCudaDeviceContext) || !isCudaAllocate)) { // Pointers must use PointerAllocate so that their deallocations // can be validated. 
genInlinedAllocation(alloc, box); @@ -494,7 +496,7 @@ private: genSetDeferredLengthParameters(alloc, box); genAllocateObjectBounds(alloc, box); mlir::Value stat; - if (!isCudaSymbol) { + if (!isCudaAllocate) { stat = genRuntimeAllocate(builder, loc, box, errorManager); setPinnedToFalse(); } else { diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 88d17ac..a4a8a69 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -814,8 +814,10 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter, baseTy = boxTy.getEleTy(); baseTy = fir::unwrapRefType(baseTy); - if (mlir::isa<fir::SequenceType>(baseTy)) - TODO(loc, "array of derived-type with device component"); + if (mlir::isa<fir::SequenceType>(baseTy) && + (fir::isAllocatableType(fir::getBase(exv).getType()) || + fir::isPointerType(fir::getBase(exv).getType()))) + return; // Allocator index need to be set after allocation. auto recTy = mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy)); diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp index 5a5b02e..913bf08 100644 --- a/flang/lib/Semantics/tools.cpp +++ b/flang/lib/Semantics/tools.cpp @@ -1094,6 +1094,21 @@ bool IsDeviceAllocatable(const Symbol &symbol) { return false; } +bool HasCUDAComponent(const Symbol &symbol) { + if (const auto *details{symbol.GetUltimate() + .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) { + const Fortran::semantics::DeclTypeSpec *type{details->type()}; + const Fortran::semantics::DerivedTypeSpec *derived{ + type ? 
type->AsDerived() : nullptr}; + if (derived) { + if (FindCUDADeviceAllocatableUltimateComponent(*derived)) { + return true; + } + } + } + return false; +} + UltimateComponentIterator::const_iterator FindCUDADeviceAllocatableUltimateComponent(const DerivedTypeSpec &derived) { UltimateComponentIterator ultimates{derived}; diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf index 36e768b..2cf8c7d 100644 --- a/flang/test/Lower/CUDA/cuda-allocatable.cuf +++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf @@ -6,6 +6,10 @@ module globals real, device, allocatable :: a_device(:) real, managed, allocatable :: a_managed(:) real, pinned, allocatable :: a_pinned(:) + type :: t1 + integer :: a + real, dimension(:), allocatable, device :: b + end type end module ! CHECK-LABEL: fir.global @_QMglobalsEa_device {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?xf32>>> @@ -222,3 +226,12 @@ end ! CHECK: %[[FALSE:.*]] = arith.constant false ! CHECK: %[[FLASE_CONV:.*]] = fir.convert %[[FALSE]] : (i1) -> !fir.logical<4> ! CHECK: fir.store %[[FLASE_CONV]] to %[[PLOG_DECL]]#0 : !fir.ref<!fir.logical<4>> + +subroutine cuda_component() + use globals + type(t1), pointer, dimension(:) :: d + allocate(d(10)) +end subroutine + +! CHECK-LABEL: func.func @_QPcuda_component() +! CHECK: cuf.allocate |