aboutsummaryrefslogtreecommitdiff
path: root/flang
diff options
context:
space:
mode:
Diffstat (limited to 'flang')
-rw-r--r--flang/docs/GettingInvolved.md21
-rw-r--r--flang/include/flang/Optimizer/Builder/IntrinsicCall.h1
-rw-r--r--flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h7
-rw-r--r--flang/include/flang/Optimizer/Support/Utils.h6
-rw-r--r--flang/lib/Lower/IO.cpp8
-rw-r--r--flang/lib/Lower/OpenACC.cpp27
-rw-r--r--flang/lib/Optimizer/Builder/IntrinsicCall.cpp15
-rw-r--r--flang/lib/Optimizer/CodeGen/CodeGen.cpp4
-rw-r--r--flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp71
-rw-r--r--flang/lib/Optimizer/Support/Utils.cpp10
-rw-r--r--flang/lib/Optimizer/Transforms/CUFOpConversion.cpp7
-rw-r--r--flang/module/cudadevice.f909
-rw-r--r--flang/test/Fir/CUDA/cuda-data-transfer.fir74
-rw-r--r--flang/test/Lower/CUDA/cuda-device-proc.cuf12
-rw-r--r--flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f905
-rw-r--r--flang/test/Lower/OpenACC/acc-private.f9050
-rw-r--r--flang/test/Lower/namelist.f9040
17 files changed, 339 insertions, 28 deletions
diff --git a/flang/docs/GettingInvolved.md b/flang/docs/GettingInvolved.md
index e2220f3..79af788 100644
--- a/flang/docs/GettingInvolved.md
+++ b/flang/docs/GettingInvolved.md
@@ -46,26 +46,17 @@ Contributions to Flang are done using GitHub Pull Requests and follow the
## Calls
-### Flang Community Biweekly Call
+### Flang Biweekly Call
-- General updates on the Flang project.
-- Join [Flang Community Biweekly Call](https://lanl-us.webex.com/lanl-us/j.php?MTID=mdce13c9bd55202e8071d8128fb953614)
- - If you prefer to join using a meeting number and password, those can be
- found in this [Google Doc](https://docs.google.com/document/d/1Z2U5UAtJ-Dag5wlMaLaW1KRmNgENNAYynJqLW2j2AZQ/). Alternative methods of joining, such as call-in numbers, are also available.
-- Time: Wednesdays, 8:30 a.m. Pacific Time, on the weeks alternating with regular Flang Community Technical Biweekly Call.
+- Technical discussions as well as general updates on the Flang project.
+- Join the [Flang Biweekly Call](https://lanl-us.webex.com/lanl-us/j.php?MTID=mdce13c9bd55202e8071d8128fb953614)
+ - If you prefer to join using a meeting number and password, those can be
+ found in this [Google Doc](https://docs.google.com/document/d/1Z2U5UAtJ-Dag5wlMaLaW1KRmNgENNAYynJqLW2j2AZQ/). Alternative methods of joining, such as call-in numbers, are also available there.
+- Time: Every other Wednesday, 8:30 a.m. Pacific Time
- Calendar invite: https://drive.google.com/file/d/1rkfWCtIvQFcxN0Uz8YVwQGoX_BbzT8oc/view?usp=drive_link
- Meeting minutes are available in this [Google Doc](https://docs.google.com/document/d/1Z2U5UAtJ-Dag5wlMaLaW1KRmNgENNAYynJqLW2j2AZQ/edit).
- Minutes from older meetings were posted on the [Flang forum](https://discourse.llvm.org/c/subprojects/flang). Search for `Flang Biweekly Sync - Notes`.
-### Flang Community Technical Biweekly Call
-
-- Technical topics call.
-- Join [Flang Community Technical Biweekly Call](https://teams.microsoft.com/l/meetup-join/19%3ameeting_YWU1NzU4ZjQtOTljOS00NWU1LTg5NjktYTUzOTU3MGEwMzAx%40thread.v2/0?context=%7b%22Tid%22%3a%22f34e5979-57d9-4aaa-ad4d-b122a662184d%22%2c%22Oid%22%3a%223641875c-ef5b-4767-8105-0787a195852f%22%7d)
- - If you prefer to join using a meeting ID and passcode, those can be
- found in this [Google Doc](https://docs.google.com/document/d/1Z2U5UAtJ-Dag5wlMaLaW1KRmNgENNAYynJqLW2j2AZQ/). Alternative methods of joining, such as call-in numbers, are also available.
-- Time: Mondays, 8:30 a.m. Pacific Time, on the weeks alternating with regular Flang Community Biweekly Call.
-- The agenda is in this [Google Doc](https://docs.google.com/document/d/1Z2U5UAtJ-Dag5wlMaLaW1KRmNgENNAYynJqLW2j2AZQ/).
-
### LLVM Alias Analysis Technical Call
- For people working on improvements to LLVM alias analysis.
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 2adfd6f2..c3cd119b 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -459,6 +459,7 @@ struct IntrinsicLibrary {
mlir::Value genTime(mlir::Type, llvm::ArrayRef<mlir::Value>);
void genTMABulkCommitGroup(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkWaitGroup(llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genTrailz(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genTransfer(mlir::Type,
diff --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h
index 1085393..408f039 100644
--- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h
+++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h
@@ -57,8 +57,11 @@ struct OpenACCMappableModel
mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var,
llvm::StringRef varName,
- mlir::ValueRange extents,
- mlir::Value initVal) const;
+ mlir::ValueRange extents, mlir::Value initVal,
+ bool &needsDestroy) const;
+
+ bool generatePrivateDestroy(mlir::Type type, mlir::OpBuilder &builder,
+ mlir::Location loc, mlir::Value privatized) const;
};
} // namespace fir::acc
diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h
index 0b31cfe..bbb7e6e 100644
--- a/flang/include/flang/Optimizer/Support/Utils.h
+++ b/flang/include/flang/Optimizer/Support/Utils.h
@@ -200,6 +200,12 @@ std::optional<llvm::ArrayRef<int64_t>> getComponentLowerBoundsIfNonDefault(
fir::RecordType recordType, llvm::StringRef component,
mlir::ModuleOp module, const mlir::SymbolTable *symbolTable = nullptr);
+/// Indicate if a derived type has final routine. Returns std::nullopt if that
+/// information is not in the IR;
+std::optional<bool>
+isRecordWithFinalRoutine(fir::RecordType recordType, mlir::ModuleOp module,
+ const mlir::SymbolTable *symbolTable = nullptr);
+
/// Generate a LLVM constant value of type `ity`, using the provided offset.
mlir::LLVM::ConstantOp
genConstantIndex(mlir::Location loc, mlir::Type ity,
diff --git a/flang/lib/Lower/IO.cpp b/flang/lib/Lower/IO.cpp
index 98dc78f..604b137 100644
--- a/flang/lib/Lower/IO.cpp
+++ b/flang/lib/Lower/IO.cpp
@@ -524,12 +524,18 @@ getNamelistGroup(Fortran::lower::AbstractConverter &converter,
descAddr =
builder.createConvert(loc, builder.getRefType(symType), varAddr);
} else {
+ fir::BaseBoxType boxType;
const auto expr = Fortran::evaluate::AsGenericExpr(s);
fir::ExtendedValue exv = converter.genExprAddr(*expr, stmtCtx);
mlir::Type type = fir::getBase(exv).getType();
+ bool isClassType = mlir::isa<fir::ClassType>(type);
if (mlir::Type baseTy = fir::dyn_cast_ptrOrBoxEleTy(type))
type = baseTy;
- fir::BoxType boxType = fir::BoxType::get(fir::PointerType::get(type));
+
+ if (isClassType)
+ boxType = fir::ClassType::get(fir::PointerType::get(type));
+ else
+ boxType = fir::BoxType::get(fir::PointerType::get(type));
descAddr = builder.createTemporary(loc, boxType);
fir::MutableBoxValue box = fir::MutableBoxValue(descAddr, {}, {});
fir::factory::associateMutableBox(builder, loc, box, exv,
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 62e5c0c..cfb1891 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -978,15 +978,40 @@ static RecipeOp genRecipeOp(
auto mappableTy = mlir::dyn_cast<mlir::acc::MappableType>(ty);
assert(mappableTy &&
"Expected that all variable types are considered mappable");
+ bool needsDestroy = false;
auto retVal = mappableTy.generatePrivateInit(
builder, loc,
mlir::cast<mlir::TypedValue<mlir::acc::MappableType>>(
initBlock->getArgument(0)),
initName,
initBlock->getArguments().take_back(initBlock->getArguments().size() - 1),
- initValue);
+ initValue, needsDestroy);
mlir::acc::YieldOp::create(builder, loc,
retVal ? retVal : initBlock->getArgument(0));
+ // Create destroy region and generate destruction if requested.
+ if (needsDestroy) {
+ llvm::SmallVector<mlir::Type> destroyArgsTy;
+ llvm::SmallVector<mlir::Location> destroyArgsLoc;
+ // original and privatized/reduction value
+ destroyArgsTy.push_back(ty);
+ destroyArgsTy.push_back(ty);
+ destroyArgsLoc.push_back(loc);
+ destroyArgsLoc.push_back(loc);
+ // Append bounds arguments (if any) in the same order as init region
+ if (argsTy.size() > 1) {
+ destroyArgsTy.append(argsTy.begin() + 1, argsTy.end());
+ destroyArgsLoc.insert(destroyArgsLoc.end(), argsTy.size() - 1, loc);
+ }
+
+ builder.createBlock(&recipe.getDestroyRegion(),
+ recipe.getDestroyRegion().end(), destroyArgsTy,
+ destroyArgsLoc);
+ builder.setInsertionPointToEnd(&recipe.getDestroyRegion().back());
+ // Call interface on the privatized/reduction value (2nd argument).
+ (void)mappableTy.generatePrivateDestroy(
+ builder, loc, recipe.getDestroyRegion().front().getArgument(1));
+ mlir::acc::TerminatorOp::create(builder, loc);
+ }
return recipe;
}
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 5fe2a76..e07baaf 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -1027,6 +1027,10 @@ static constexpr IntrinsicHandler handlers[]{
{"dst", asAddr},
{"nbytes", asValue}}},
/*isElemental=*/false},
+ {"tma_bulk_s2g",
+ &I::genTMABulkS2G,
+ {{{"src", asAddr}, {"dst", asAddr}, {"nbytes", asValue}}},
+ /*isElemental=*/false},
{"tma_bulk_wait_group",
&I::genTMABulkWaitGroup,
{{}},
@@ -9227,6 +9231,17 @@ void IntrinsicLibrary::genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue> args) {
builder, loc, dst, src, barrier, fir::getBase(args[3]), {}, {});
}
+// TMA_BULK_S2G (CUDA)
+void IntrinsicLibrary::genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 3);
+ mlir::Value src = convertPtrToNVVMSpace(builder, loc, fir::getBase(args[0]),
+ mlir::NVVM::NVVMMemorySpace::Shared);
+ mlir::Value dst = convertPtrToNVVMSpace(builder, loc, fir::getBase(args[1]),
+ mlir::NVVM::NVVMMemorySpace::Global);
+ mlir::NVVM::CpAsyncBulkSharedCTAToGlobalOp::create(
+ builder, loc, dst, src, fir::getBase(args[2]), {}, {});
+}
+
// TMA_BULK_WAIT_GROUP (CUDA)
void IntrinsicLibrary::genTMABulkWaitGroup(
llvm::ArrayRef<fir::ExtendedValue> args) {
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 4a05cd9..0afb295 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3229,6 +3229,10 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
g.setAddrSpace(
static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Shared));
+ if (global.getDataAttr() &&
+ *global.getDataAttr() == cuf::DataAttribute::Constant)
+ TODO(global.getLoc(), "CUDA Fortran CONSTANT variable code generation");
+
rewriter.eraseOp(global);
return mlir::success();
}
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index 89aa010..41383fb 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -21,6 +21,7 @@
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
#include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Optimizer/Support/Utils.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/IR/BuiltinOps.h"
@@ -548,14 +549,27 @@ template <typename Ty>
mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
- mlir::ValueRange extents, mlir::Value initVal) const {
+ mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const {
+ needsDestroy = false;
mlir::Value retVal;
mlir::Type unwrappedTy = fir::unwrapRefType(type);
mlir::ModuleOp mod = builder.getInsertionBlock()
->getParent()
->getParentOfType<mlir::ModuleOp>();
- fir::FirOpBuilder firBuilder(builder, mod);
+ if (auto recType = llvm::dyn_cast<fir::RecordType>(
+ fir::getFortranElementType(unwrappedTy))) {
+ // Need to make deep copies of allocatable components.
+ if (fir::isRecordWithAllocatableMember(recType))
+ TODO(loc,
+ "OpenACC: privatizing derived type with allocatable components");
+ // Need to decide if user assignment/final routine should be called.
+ if (fir::isRecordWithFinalRoutine(recType, mod).value_or(false))
+ TODO(loc, "OpenACC: privatizing derived type with user assignment or "
+ "final routine ");
+ }
+
+ fir::FirOpBuilder firBuilder(builder, mod);
auto getDeclareOpForType = [&](mlir::Type ty) -> hlfir::DeclareOp {
auto alloca = fir::AllocaOp::create(firBuilder, loc, ty);
return hlfir::DeclareOp::create(firBuilder, loc, alloca, varName);
@@ -615,9 +629,11 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
mlir::Value firClass =
fir::EmboxOp::create(builder, loc, boxTy, allocatedScalar);
fir::StoreOp::create(builder, loc, firClass, retVal);
+ needsDestroy = true;
} else if (mlir::isa<fir::SequenceType>(innerTy)) {
hlfir::Entity source = hlfir::Entity{var};
- auto [temp, cleanup] = hlfir::createTempFromMold(loc, firBuilder, source);
+ auto [temp, cleanupFlag] =
+ hlfir::createTempFromMold(loc, firBuilder, source);
if (fir::isa_ref_type(type)) {
// When the temp is created - it is not a reference - thus we can
// end up with a type inconsistency. Therefore ensure storage is created
@@ -636,6 +652,9 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
} else {
retVal = temp;
}
+ // If heap was allocated, a destroy is required later.
+ if (cleanupFlag)
+ needsDestroy = true;
} else {
TODO(loc, "Unsupported boxed type for OpenACC private-like recipe");
}
@@ -667,23 +686,61 @@ template mlir::Value
OpenACCMappableModel<fir::BaseBoxType>::generatePrivateInit(
mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
- mlir::ValueRange extents, mlir::Value initVal) const;
+ mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
template mlir::Value
OpenACCMappableModel<fir::ReferenceType>::generatePrivateInit(
mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
- mlir::ValueRange extents, mlir::Value initVal) const;
+ mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
template mlir::Value OpenACCMappableModel<fir::HeapType>::generatePrivateInit(
mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
- mlir::ValueRange extents, mlir::Value initVal) const;
+ mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
template mlir::Value
OpenACCMappableModel<fir::PointerType>::generatePrivateInit(
mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
- mlir::ValueRange extents, mlir::Value initVal) const;
+ mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
+
+template <typename Ty>
+bool OpenACCMappableModel<Ty>::generatePrivateDestroy(
+ mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value privatized) const {
+ mlir::Type unwrappedTy = fir::unwrapRefType(type);
+ // For boxed scalars allocated with AllocMem during init, free the heap.
+ if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(unwrappedTy)) {
+ mlir::Value boxVal = privatized;
+ if (fir::isa_ref_type(boxVal.getType()))
+ boxVal = fir::LoadOp::create(builder, loc, boxVal);
+ mlir::Value addr = fir::BoxAddrOp::create(builder, loc, boxVal);
+ // FreeMem only accepts fir.heap and this may not be represented in the box
+ // type if the privatized entity is not an allocatable.
+ mlir::Type heapType =
+ fir::HeapType::get(fir::unwrapRefType(addr.getType()));
+ if (heapType != addr.getType())
+ addr = fir::ConvertOp::create(builder, loc, heapType, addr);
+ fir::FreeMemOp::create(builder, loc, addr);
+ return true;
+ }
+
+ // Nothing to do for other categories by default, they are stack allocated.
+ return true;
+}
+
+template bool OpenACCMappableModel<fir::BaseBoxType>::generatePrivateDestroy(
+ mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value privatized) const;
+template bool OpenACCMappableModel<fir::ReferenceType>::generatePrivateDestroy(
+ mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value privatized) const;
+template bool OpenACCMappableModel<fir::HeapType>::generatePrivateDestroy(
+ mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value privatized) const;
+template bool OpenACCMappableModel<fir::PointerType>::generatePrivateDestroy(
+ mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value privatized) const;
} // namespace fir::acc
diff --git a/flang/lib/Optimizer/Support/Utils.cpp b/flang/lib/Optimizer/Support/Utils.cpp
index c71642c..92390e4a 100644
--- a/flang/lib/Optimizer/Support/Utils.cpp
+++ b/flang/lib/Optimizer/Support/Utils.cpp
@@ -51,6 +51,16 @@ std::optional<llvm::ArrayRef<int64_t>> fir::getComponentLowerBoundsIfNonDefault(
return std::nullopt;
}
+std::optional<bool>
+fir::isRecordWithFinalRoutine(fir::RecordType recordType, mlir::ModuleOp module,
+ const mlir::SymbolTable *symbolTable) {
+ fir::TypeInfoOp typeInfo =
+ fir::lookupTypeInfoOp(recordType, module, symbolTable);
+ if (!typeInfo)
+ return std::nullopt;
+ return !typeInfo.getNoFinal();
+}
+
mlir::LLVM::ConstantOp
fir::genConstantIndex(mlir::Location loc, mlir::Type ity,
mlir::ConversionPatternRewriter &rewriter,
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 609a1fc..759e3a65d 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -558,6 +558,7 @@ static mlir::Value emboxSrc(mlir::PatternRewriter &rewriter,
if (srcTy.isInteger(1)) {
// i1 is not a supported type in the descriptor and it is actually coming
// from a LOGICAL constant. Use the destination type to avoid mismatch.
+ assert(dstEleTy && "expect dst element type to be set");
srcTy = dstEleTy;
src = createConvertOp(rewriter, loc, srcTy, src);
addr = builder.createTemporary(loc, srcTy);
@@ -652,7 +653,8 @@ struct CUFDataTransferOpConversion
// Initialization of an array from a scalar value should be implemented
// via a kernel launch. Use the flang runtime via the Assign function
// until we have more infrastructure.
- mlir::Value src = emboxSrc(rewriter, op, symtab);
+ mlir::Type dstEleTy = fir::unwrapInnerType(fir::unwrapRefType(dstTy));
+ mlir::Value src = emboxSrc(rewriter, op, symtab, dstEleTy);
mlir::Value dst = emboxDst(rewriter, op, symtab);
mlir::func::FuncOp func =
fir::runtime::getRuntimeFunc<mkRTKey(CUFDataTransferCstDesc)>(
@@ -739,6 +741,9 @@ struct CUFDataTransferOpConversion
fir::StoreOp::create(builder, loc, val, box);
return box;
}
+ if (mlir::isa<fir::BaseBoxType>(val.getType()))
+ if (auto loadOp = mlir::dyn_cast<fir::LoadOp>(val.getDefiningOp()))
+ return loadOp.getMemref();
return val;
};
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index a8b9aa8..22df9cd 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -2034,6 +2034,15 @@ implicit none
end subroutine
end interface
+ interface
+ attributes(device) subroutine tma_bulk_s2g(src, dst, nbytes)
+ !dir$ ignore_tkr src, dst
+ integer(4), shared :: src(*)
+ integer(4), device :: dst(*)
+ integer(4), value :: nbytes
+ end subroutine
+ end interface
+
contains
attributes(device) subroutine syncthreads()
diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir
index 669300c..b247fce 100644
--- a/flang/test/Fir/CUDA/cuda-data-transfer.fir
+++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir
@@ -651,5 +651,79 @@ func.func @_QPsub28() {
// CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DESC]] : (!fir.ref<!fir.box<!fir.logical<8>>>) -> !fir.ref<!fir.box<none>>
// CHECK: fir.call @_FortranACUFDataTransferCstDesc(%{{.*}}, %[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> ()
+func.func @_QPtesti4(%arg0: !fir.ref<i32> {fir.bindc_name = "n1"}, %arg1: !fir.ref<i32> {fir.bindc_name = "n2"}, %arg2: !fir.ref<i32> {fir.bindc_name = "n3"}, %arg3: !fir.ref<i32> {fir.bindc_name = "n4"}) {
+ %true = arith.constant true
+ %c0 = arith.constant 0 : index
+ %c2_i32 = arith.constant 2 : i32
+ %0 = fir.dummy_scope : !fir.dscope
+ %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtesti4En1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %2:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtesti4En2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %3:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtesti4En3"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %4:2 = hlfir.declare %arg3 dummy_scope %0 {uniq_name = "_QFtesti4En4"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+ %5 = fir.load %1#0 : !fir.ref<i32>
+ %6 = arith.divsi %5, %c2_i32 : i32
+ %7 = fir.convert %6 : (i32) -> index
+ %8 = arith.cmpi sgt, %7, %c0 : index
+ %9 = arith.select %8, %7, %c0 : index
+ %10 = fir.load %2#0 : !fir.ref<i32>
+ %11 = arith.divsi %10, %c2_i32 : i32
+ %12 = fir.convert %11 : (i32) -> index
+ %13 = arith.cmpi sgt, %12, %c0 : index
+ %14 = arith.select %13, %12, %c0 : index
+ %15 = fir.load %3#0 : !fir.ref<i32>
+ %16 = arith.divsi %15, %c2_i32 : i32
+ %17 = fir.convert %16 : (i32) -> index
+ %18 = arith.cmpi sgt, %17, %c0 : index
+ %19 = arith.select %18, %17, %c0 : index
+ %20 = fir.load %4#0 : !fir.ref<i32>
+ %21 = arith.divsi %20, %c2_i32 : i32
+ %22 = fir.convert %21 : (i32) -> index
+ %23 = arith.cmpi sgt, %22, %c0 : index
+ %24 = arith.select %23, %22, %c0 : index
+ %25 = cuf.alloc !fir.array<?x?x?x?x!fir.logical<4>>, %9, %14, %19, %24 : index, index, index, index {bindc_name = "lma", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtesti4Elma"} -> !fir.ref<!fir.array<?x?x?x?x!fir.logical<4>>>
+ %26 = fir.shape %9, %14, %19, %24 : (index, index, index, index) -> !fir.shape<4>
+ %27:2 = hlfir.declare %25(%26) {data_attr = #cuf.cuda<managed>, uniq_name = "_QFtesti4Elma"} : (!fir.ref<!fir.array<?x?x?x?x!fir.logical<4>>>, !fir.shape<4>) -> (!fir.box<!fir.array<?x?x?x?x!fir.logical<4>>>, !fir.ref<!fir.array<?x?x?x?x!fir.logical<4>>>)
+ cuf.data_transfer %true to %27#1, %26 : !fir.shape<4> {transfer_kind = #cuf.cuda_transfer<host_device>} : i1, !fir.ref<!fir.array<?x?x?x?x!fir.logical<4>>>
+ cuf.free %27#1 : !fir.ref<!fir.array<?x?x?x?x!fir.logical<4>>> {data_attr = #cuf.cuda<managed>}
+ return
+}
+
+// CHECK-LABEL: func.func @_QPtesti4
+// CHECK: fir.call @_FortranACUFDataTransferCstDesc
+
+// -----
+
+func.func @_QQmain() attributes {fir.bindc_name = "T"} {
+ %c2 = arith.constant 2 : index
+ %c1 = arith.constant 1 : index
+ %c80 = arith.constant 80 : index
+ %c0 = arith.constant 0 : index
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = cuf.alloc !fir.box<!fir.heap<!fir.array<?x?x?xf16>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFEa"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>>
+ %2 = fir.zero_bits !fir.heap<!fir.array<?x?x?xf16>>
+ %3 = fir.shape %c0, %c0, %c0 : (index, index, index) -> !fir.shape<3>
+ %4 = fir.embox %2(%3) {allocator_idx = 2 : i32} : (!fir.heap<!fir.array<?x?x?xf16>>, !fir.shape<3>) -> !fir.box<!fir.heap<!fir.array<?x?x?xf16>>>
+ fir.store %4 to %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>>
+ %5 = fir.declare %1 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>>
+ %6 = fir.address_of(@_QFEha) : !fir.ref<!fir.array<80x80x80xf32>>
+ %7 = fir.shape %c80, %c80, %c80 : (index, index, index) -> !fir.shape<3>
+ %8 = fir.declare %6(%7) {uniq_name = "_QFEha"} : (!fir.ref<!fir.array<80x80x80xf32>>, !fir.shape<3>) -> !fir.ref<!fir.array<80x80x80xf32>>
+ %9 = fir.address_of(@_QFECn) : !fir.ref<i32>
+ %10 = fir.declare %9 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QFECn"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %11 = fir.load %5 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>>
+ %12:3 = fir.box_dims %11, %c0 : (!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>, index) -> (index, index, index)
+ %13:3 = fir.box_dims %11, %c1 : (!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>, index) -> (index, index, index)
+ %14:3 = fir.box_dims %11, %c2 : (!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>, index) -> (index, index, index)
+ %15 = fir.shape %12#1, %13#1, %14#1 : (index, index, index) -> !fir.shape<3>
+ %16 = fir.allocmem !fir.array<?x?x?xf16>, %12#1, %13#1, %14#1 {bindc_name = ".tmp", uniq_name = ""}
+ %17 = fir.declare %16(%15) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?x?x?xf16>>, !fir.shape<3>) -> !fir.heap<!fir.array<?x?x?xf16>>
+ %18 = fir.embox %17(%15) : (!fir.heap<!fir.array<?x?x?xf16>>, !fir.shape<3>) -> !fir.box<!fir.array<?x?x?xf16>>
+ cuf.data_transfer %11 to %18 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.box<!fir.heap<!fir.array<?x?x?xf16>>>, !fir.box<!fir.array<?x?x?xf16>>
+ return
+}
+
+// CHECK-LABEL: func.func @_QQmain()
+// CHECK: fir.call @_FortranACUFDataTransferDescDesc
+
} // end of module
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 83ee011..29c348c 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -438,7 +438,7 @@ end subroutine
! CHECK: nvvm.cp.async.bulk.commit.group
! CHECK: nvvm.cp.async.bulk.wait_group 0
-attributes(global) subroutine test_bulk_g2s(c, a, b, n)
+attributes(global) subroutine test_bulk_g2s(a)
real(8), device :: a(*)
real(8), shared :: tmpa(1024)
integer(8), shared :: barrier1
@@ -448,3 +448,13 @@ end subroutine
! CHECK-LABEL: func.func @_QPtest_bulk_g2s
! CHECK: nvvm.cp.async.bulk.shared.cluster.global %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : <7>, <1>
+
+attributes(global) subroutine test_bulk_s2g(a)
+ real(8), device :: a(*)
+ real(8), shared :: tmpa(1024)
+ integer(4) :: tx_count
+ call tma_bulk_s2g(tmpa, a(j), tx_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_bulk_s2g
+! CHECL: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
diff --git a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90
index 429f207..3987f9f 100644
--- a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90
+++ b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90
@@ -4,6 +4,11 @@
! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s --check-prefix=FIR-CHECK
+! TODO: This test hits a fatal TODO. Deal with allocatable component
+! destructions. For arrays, allocatable component allocation may also be
+! missing.
+! XFAIL: *
+
module m_firstprivate_derived_alloc_comp
type point
real, allocatable :: x(:)
diff --git a/flang/test/Lower/OpenACC/acc-private.f90 b/flang/test/Lower/OpenACC/acc-private.f90
index d37eb8d..485825d 100644
--- a/flang/test/Lower/OpenACC/acc-private.f90
+++ b/flang/test/Lower/OpenACC/acc-private.f90
@@ -26,6 +26,12 @@
! CHECK: %[[DES_DST:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?x?x2xi32>>, !fir.shape<3>) -> !fir.box<!fir.array<?x?x2xi32>>
! CHECK: hlfir.assign %[[DES_SRC]] to %[[DES_DST]] : !fir.box<!fir.array<?x?x2xi32>>, !fir.box<!fir.array<?x?x2xi32>>
! CHECK: acc.terminator
+! CHECK: } destroy {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?x?x2xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?x?x2xi32>>):
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?x?x2xi32>>) -> !fir.ref<!fir.array<?x?x2xi32>>
+! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?x?x2xi32>>) -> !fir.heap<!fir.array<?x?x2xi32>>
+! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?x?x2xi32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_section_lb4.ub9_box_Uxi32 : !fir.box<!fir.array<?xi32>> init {
@@ -47,6 +53,12 @@
! CHECK: %[[RIGHT:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
! CHECK: hlfir.assign %[[LEFT]] to %[[RIGHT]] : !fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>
! CHECK: acc.terminator
+! CHECK: } destroy {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>):
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_box_Uxi32 : !fir.box<!fir.array<?xi32>> init {
@@ -64,6 +76,12 @@
! CHECK: %[[DES_V2:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
! CHECK: hlfir.assign %[[DES_V1]] to %[[DES_V2]] : !fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>
! CHECK: acc.terminator
+! CHECK: } destroy {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>):
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.private.recipe @privatization_box_UxUx2xi32 : !fir.box<!fir.array<?x?x2xi32>> init {
@@ -74,6 +92,12 @@
! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<?x?x2xi32>, %[[DIM0]]#1, %[[DIM1]]#1 {bindc_name = ".tmp", uniq_name = ""}
! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?x?x2xi32>>, !fir.shape<3>) -> (!fir.box<!fir.array<?x?x2xi32>>, !fir.heap<!fir.array<?x?x2xi32>>)
! CHECK: acc.yield %[[DECL]]#0 : !fir.box<!fir.array<?x?x2xi32>>
+! CHECK: } destroy {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?x?x2xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?x?x2xi32>>):
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?x?x2xi32>>) -> !fir.ref<!fir.array<?x?x2xi32>>
+! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?x?x2xi32>>) -> !fir.heap<!fir.array<?x?x2xi32>>
+! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?x?x2xi32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.private.recipe @privatization_ref_box_ptr_Uxi32 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> init {
@@ -89,6 +113,13 @@
! CHECK: %[[CONV:.*]] = fir.convert %[[DECLAREBOX]]#0 : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>>
! CHECK: fir.store %[[DECLARE]]#0 to %[[CONV]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
! CHECK: acc.yield %[[DECLAREBOX]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: } destroy {
+! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
+! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>) -> !fir.ptr<!fir.array<?xi32>>
+! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ptr<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: @privatization_ref_box_heap_i32 : !fir.ref<!fir.box<!fir.heap<i32>>> init {
@@ -99,6 +130,12 @@
! CHECK: %[[BOX:.*]] = fir.embox %[[ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
! CHECK: fir.store %[[BOX]] to %[[DECLARE]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
! CHECK: acc.yield %[[DECLARE]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK: } destroy {
+! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<i32>>>, %arg1: !fir.ref<!fir.box<!fir.heap<i32>>>):
+! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK: fir.freemem %[[ADDR]] : !fir.heap<i32>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.private.recipe @privatization_ref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> init {
@@ -114,6 +151,12 @@
! CHECK: %[[CONV:.*]] = fir.convert %[[DECLAREBOX]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>>
! CHECK: fir.store %[[DECLARE]]#0 to %[[CONV]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
! CHECK: acc.yield %[[DECLAREBOX]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: } destroy {
+! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: fir.freemem %[[ADDR]] : !fir.heap<!fir.array<?xi32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.private.recipe @privatization_box_Uxi32 : !fir.box<!fir.array<?xi32>> init {
@@ -124,6 +167,12 @@
! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<?xi32>, %0#1 {bindc_name = ".tmp", uniq_name = ""}
! CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
! CHECK: acc.yield %[[DECLARE:.*]]#0 : !fir.box<!fir.array<?xi32>>
+! CHECK: } destroy {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>):
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_section_lb50.ub99_ref_50xf32 : !fir.ref<!fir.array<50xf32>> init {
@@ -140,6 +189,7 @@
! CHECK: %[[DES_SRC:.*]] = hlfir.designate %[[DECL_SRC]]#0 shape %[[SHAPE:.*]] : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<50xf32>>
! CHECK: %[[DES_DST:.*]] = hlfir.designate %[[DECL_DST]]#0 shape %[[SHAPE:.*]] : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<50xf32>>
! CHECK: hlfir.assign %[[DES_SRC]] to %[[DES_DST]] : !fir.ref<!fir.array<50xf32>>, !fir.ref<!fir.array<50xf32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_100xf32 : !fir.ref<!fir.array<100xf32>> init {
diff --git a/flang/test/Lower/namelist.f90 b/flang/test/Lower/namelist.f90
index 770af46..a258da1 100644
--- a/flang/test/Lower/namelist.f90
+++ b/flang/test/Lower/namelist.f90
@@ -123,6 +123,45 @@ subroutine global_pointer
write(10, nml=mygroup)
end
+module m
+ type base
+ real :: r1
+ end type
+ interface write(formatted)
+ subroutine writeformatted(dtv, unit, iotype, v_list, iostat, iomsg )
+ import base
+ class(base), intent(in) :: dtv
+ integer, intent(in) :: unit
+ character(*), intent(in) :: iotype
+ integer, intent(in) :: v_list(:)
+ integer, intent(out) :: iostat
+ character(*), intent(inout) :: iomsg
+ end subroutine
+ end interface
+end module
+
+! CHECK-LABEL: c.func @_QPlocal_poly_namelist
+subroutine local_poly_namelist
+ use m
+ class(base), allocatable :: b1
+! CHECK: %[[V_0:[0-9]+]] = fir.alloca !fir.class<!fir.ptr<!fir.type<_QMmTbase{r1:f32}>>>
+! CHECK: %[[V_2:[0-9]+]] = fir.alloca !fir.class<!fir.heap<!fir.type<_QMmTbase{r1:f32}>>> {bindc_name = "b1", uniq_name = "_QFlocal_poly_namelistEb1"}
+! CHECK: %[[V_5:[0-9]+]] = fir.declare %[[V_2]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFlocal_poly_namelistEb1"} : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMmTbase{r1:f32}>>>>) -> !fir.ref<!fir.class<!fir.heap<!fir.type<_QMmTbase{r1:f32}>>>>
+! CHECK: %[[V_9:[0-9]+]] = fir.alloca !fir.array<1xtuple<!fir.ref<i8>, !fir.ref<!fir.box<none>>>>
+! CHECK: %[[V_10:[0-9]+]] = fir.undefined !fir.array<1xtuple<!fir.ref<i8>, !fir.ref<!fir.box<none>>>>
+! CHECK: %[[V_11:[0-9]+]] = fir.address_of(@_QQclX623100) : !fir.ref<!fir.char<1,3>>
+! CHECK: %[[V_12:[0-9]+]] = fir.convert %[[V_11]] : (!fir.ref<!fir.char<1,3>>) -> !fir.ref<i8>
+! CHECK: %[[V_13:[0-9]+]] = fir.insert_value %[[V_10]], %[[V_12]], [0 : index, 0 : index] : (!fir.array<1xtuple<!fir.ref<i8>, !fir.ref<!fir.box<none>>>>, !fir.ref<i8>) -> !fir.array<1xtuple<!fir.ref<i8>, !fir.ref<!fir.box<none>>>>
+! CHECK: %[[V_14:[0-9]+]] = fir.load %[[V_5]] : !fir.ref<!fir.class<!fir.heap<!fir.type<_QMmTbase{r1:f32}>>>>
+! CHECK: %[[V_15:[0-9]+]] = fir.rebox %[[V_14]] : (!fir.class<!fir.heap<!fir.type<_QMmTbase{r1:f32}>>>) -> !fir.class<!fir.ptr<!fir.type<_QMmTbase{r1:f32}>>>
+! CHECK: fir.store %[[V_15]] to %[[V_0]] : !fir.ref<!fir.class<!fir.ptr<!fir.type<_QMmTbase{r1:f32}>>>>
+! CHECK: %[[V_16:[0-9]+]] = fir.convert %[[V_0]] : (!fir.ref<!fir.class<!fir.ptr<!fir.type<_QMmTbase{r1:f32}>>>>) -> !fir.ref<!fir.box<none>>
+! CHECK: %[[V_17:[0-9]+]] = fir.insert_value %[[V_13]], %[[V_16]], [0 : index, 1 : index] : (!fir.array<1xtuple<!fir.ref<i8>, !fir.ref<!fir.box<none>>>>, !fir.ref<!fir.box<none>>) -> !fir.array<1xtuple<!fir.ref<i8>, !fir.ref<!fir.box<none>>>>
+! CHECK: fir.store %[[V_17]] to %[[V_9]] : !fir.ref<!fir.array<1xtuple<!fir.ref<i8>, !fir.ref<!fir.box<none>>>>>
+ namelist/mygroup/b1
+ write(10, nml=mygroup)
+end subroutine
+
module mmm
real rrr
namelist /aaa/ rrr
@@ -142,3 +181,4 @@ end
! CHECK-NOT: ggg
! CHECK: fir.string_lit "aaa\00"(4) : !fir.char<1,4>
+