Diffstat (limited to 'flang')
-rw-r--r--  flang/include/flang/Lower/AbstractConverter.h | 4
-rw-r--r--  flang/include/flang/Lower/OpenACC.h | 5
-rw-r--r--  flang/include/flang/Parser/parse-tree.h | 28
-rw-r--r--  flang/include/flang/Semantics/symbol.h | 3
-rw-r--r--  flang/lib/Lower/Bridge.cpp | 70
-rw-r--r--  flang/lib/Lower/OpenACC.cpp | 380
-rw-r--r--  flang/lib/Lower/OpenMP/OpenMP.cpp | 1
-rw-r--r--  flang/lib/Optimizer/CodeGen/CodeGen.cpp | 2
-rw-r--r--  flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp | 225
-rw-r--r--  flang/test/Examples/omp-atomic.f90 | 2
-rw-r--r--  flang/test/Examples/omp-declarative-directive.f90 | 2
-rw-r--r--  flang/test/Examples/omp-device-constructs.f90 | 2
-rw-r--r--  flang/test/Examples/omp-in-reduction-clause.f90 | 2
-rw-r--r--  flang/test/Examples/omp-nowait.f90 | 2
-rw-r--r--  flang/test/Examples/omp-order-clause.f90 | 2
-rw-r--r--  flang/test/Examples/omp-sections.f90 | 2
-rw-r--r--  flang/test/Lower/OpenACC/acc-data-operands-remapping.f90 | 601
-rw-r--r--  flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 | 26
-rw-r--r--  flang/test/Lower/OpenACC/acc-firstprivate-derived-pointer-component.f90 | 26
-rw-r--r--  flang/test/Lower/OpenACC/acc-firstprivate-derived-user-assign.f90 | 21
-rw-r--r--  flang/test/Lower/OpenACC/acc-firstprivate-derived.f90 | 21
-rw-r--r--  flang/test/Lower/OpenACC/acc-loop-collapse-force-lowering.f90 | 41
-rw-r--r--  flang/test/Lower/OpenACC/acc-loop-exit.f90 | 2
-rw-r--r--  flang/test/Lower/OpenACC/acc-private.f90 | 2
-rw-r--r--  flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90 | 30
-rw-r--r--  flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90 | 2
-rw-r--r--  flang/test/Lower/OpenMP/map-descriptor-deferral.f90 | 96
-rw-r--r--  flang/test/Lower/generic-shadows-specific.F90 | 40
-rw-r--r--  flang/test/Transforms/omp-map-info-finalization.fir | 6
29 files changed, 1463 insertions, 183 deletions
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h
index 0ffe27e..f8322a5 100644
--- a/flang/include/flang/Lower/AbstractConverter.h
+++ b/flang/include/flang/Lower/AbstractConverter.h
@@ -123,6 +123,10 @@ public:
virtual Fortran::lower::SymMap::StorageDesc
getSymbolStorage(SymbolRef sym) = 0;
+ /// Return the Symbol Map used to map semantics::Symbol to their SSA
+ /// values in the generated MLIR.
+ virtual Fortran::lower::SymMap &getSymbolMap() = 0;
+
/// Override lowering of expression with pre-lowered values.
/// Associate mlir::Value to evaluate::Expr. All subsequent call to
/// genExprXXX() will replace any occurrence of an overridden
diff --git a/flang/include/flang/Lower/OpenACC.h b/flang/include/flang/Lower/OpenACC.h
index 4622dbc..69f1f5b 100644
--- a/flang/include/flang/Lower/OpenACC.h
+++ b/flang/include/flang/Lower/OpenACC.h
@@ -122,6 +122,11 @@ void genOpenACCTerminator(fir::FirOpBuilder &, mlir::Operation *,
/// clause.
uint64_t getLoopCountForCollapseAndTile(const Fortran::parser::AccClauseList &);
+/// Parse collapse clause and return {size, force}. If absent, returns
+/// {1,false}.
+std::pair<uint64_t, bool>
+getCollapseSizeAndForce(const Fortran::parser::AccClauseList &);
+
/// Checks whether the current insertion point is inside OpenACC loop.
bool isInOpenACCLoop(fir::FirOpBuilder &);
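For illustration, a hedged Fortran sketch of the clause forms this new helper distinguishes; the subroutine and variable names are made up and not taken from this patch. A plain collapse(2) would yield {2, false}, while the force modifier yields {2, true}.

  ! Hypothetical example; names are illustrative only.
  subroutine collapse_force_sketch(a, n, m)
    integer :: n, m, i, j
    real :: a(n, m)
    !$acc parallel loop collapse(force: 2)   ! getCollapseSizeAndForce -> {2, true}
    do i = 1, n
      do j = 1, m
        a(i, j) = 0.0
      end do
    end do
  end subroutine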
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index 1443e93..d919b77 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -3517,7 +3517,9 @@ struct OmpObject {
std::variant<Designator, /*common block*/ Name, Invalid> u;
};
-WRAPPER_CLASS(OmpObjectList, std::list<OmpObject>);
+struct OmpObjectList {
+ WRAPPER_CLASS_BOILERPLATE(OmpObjectList, std::list<OmpObject>);
+};
// Ref: [4.5:201-207], [5.0:293-299], [5.1:325-331], [5.2:124]
//
@@ -3547,14 +3549,18 @@ struct OmpTypeSpecifier {
std::variant<TypeSpec, DeclarationTypeSpec> u;
};
-WRAPPER_CLASS(OmpTypeNameList, std::list<OmpTypeSpecifier>);
+struct OmpTypeNameList {
+ WRAPPER_CLASS_BOILERPLATE(OmpTypeNameList, std::list<OmpTypeSpecifier>);
+};
struct OmpLocator {
UNION_CLASS_BOILERPLATE(OmpLocator);
std::variant<OmpObject, FunctionReference> u;
};
-WRAPPER_CLASS(OmpLocatorList, std::list<OmpLocator>);
+struct OmpLocatorList {
+ WRAPPER_CLASS_BOILERPLATE(OmpLocatorList, std::list<OmpLocator>);
+};
// Ref: [4.5:58-60], [5.0:58-60], [5.1:63-68], [5.2:197-198], [6.0:334-336]
//
@@ -4324,7 +4330,9 @@ struct OmpIteration {
//
// iteration-vector ->
// [iteration...] // since 4.5
-WRAPPER_CLASS(OmpIterationVector, std::list<OmpIteration>);
+struct OmpIterationVector {
+ WRAPPER_CLASS_BOILERPLATE(OmpIterationVector, std::list<OmpIteration>);
+};
// Extract this into a separate structure (instead of having it directly in
// OmpDoacrossClause), so that the context in TYPE_CONTEXT_PARSER can be set
@@ -4364,14 +4372,18 @@ struct OmpDependClause {
//
// doacross-clause ->
// DOACROSS(dependence-type: iteration-vector) // since 5.2
-WRAPPER_CLASS(OmpDoacrossClause, OmpDoacross);
+struct OmpDoacrossClause {
+ WRAPPER_CLASS_BOILERPLATE(OmpDoacrossClause, OmpDoacross);
+};
// Ref: [5.0:254-255], [5.1:287-288], [5.2:73]
//
// destroy-clause ->
// DESTROY | // since 5.0, until 5.1
// DESTROY(variable) // since 5.2
-WRAPPER_CLASS(OmpDestroyClause, OmpObject);
+struct OmpDestroyClause {
+ WRAPPER_CLASS_BOILERPLATE(OmpDestroyClause, OmpObject);
+};
// Ref: [5.0:135-140], [5.1:161-166], [5.2:265-266]
//
@@ -4785,7 +4797,9 @@ struct OmpInitClause {
// REF: [5.1:217-220], [5.2:294]
//
// 14.1.3 use-clause -> USE (interop-var)
-WRAPPER_CLASS(OmpUseClause, OmpObject);
+struct OmpUseClause {
+ WRAPPER_CLASS_BOILERPLATE(OmpUseClause, OmpObject);
+};
// OpenMP Clauses
struct OmpClause {
diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h
index 975423b..98e2a5f 100644
--- a/flang/include/flang/Semantics/symbol.h
+++ b/flang/include/flang/Semantics/symbol.h
@@ -1126,6 +1126,9 @@ inline const DeclTypeSpec *Symbol::GetTypeImpl(int depth) const {
[&](const HostAssocDetails &x) {
return x.symbol().GetTypeImpl(depth);
},
+ [&](const GenericDetails &x) {
+ return x.specific() ? x.specific()->GetTypeImpl(depth) : nullptr;
+ },
[](const auto &) -> const DeclTypeSpec * { return nullptr; },
},
details_);
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index 780d56f..50a687c 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -643,6 +643,8 @@ public:
return localSymbols.lookupStorage(sym);
}
+ Fortran::lower::SymMap &getSymbolMap() override final { return localSymbols; }
+
void
overrideExprValues(const Fortran::lower::ExprToValueMap *map) override final {
exprValueOverrides = map;
@@ -3190,15 +3192,20 @@ private:
std::get_if<Fortran::parser::OpenACCCombinedConstruct>(&acc.u);
Fortran::lower::pft::Evaluation *curEval = &getEval();
+ // Determine collapse depth/force and loopCount
+ bool collapseForce = false;
+ uint64_t collapseDepth = 1;
+ uint64_t loopCount = 1;
if (accLoop || accCombined) {
- uint64_t loopCount;
if (accLoop) {
const Fortran::parser::AccBeginLoopDirective &beginLoopDir =
std::get<Fortran::parser::AccBeginLoopDirective>(accLoop->t);
const Fortran::parser::AccClauseList &clauseList =
std::get<Fortran::parser::AccClauseList>(beginLoopDir.t);
loopCount = Fortran::lower::getLoopCountForCollapseAndTile(clauseList);
+ std::tie(collapseDepth, collapseForce) =
+ Fortran::lower::getCollapseSizeAndForce(clauseList);
} else if (accCombined) {
const Fortran::parser::AccBeginCombinedDirective &beginCombinedDir =
std::get<Fortran::parser::AccBeginCombinedDirective>(
@@ -3206,6 +3213,8 @@ private:
const Fortran::parser::AccClauseList &clauseList =
std::get<Fortran::parser::AccClauseList>(beginCombinedDir.t);
loopCount = Fortran::lower::getLoopCountForCollapseAndTile(clauseList);
+ std::tie(collapseDepth, collapseForce) =
+ Fortran::lower::getCollapseSizeAndForce(clauseList);
}
if (curEval->lowerAsStructured()) {
@@ -3215,8 +3224,63 @@ private:
}
}
- for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations())
- genFIR(e);
+ const bool isStructured = curEval && curEval->lowerAsStructured();
+ if (isStructured && collapseForce && collapseDepth > 1) {
+    // Force modifier: collect the prologue/epilogue of the first
+    // collapseDepth nested loops and sink them into the innermost loop body
+    // at that depth.
+ llvm::SmallVector<Fortran::lower::pft::Evaluation *> prologue, epilogue;
+ Fortran::lower::pft::Evaluation *parent = &getEval();
+ Fortran::lower::pft::Evaluation *innermostLoopEval = nullptr;
+ for (uint64_t lvl = 0; lvl + 1 < collapseDepth; ++lvl) {
+ epilogue.clear();
+ auto &kids = parent->getNestedEvaluations();
+ // Collect all non-loop statements before the next inner loop as
+ // prologue, then mark remaining siblings as epilogue and descend into
+ // the inner loop.
+ Fortran::lower::pft::Evaluation *childLoop = nullptr;
+ for (auto it = kids.begin(); it != kids.end(); ++it) {
+ if (it->getIf<Fortran::parser::DoConstruct>()) {
+ childLoop = &*it;
+ for (auto it2 = std::next(it); it2 != kids.end(); ++it2)
+ epilogue.push_back(&*it2);
+ break;
+ }
+ prologue.push_back(&*it);
+ }
+      // Semantics guarantees that collapseDepth does not exceed the nest
+      // depth, so childLoop must be found here.
+ assert(childLoop && "Expected inner DoConstruct for collapse");
+ parent = childLoop;
+ innermostLoopEval = childLoop;
+ }
+
+ // Track sunk evaluations (avoid double-lowering)
+ llvm::SmallPtrSet<const Fortran::lower::pft::Evaluation *, 16> sunk;
+ for (auto *e : prologue)
+ sunk.insert(e);
+ for (auto *e : epilogue)
+ sunk.insert(e);
+
+ auto sink =
+ [&](llvm::SmallVector<Fortran::lower::pft::Evaluation *> &lst) {
+ for (auto *e : lst)
+ genFIR(*e);
+ };
+
+ sink(prologue);
+
+ // Lower innermost loop body, skipping sunk
+ for (Fortran::lower::pft::Evaluation &e :
+ innermostLoopEval->getNestedEvaluations())
+ if (!sunk.contains(&e))
+ genFIR(e);
+
+ sink(epilogue);
+ } else {
+ // Normal lowering
+ for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations())
+ genFIR(e);
+ }
localSymbols.popScope();
builder->restoreInsertionPoint(insertPt);
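To make the sinking above concrete, here is a hedged Fortran sketch (names are illustrative, not taken from this patch's tests) of an imperfectly nested loop under collapse(force: 2). The assignment before the inner loop is the prologue and the one after it is the epilogue; both are lowered inside the body at the collapse depth, which is what the force modifier permits.

  ! Hypothetical imperfect nest; names are illustrative only.
  subroutine force_sink_sketch(a, b, offset, n, m)
    integer :: n, m, i, j
    real :: a(n, m), b(n), offset(n), t
    !$acc parallel loop collapse(force: 2)
    do i = 1, n
      t = offset(i)        ! prologue: sunk into the collapsed loop body
      do j = 1, m
        a(i, j) = a(i, j) + t
      end do
      b(i) = t             ! epilogue: sunk into the collapsed body, after the inner statements
    end do
  end subroutine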
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 4a9e494..62e5c0c 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -20,6 +20,7 @@
#include "flang/Lower/PFTBuilder.h"
#include "flang/Lower/StatementContext.h"
#include "flang/Lower/Support/Utils.h"
+#include "flang/Lower/SymbolMap.h"
#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Optimizer/Builder/Complex.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
@@ -33,6 +34,7 @@
#include "flang/Semantics/scope.h"
#include "flang/Semantics/tools.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
+#include "mlir/IR/IRMapping.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/STLExtras.h"
@@ -60,6 +62,16 @@ static llvm::cl::opt<bool> lowerDoLoopToAccLoop(
llvm::cl::desc("Whether to lower do loops as `acc.loop` operations."),
llvm::cl::init(true));
+static llvm::cl::opt<bool> enableSymbolRemapping(
+ "openacc-remap-symbols",
+ llvm::cl::desc("Whether to remap symbols that appears in data clauses."),
+ llvm::cl::init(true));
+
+static llvm::cl::opt<bool> enableDevicePtrRemap(
+ "openacc-remap-device-ptr-symbols",
+ llvm::cl::desc("sub-option of openacc-remap-symbols for deviceptr clause"),
+ llvm::cl::init(false));
+
// Special value for * passed in device_type or gang clauses.
static constexpr std::int64_t starCst = -1;
@@ -624,17 +636,19 @@ void genAtomicCapture(Fortran::lower::AbstractConverter &converter,
}
template <typename Op>
-static void
-genDataOperandOperations(const Fortran::parser::AccObjectList &objectList,
- Fortran::lower::AbstractConverter &converter,
- Fortran::semantics::SemanticsContext &semanticsContext,
- Fortran::lower::StatementContext &stmtCtx,
- llvm::SmallVectorImpl<mlir::Value> &dataOperands,
- mlir::acc::DataClause dataClause, bool structured,
- bool implicit, llvm::ArrayRef<mlir::Value> async,
- llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
- llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes,
- bool setDeclareAttr = false) {
+static void genDataOperandOperations(
+ const Fortran::parser::AccObjectList &objectList,
+ Fortran::lower::AbstractConverter &converter,
+ Fortran::semantics::SemanticsContext &semanticsContext,
+ Fortran::lower::StatementContext &stmtCtx,
+ llvm::SmallVectorImpl<mlir::Value> &dataOperands,
+ mlir::acc::DataClause dataClause, bool structured, bool implicit,
+ llvm::ArrayRef<mlir::Value> async,
+ llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
+ llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes,
+ bool setDeclareAttr = false,
+ llvm::SmallVectorImpl<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ *symbolPairs = nullptr) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext};
const bool unwrapBoxAddr = true;
@@ -655,6 +669,9 @@ genDataOperandOperations(const Fortran::parser::AccObjectList &objectList,
/*strideIncludeLowerExtent=*/strideIncludeLowerExtent);
LLVM_DEBUG(llvm::dbgs() << __func__ << "\n"; info.dump(llvm::dbgs()));
+ bool isWholeSymbol =
+ !designator || Fortran::evaluate::UnwrapWholeSymbolDataRef(*designator);
+
// If the input value is optional and is not a descriptor, we use the
// rawInput directly.
mlir::Value baseAddr = ((fir::unwrapRefType(info.addr.getType()) !=
@@ -668,6 +685,11 @@ genDataOperandOperations(const Fortran::parser::AccObjectList &objectList,
asyncOnlyDeviceTypes, unwrapBoxAddr, info.isPresent);
dataOperands.push_back(op.getAccVar());
+ // Track the symbol and its corresponding mlir::Value if requested
+ if (symbolPairs && isWholeSymbol)
+ symbolPairs->emplace_back(op.getAccVar(),
+ Fortran::semantics::SymbolRef(symbol));
+
// For UseDeviceOp, if operand is one of a pair resulting from a
// declare operation, create a UseDeviceOp for the other operand as well.
if constexpr (std::is_same_v<Op, mlir::acc::UseDeviceOp>) {
@@ -681,6 +703,8 @@ genDataOperandOperations(const Fortran::parser::AccObjectList &objectList,
asyncDeviceTypes, asyncOnlyDeviceTypes,
unwrapBoxAddr, info.isPresent);
dataOperands.push_back(op.getAccVar());
+          // Not adding this to symbolPairs because it only makes sense to
+          // map the symbol to a single value.
}
}
}
@@ -1264,7 +1288,9 @@ static void genPrivatizationRecipes(
llvm::SmallVector<mlir::Attribute> &privatizationRecipes,
llvm::ArrayRef<mlir::Value> async,
llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
- llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes) {
+ llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes,
+ llvm::SmallVectorImpl<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ *symbolPairs = nullptr) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext};
for (const auto &accObject : objectList.v) {
@@ -1284,6 +1310,9 @@ static void genPrivatizationRecipes(
/*strideIncludeLowerExtent=*/strideIncludeLowerExtent);
LLVM_DEBUG(llvm::dbgs() << __func__ << "\n"; info.dump(llvm::dbgs()));
+ bool isWholeSymbol =
+ !designator || Fortran::evaluate::UnwrapWholeSymbolDataRef(*designator);
+
RecipeOp recipe;
mlir::Type retTy = getTypeFromBounds(bounds, info.addr.getType());
if constexpr (std::is_same_v<RecipeOp, mlir::acc::PrivateRecipeOp>) {
@@ -1297,6 +1326,11 @@ static void genPrivatizationRecipes(
/*implicit=*/false, mlir::acc::DataClause::acc_private, retTy, async,
asyncDeviceTypes, asyncOnlyDeviceTypes, /*unwrapBoxAddr=*/true);
dataOperands.push_back(op.getAccVar());
+
+ // Track the symbol and its corresponding mlir::Value if requested
+ if (symbolPairs && isWholeSymbol)
+ symbolPairs->emplace_back(op.getAccVar(),
+ Fortran::semantics::SymbolRef(symbol));
} else {
std::string suffix =
areAllBoundConstant(bounds) ? getBoundsString(bounds) : "";
@@ -1310,6 +1344,11 @@ static void genPrivatizationRecipes(
async, asyncDeviceTypes, asyncOnlyDeviceTypes,
/*unwrapBoxAddr=*/true);
dataOperands.push_back(op.getAccVar());
+
+ // Track the symbol and its corresponding mlir::Value if requested
+ if (symbolPairs && isWholeSymbol)
+ symbolPairs->emplace_back(op.getAccVar(),
+ Fortran::semantics::SymbolRef(symbol));
}
privatizationRecipes.push_back(mlir::SymbolRefAttr::get(
builder.getContext(), recipe.getSymName().str()));
@@ -1949,15 +1988,16 @@ mlir::Type getTypeFromIvTypeSize(fir::FirOpBuilder &builder,
return builder.getIntegerType(ivTypeSize * 8);
}
-static void
-privatizeIv(Fortran::lower::AbstractConverter &converter,
- const Fortran::semantics::Symbol &sym, mlir::Location loc,
- llvm::SmallVector<mlir::Type> &ivTypes,
- llvm::SmallVector<mlir::Location> &ivLocs,
- llvm::SmallVector<mlir::Value> &privateOperands,
- llvm::SmallVector<mlir::Value> &ivPrivate,
- llvm::SmallVector<mlir::Attribute> &privatizationRecipes,
- bool isDoConcurrent = false) {
+static void privatizeIv(
+ Fortran::lower::AbstractConverter &converter,
+ const Fortran::semantics::Symbol &sym, mlir::Location loc,
+ llvm::SmallVector<mlir::Type> &ivTypes,
+ llvm::SmallVector<mlir::Location> &ivLocs,
+ llvm::SmallVector<mlir::Value> &privateOperands,
+ llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ &ivPrivate,
+ llvm::SmallVector<mlir::Attribute> &privatizationRecipes,
+ bool isDoConcurrent = false) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
mlir::Type ivTy = getTypeFromIvTypeSize(builder, sym);
@@ -2001,15 +2041,8 @@ privatizeIv(Fortran::lower::AbstractConverter &converter,
builder.getContext(), recipe.getSymName().str()));
}
- // Map the new private iv to its symbol for the scope of the loop. bindSymbol
- // might create a hlfir.declare op, if so, we map its result in order to
- // use the sym value in the scope.
- converter.bindSymbol(sym, mlir::acc::getAccVar(privateOp));
- auto privateValue = converter.getSymbolAddress(sym);
- if (auto declareOp =
- mlir::dyn_cast<hlfir::DeclareOp>(privateValue.getDefiningOp()))
- privateValue = declareOp.getResults()[0];
- ivPrivate.push_back(privateValue);
+ ivPrivate.emplace_back(mlir::acc::getAccVar(privateOp),
+ Fortran::semantics::SymbolRef(sym));
}
static void determineDefaultLoopParMode(
@@ -2088,7 +2121,8 @@ static void processDoLoopBounds(
llvm::SmallVector<mlir::Value> &upperbounds,
llvm::SmallVector<mlir::Value> &steps,
llvm::SmallVector<mlir::Value> &privateOperands,
- llvm::SmallVector<mlir::Value> &ivPrivate,
+ llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ &ivPrivate,
llvm::SmallVector<mlir::Attribute> &privatizationRecipes,
llvm::SmallVector<mlir::Type> &ivTypes,
llvm::SmallVector<mlir::Location> &ivLocs,
@@ -2144,11 +2178,25 @@ static void processDoLoopBounds(
locs.push_back(converter.genLocation(
Fortran::parser::FindSourceLocation(outerDoConstruct)));
} else {
- auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>();
- assert(doCons && "expect do construct");
- loopControl = &*doCons->GetLoopControl();
+ // Safely locate the next inner DoConstruct within this eval.
+ const Fortran::parser::DoConstruct *innerDo = nullptr;
+ if (crtEval && crtEval->hasNestedEvaluations()) {
+ for (Fortran::lower::pft::Evaluation &child :
+ crtEval->getNestedEvaluations()) {
+ if (auto *stmt = child.getIf<Fortran::parser::DoConstruct>()) {
+ innerDo = stmt;
+ // Prepare to descend for the next iteration
+ crtEval = &child;
+ break;
+ }
+ }
+ }
+ if (!innerDo)
+ break; // No deeper loop; stop collecting collapsed bounds.
+
+ loopControl = &*innerDo->GetLoopControl();
locs.push_back(converter.genLocation(
- Fortran::parser::FindSourceLocation(*doCons)));
+ Fortran::parser::FindSourceLocation(*innerDo)));
}
const Fortran::parser::LoopControl::Bounds *bounds =
@@ -2172,32 +2220,127 @@ static void processDoLoopBounds(
inclusiveBounds.push_back(true);
- if (i < loopsToProcess - 1)
- crtEval = &*std::next(crtEval->getNestedEvaluations().begin());
+ // crtEval already updated when descending; no blind increment here.
}
}
}
-static mlir::acc::LoopOp
-buildACCLoopOp(Fortran::lower::AbstractConverter &converter,
- mlir::Location currentLocation,
- Fortran::semantics::SemanticsContext &semanticsContext,
- Fortran::lower::StatementContext &stmtCtx,
- const Fortran::parser::DoConstruct &outerDoConstruct,
- Fortran::lower::pft::Evaluation &eval,
- llvm::SmallVector<mlir::Value> &privateOperands,
- llvm::SmallVector<mlir::Attribute> &privatizationRecipes,
- llvm::SmallVector<mlir::Value> &gangOperands,
- llvm::SmallVector<mlir::Value> &workerNumOperands,
- llvm::SmallVector<mlir::Value> &vectorOperands,
- llvm::SmallVector<mlir::Value> &tileOperands,
- llvm::SmallVector<mlir::Value> &cacheOperands,
- llvm::SmallVector<mlir::Value> &reductionOperands,
- llvm::SmallVector<mlir::Type> &retTy, mlir::Value yieldValue,
- uint64_t loopsToProcess) {
+/// Remap symbols that appeared in OpenACC data clauses to use the results of
+/// the corresponding data operations. This allows isolating symbol accesses
+/// inside the OpenACC region from accesses in the host and other regions while
+/// preserving Fortran information about the symbols for optimizations.
+template <typename RegionOp>
+static void remapDataOperandSymbols(
+ Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &builder,
+ RegionOp &regionOp,
+ const llvm::SmallVector<
+ std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ &dataOperandSymbolPairs) {
+ if (!enableSymbolRemapping || dataOperandSymbolPairs.empty())
+ return;
+
+ // Map Symbols that appeared inside data clauses to a new hlfir.declare whose
+ // input is the acc data operation result.
+ // This allows isolating all the symbol accesses inside the compute region
+ // from accesses in the host and other regions while preserving the Fortran
+ // information about the symbols for Fortran specific optimizations inside the
+ // region.
+ Fortran::lower::SymMap &symbolMap = converter.getSymbolMap();
+ mlir::OpBuilder::InsertionGuard insertGuard(builder);
+ builder.setInsertionPointToStart(&regionOp.getRegion().front());
+ llvm::SmallPtrSet<const Fortran::semantics::Symbol *, 8> seenSymbols;
+ mlir::IRMapping mapper;
+ for (auto [value, symbol] : dataOperandSymbolPairs) {
+
+    // If a symbol appears in several data clauses, just map it to the first
+    // result (all data operation results for a symbol point to the same
+    // memory, so it does not matter which one is used).
+ if (seenSymbols.contains(&symbol.get()))
+ continue;
+ seenSymbols.insert(&symbol.get());
+ std::optional<fir::FortranVariableOpInterface> hostDef =
+ symbolMap.lookupVariableDefinition(symbol);
+ assert(hostDef.has_value() && llvm::isa<hlfir::DeclareOp>(*hostDef) &&
+ "expected symbol to be mapped to hlfir.declare");
+ auto hostDeclare = llvm::cast<hlfir::DeclareOp>(*hostDef);
+ // Replace base input and DummyScope inputs.
+ mlir::Value hostInput = hostDeclare.getMemref();
+ mlir::Type hostType = hostInput.getType();
+ mlir::Type computeType = value.getType();
+ if (hostType == computeType) {
+ mapper.map(hostInput, value);
+ } else if (llvm::isa<fir::BaseBoxType>(computeType)) {
+ assert(!llvm::isa<fir::BaseBoxType>(hostType) &&
+ "box type mismatch between compute region variable and "
+ "hlfir.declare input unexpected");
+ if (Fortran::semantics::IsOptional(symbol))
+ TODO(regionOp.getLoc(),
+ "remapping OPTIONAL symbol in OpenACC compute region");
+ auto rawValue =
+ fir::BoxAddrOp::create(builder, regionOp.getLoc(), hostType, value);
+ mapper.map(hostInput, rawValue);
+ } else {
+ assert(!llvm::isa<fir::BaseBoxType>(hostType) &&
+ "compute region variable should not be raw address when host "
+ "hlfir.declare input was a box");
+ assert(fir::isBoxAddress(hostType) == fir::isBoxAddress(computeType) &&
+ "compute region variable should be a pointer/allocatable if and "
+ "only if host is");
+ assert(fir::isa_ref_type(hostType) && fir::isa_ref_type(computeType) &&
+ "compute region variable and host variable should both be raw "
+ "addresses");
+ mlir::Value cast =
+ builder.createConvert(regionOp.getLoc(), hostType, value);
+ mapper.map(hostInput, cast);
+ }
+ if (mlir::Value dummyScope = hostDeclare.getDummyScope()) {
+ // Copy the dummy scope into the region so that aliasing rules about
+ // Fortran dummies are understood inside the region and the abstract dummy
+ // scope type does not have to cross the OpenACC compute region boundary.
+ if (!mapper.contains(dummyScope)) {
+ mlir::Operation *hostDummyScopeOp = dummyScope.getDefiningOp();
+ assert(hostDummyScopeOp &&
+ "dummyScope defining operation must be visible in lowering");
+ (void)builder.clone(*hostDummyScopeOp, mapper);
+ }
+ }
+
+ mlir::Operation *computeDef =
+ builder.clone(*hostDeclare.getOperation(), mapper);
+
+ // The input box already went through an hlfir.declare. It has the correct
+ // local lower bounds and attribute. Do not generate a new fir.rebox.
+ if (llvm::isa<fir::BaseBoxType>(hostDeclare.getMemref().getType()))
+ llvm::cast<hlfir::DeclareOp>(*computeDef).setSkipRebox(true);
+
+ symbolMap.addVariableDefinition(
+ symbol, llvm::cast<fir::FortranVariableOpInterface>(computeDef));
+ }
+}
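As a rough illustration of what this remapping targets (a sketch in the spirit of the new acc-data-operands-remapping.f90 test, not an excerpt from it): with copy(x), references to x inside the region are rebound to a region-local hlfir.declare fed by the data operation result rather than the host declare.

  ! Hypothetical source pattern whose region accesses get remapped.
  subroutine remap_sketch(x)
    real :: x(100)
    !$acc parallel copy(x)
    x(1) = 1.0   ! uses the region-local hlfir.declare of the copy result
    !$acc end parallel
  end subroutine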
+
+static mlir::acc::LoopOp buildACCLoopOp(
+ Fortran::lower::AbstractConverter &converter,
+ mlir::Location currentLocation,
+ Fortran::semantics::SemanticsContext &semanticsContext,
+ Fortran::lower::StatementContext &stmtCtx,
+ const Fortran::parser::DoConstruct &outerDoConstruct,
+ Fortran::lower::pft::Evaluation &eval,
+ llvm::SmallVector<mlir::Value> &privateOperands,
+ llvm::SmallVector<mlir::Attribute> &privatizationRecipes,
+ llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ &dataOperandSymbolPairs,
+ llvm::SmallVector<mlir::Value> &gangOperands,
+ llvm::SmallVector<mlir::Value> &workerNumOperands,
+ llvm::SmallVector<mlir::Value> &vectorOperands,
+ llvm::SmallVector<mlir::Value> &tileOperands,
+ llvm::SmallVector<mlir::Value> &cacheOperands,
+ llvm::SmallVector<mlir::Value> &reductionOperands,
+ llvm::SmallVector<mlir::Type> &retTy, mlir::Value yieldValue,
+ uint64_t loopsToProcess) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
- llvm::SmallVector<mlir::Value> ivPrivate;
+ llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ ivPrivate;
llvm::SmallVector<mlir::Type> ivTypes;
llvm::SmallVector<mlir::Location> ivLocs;
llvm::SmallVector<bool> inclusiveBounds;
@@ -2231,10 +2374,22 @@ buildACCLoopOp(Fortran::lower::AbstractConverter &converter,
builder, builder.getFusedLoc(locs), currentLocation, eval, operands,
operandSegments, /*outerCombined=*/false, retTy, yieldValue, ivTypes,
ivLocs);
-
- for (auto [arg, value] : llvm::zip(
- loopOp.getLoopRegions().front()->front().getArguments(), ivPrivate))
- fir::StoreOp::create(builder, currentLocation, arg, value);
+  // Ensure the iv symbol is mapped to the private iv SSA value for the scope
+  // of the loop even if it did not appear explicitly in a PRIVATE clause (if
+  // it appeared explicitly in such a clause, that is also fine because
+  // duplicates in the list are ignored).
+ dataOperandSymbolPairs.append(ivPrivate.begin(), ivPrivate.end());
+ // Remap symbols from data clauses to use data operation results
+ remapDataOperandSymbols(converter, builder, loopOp, dataOperandSymbolPairs);
+
+ for (auto [arg, iv] :
+ llvm::zip(loopOp.getLoopRegions().front()->front().getArguments(),
+ ivPrivate)) {
+ // Store block argument to the related iv private variable.
+ mlir::Value privateValue =
+ converter.getSymbolAddress(std::get<Fortran::semantics::SymbolRef>(iv));
+ fir::StoreOp::create(builder, currentLocation, arg, privateValue);
+ }
loopOp.setInclusiveUpperbound(inclusiveBounds);
@@ -2260,6 +2415,10 @@ static mlir::acc::LoopOp createLoopOp(
llvm::SmallVector<int32_t> tileOperandsSegments, gangOperandsSegments;
llvm::SmallVector<int64_t> collapseValues;
+ // Vector to track mlir::Value results and their corresponding Fortran symbols
+ llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ dataOperandSymbolPairs;
+
llvm::SmallVector<mlir::Attribute> gangArgTypes;
llvm::SmallVector<mlir::Attribute> seqDeviceTypes, independentDeviceTypes,
autoDeviceTypes, vectorOperandsDeviceTypes, workerNumOperandsDeviceTypes,
@@ -2380,7 +2539,8 @@ static mlir::acc::LoopOp createLoopOp(
genPrivatizationRecipes<mlir::acc::PrivateRecipeOp>(
privateClause->v, converter, semanticsContext, stmtCtx,
privateOperands, privatizationRecipes, /*async=*/{},
- /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{});
+ /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{},
+ &dataOperandSymbolPairs);
} else if (const auto *reductionClause =
std::get_if<Fortran::parser::AccClause::Reduction>(
&clause.u)) {
@@ -2406,10 +2566,6 @@ static mlir::acc::LoopOp createLoopOp(
std::get_if<Fortran::parser::AccClause::Collapse>(
&clause.u)) {
const Fortran::parser::AccCollapseArg &arg = collapseClause->v;
- const auto &force = std::get<bool>(arg.t);
- if (force)
- TODO(clauseLocation, "OpenACC collapse force modifier");
-
const auto &intExpr =
std::get<Fortran::parser::ScalarIntConstantExpr>(arg.t);
const auto *expr = Fortran::semantics::GetExpr(intExpr);
@@ -2436,9 +2592,9 @@ static mlir::acc::LoopOp createLoopOp(
Fortran::lower::getLoopCountForCollapseAndTile(accClauseList);
auto loopOp = buildACCLoopOp(
converter, currentLocation, semanticsContext, stmtCtx, outerDoConstruct,
- eval, privateOperands, privatizationRecipes, gangOperands,
- workerNumOperands, vectorOperands, tileOperands, cacheOperands,
- reductionOperands, retTy, yieldValue, loopsToProcess);
+ eval, privateOperands, privatizationRecipes, dataOperandSymbolPairs,
+ gangOperands, workerNumOperands, vectorOperands, tileOperands,
+ cacheOperands, reductionOperands, retTy, yieldValue, loopsToProcess);
if (!gangDeviceTypes.empty())
loopOp.setGangAttr(builder.getArrayAttr(gangDeviceTypes));
@@ -2568,7 +2724,9 @@ static void genDataOperandOperationsWithModifier(
llvm::ArrayRef<mlir::Value> async,
llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes,
llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes,
- bool setDeclareAttr = false) {
+ bool setDeclareAttr = false,
+ llvm::SmallVectorImpl<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ *symbolPairs = nullptr) {
const Fortran::parser::AccObjectListWithModifier &listWithModifier = x->v;
const auto &accObjectList =
std::get<Fortran::parser::AccObjectList>(listWithModifier.t);
@@ -2581,7 +2739,7 @@ static void genDataOperandOperationsWithModifier(
stmtCtx, dataClauseOperands, dataClause,
/*structured=*/true, /*implicit=*/false, async,
asyncDeviceTypes, asyncOnlyDeviceTypes,
- setDeclareAttr);
+ setDeclareAttr, symbolPairs);
}
template <typename Op>
@@ -2612,6 +2770,10 @@ static Op createComputeOp(
llvm::SmallVector<mlir::Attribute> privatizationRecipes,
firstPrivatizationRecipes, reductionRecipes;
+ // Vector to track mlir::Value results and their corresponding Fortran symbols
+ llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ dataOperandSymbolPairs;
+
// Self clause has optional values but can be present with
// no value as well. When there is no value, the op has an attribute to
// represent the clause.
@@ -2732,7 +2894,8 @@ static Op createComputeOp(
copyClause->v, converter, semanticsContext, stmtCtx,
dataClauseOperands, mlir::acc::DataClause::acc_copy,
/*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes,
- asyncOnlyDeviceTypes);
+ asyncOnlyDeviceTypes, /*setDeclareAttr=*/false,
+ &dataOperandSymbolPairs);
copyEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
dataClauseOperands.end());
} else if (const auto *copyinClause =
@@ -2744,7 +2907,8 @@ static Op createComputeOp(
Fortran::parser::AccDataModifier::Modifier::ReadOnly,
dataClauseOperands, mlir::acc::DataClause::acc_copyin,
mlir::acc::DataClause::acc_copyin_readonly, async, asyncDeviceTypes,
- asyncOnlyDeviceTypes);
+ asyncOnlyDeviceTypes, /*setDeclareAttr=*/false,
+ &dataOperandSymbolPairs);
copyinEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
dataClauseOperands.end());
} else if (const auto *copyoutClause =
@@ -2757,7 +2921,8 @@ static Op createComputeOp(
Fortran::parser::AccDataModifier::Modifier::ReadOnly,
dataClauseOperands, mlir::acc::DataClause::acc_copyout,
mlir::acc::DataClause::acc_copyout_zero, async, asyncDeviceTypes,
- asyncOnlyDeviceTypes);
+ asyncOnlyDeviceTypes, /*setDeclareAttr=*/false,
+ &dataOperandSymbolPairs);
copyoutEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
dataClauseOperands.end());
} else if (const auto *createClause =
@@ -2769,7 +2934,8 @@ static Op createComputeOp(
Fortran::parser::AccDataModifier::Modifier::Zero, dataClauseOperands,
mlir::acc::DataClause::acc_create,
mlir::acc::DataClause::acc_create_zero, async, asyncDeviceTypes,
- asyncOnlyDeviceTypes);
+ asyncOnlyDeviceTypes, /*setDeclareAttr=*/false,
+ &dataOperandSymbolPairs);
createEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
dataClauseOperands.end());
} else if (const auto *noCreateClause =
@@ -2780,7 +2946,8 @@ static Op createComputeOp(
noCreateClause->v, converter, semanticsContext, stmtCtx,
dataClauseOperands, mlir::acc::DataClause::acc_no_create,
/*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes,
- asyncOnlyDeviceTypes);
+ asyncOnlyDeviceTypes, /*setDeclareAttr=*/false,
+ &dataOperandSymbolPairs);
nocreateEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
dataClauseOperands.end());
} else if (const auto *presentClause =
@@ -2791,17 +2958,21 @@ static Op createComputeOp(
presentClause->v, converter, semanticsContext, stmtCtx,
dataClauseOperands, mlir::acc::DataClause::acc_present,
/*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes,
- asyncOnlyDeviceTypes);
+ asyncOnlyDeviceTypes, /*setDeclareAttr=*/false,
+ &dataOperandSymbolPairs);
presentEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
dataClauseOperands.end());
} else if (const auto *devicePtrClause =
std::get_if<Fortran::parser::AccClause::Deviceptr>(
&clause.u)) {
+ llvm::SmallVectorImpl<
+ std::pair<mlir::Value, Fortran::semantics::SymbolRef>> *symPairs =
+ enableDevicePtrRemap ? &dataOperandSymbolPairs : nullptr;
genDataOperandOperations<mlir::acc::DevicePtrOp>(
devicePtrClause->v, converter, semanticsContext, stmtCtx,
dataClauseOperands, mlir::acc::DataClause::acc_deviceptr,
/*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes,
- asyncOnlyDeviceTypes);
+ asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, symPairs);
} else if (const auto *attachClause =
std::get_if<Fortran::parser::AccClause::Attach>(&clause.u)) {
auto crtDataStart = dataClauseOperands.size();
@@ -2809,7 +2980,8 @@ static Op createComputeOp(
attachClause->v, converter, semanticsContext, stmtCtx,
dataClauseOperands, mlir::acc::DataClause::acc_attach,
/*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes,
- asyncOnlyDeviceTypes);
+ asyncOnlyDeviceTypes, /*setDeclareAttr=*/false,
+ &dataOperandSymbolPairs);
attachEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
dataClauseOperands.end());
} else if (const auto *privateClause =
@@ -2819,14 +2991,14 @@ static Op createComputeOp(
genPrivatizationRecipes<mlir::acc::PrivateRecipeOp>(
privateClause->v, converter, semanticsContext, stmtCtx,
privateOperands, privatizationRecipes, async, asyncDeviceTypes,
- asyncOnlyDeviceTypes);
+ asyncOnlyDeviceTypes, &dataOperandSymbolPairs);
} else if (const auto *firstprivateClause =
std::get_if<Fortran::parser::AccClause::Firstprivate>(
&clause.u)) {
genPrivatizationRecipes<mlir::acc::FirstprivateRecipeOp>(
firstprivateClause->v, converter, semanticsContext, stmtCtx,
firstprivateOperands, firstPrivatizationRecipes, async,
- asyncDeviceTypes, asyncOnlyDeviceTypes);
+ asyncDeviceTypes, asyncOnlyDeviceTypes, &dataOperandSymbolPairs);
} else if (const auto *reductionClause =
std::get_if<Fortran::parser::AccClause::Reduction>(
&clause.u)) {
@@ -2846,7 +3018,8 @@ static Op createComputeOp(
converter, semanticsContext, stmtCtx, dataClauseOperands,
mlir::acc::DataClause::acc_reduction,
/*structured=*/true, /*implicit=*/true, async, asyncDeviceTypes,
- asyncOnlyDeviceTypes);
+ asyncOnlyDeviceTypes, /*setDeclareAttr=*/false,
+ &dataOperandSymbolPairs);
copyEntryOperands.append(dataClauseOperands.begin() + crtDataStart,
dataClauseOperands.end());
}
@@ -2945,6 +3118,11 @@ static Op createComputeOp(
computeOp.setCombinedAttr(builder.getUnitAttr());
auto insPt = builder.saveInsertionPoint();
+
+ // Remap symbols from data clauses to use data operation results
+ remapDataOperandSymbols(converter, builder, computeOp,
+ dataOperandSymbolPairs);
+
builder.setInsertionPointAfter(computeOp);
// Create the exit operations after the region.
@@ -4860,25 +5038,34 @@ void Fortran::lower::genEarlyReturnInOpenACCLoop(fir::FirOpBuilder &builder,
uint64_t Fortran::lower::getLoopCountForCollapseAndTile(
const Fortran::parser::AccClauseList &clauseList) {
- uint64_t collapseLoopCount = 1;
+ uint64_t collapseLoopCount = getCollapseSizeAndForce(clauseList).first;
uint64_t tileLoopCount = 1;
for (const Fortran::parser::AccClause &clause : clauseList.v) {
- if (const auto *collapseClause =
- std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) {
- const parser::AccCollapseArg &arg = collapseClause->v;
- const auto &collapseValue{std::get<parser::ScalarIntConstantExpr>(arg.t)};
- collapseLoopCount = *Fortran::semantics::GetIntValue(collapseValue);
- }
if (const auto *tileClause =
std::get_if<Fortran::parser::AccClause::Tile>(&clause.u)) {
const parser::AccTileExprList &tileExprList = tileClause->v;
- const std::list<parser::AccTileExpr> &listTileExpr = tileExprList.v;
- tileLoopCount = listTileExpr.size();
+ tileLoopCount = tileExprList.v.size();
}
}
- if (tileLoopCount > collapseLoopCount)
- return tileLoopCount;
- return collapseLoopCount;
+ return tileLoopCount > collapseLoopCount ? tileLoopCount : collapseLoopCount;
+}
+
+std::pair<uint64_t, bool> Fortran::lower::getCollapseSizeAndForce(
+ const Fortran::parser::AccClauseList &clauseList) {
+ uint64_t size = 1;
+ bool force = false;
+ for (const Fortran::parser::AccClause &clause : clauseList.v) {
+ if (const auto *collapseClause =
+ std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) {
+ const Fortran::parser::AccCollapseArg &arg = collapseClause->v;
+ force = std::get<bool>(arg.t);
+ const auto &collapseValue =
+ std::get<Fortran::parser::ScalarIntConstantExpr>(arg.t);
+ size = *Fortran::semantics::GetIntValue(collapseValue);
+ break;
+ }
+ }
+ return {size, force};
}
/// Create an ACC loop operation for a DO construct when inside ACC compute
@@ -4921,6 +5108,8 @@ mlir::Operation *Fortran::lower::genOpenACCLoopFromDoConstruct(
reductionOperands;
llvm::SmallVector<mlir::Attribute> privatizationRecipes;
llvm::SmallVector<mlir::Type> retTy;
+ llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
+ dataOperandSymbolPairs;
mlir::Value yieldValue;
uint64_t loopsToProcess = 1; // Single loop construct
@@ -4929,9 +5118,10 @@ mlir::Operation *Fortran::lower::genOpenACCLoopFromDoConstruct(
Fortran::lower::StatementContext stmtCtx;
auto loopOp = buildACCLoopOp(
converter, converter.getCurrentLocation(), semanticsContext, stmtCtx,
- doConstruct, eval, privateOperands, privatizationRecipes, gangOperands,
- workerNumOperands, vectorOperands, tileOperands, cacheOperands,
- reductionOperands, retTy, yieldValue, loopsToProcess);
+ doConstruct, eval, privateOperands, privatizationRecipes,
+ dataOperandSymbolPairs, gangOperands, workerNumOperands, vectorOperands,
+ tileOperands, cacheOperands, reductionOperands, retTy, yieldValue,
+ loopsToProcess);
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
if (!privatizationRecipes.empty())
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 9e56c2b..bd94651 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -153,6 +153,7 @@ public:
clauseOps.loopLowerBounds = ops.loopLowerBounds;
clauseOps.loopUpperBounds = ops.loopUpperBounds;
clauseOps.loopSteps = ops.loopSteps;
+ clauseOps.collapseNumLoops = ops.collapseNumLoops;
ivOut.append(iv);
return true;
}
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 50603cb..4a05cd9 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3348,7 +3348,7 @@ struct DoConcurrentSpecifierOpConversion : public fir::FIROpConversion<OpTy> {
mlir::ConversionPatternRewriter &rewriter) const override {
#ifdef EXPENSIVE_CHECKS
auto uses = mlir::SymbolTable::getSymbolUses(
- specifier, specifier->getParentOfType<mlir::ModuleOp>());
+ specifier, specifier->template getParentOfType<mlir::ModuleOp>());
// `fir.local|fir.declare_reduction` ops are not supposed to have any uses
// at this point (i.e. during lowering to LLVM). In case of serialization,
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index e595e61..260e525 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -77,6 +77,10 @@ class MapInfoFinalizationPass
/// | |
std::map<mlir::Operation *, mlir::Value> localBoxAllocas;
+ // List of deferrable descriptors to process at the end of
+ // the pass.
+ llvm::SmallVector<mlir::Operation *> deferrableDesc;
+
/// Return true if the given path exists in a list of paths.
static bool
containsPath(const llvm::SmallVectorImpl<llvm::SmallVector<int64_t>> &paths,
@@ -183,6 +187,40 @@ class MapInfoFinalizationPass
newMemberIndexPaths.emplace_back(indexPath.begin(), indexPath.end());
}
+ // Check if the declaration operation we have refers to a dummy
+ // function argument.
+ bool isDummyArgument(mlir::Value mappedValue) {
+ if (auto declareOp = mlir::dyn_cast_if_present<hlfir::DeclareOp>(
+ mappedValue.getDefiningOp()))
+ if (auto dummyScope = declareOp.getDummyScope())
+ return true;
+ return false;
+ }
+
+  // Relevant for OpenMP < 5.2, where attach semantics and rules don't exist.
+  // As descriptors were an unspoken implementation detail in these versions,
+  // there are certain cases where the user (and the compiler implementation)
+  // can create data mapping errors by having temporary descriptors stuck
+  // in memory. The main example is a 'target enter data' map without a
+  // corresponding exit on an assumed-shape or assumed-size dummy argument:
+  // a local stack descriptor is generated, gets mapped, and is then left on
+  // the device. The user doesn't realize what they've done, as the OpenMP
+  // specification isn't explicit on descriptor handling in earlier versions
+  // and, as far as Fortran is concerned, this is something hidden from the
+  // user. To avoid this we can defer the descriptor mapping in these cases
+  // until target or target data regions, when we can be sure they have a
+  // clear, limited scope on device.
+ bool canDeferDescriptorMapping(mlir::Value descriptor) {
+ if (fir::isAllocatableType(descriptor.getType()) ||
+ fir::isPointerType(descriptor.getType()))
+ return false;
+ if (isDummyArgument(descriptor) &&
+ (fir::isAssumedType(descriptor.getType()) ||
+ fir::isAssumedShape(descriptor.getType())))
+ return true;
+ return false;
+ }
+
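A hedged Fortran sketch of the pattern described above (names are illustrative, not from this patch's tests): an assumed-shape dummy mapped by target enter data with no matching exit, where deferring the descriptor mapping avoids leaving a temporary stack descriptor resident on the device.

  ! Hypothetical enter-without-exit pattern on an assumed-shape dummy.
  subroutine enter_only(x)
    real :: x(:)
    !$omp target enter data map(to: x)
    ! No matching 'target exit data'; only the underlying data should
    ! stay mapped, not the local descriptor created for this call.
  end subroutine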
/// getMemberUserList gathers all users of a particular MapInfoOp that are
/// other MapInfoOp's and places them into the mapMemberUsers list, which
/// records the map that the current argument MapInfoOp "op" is part of
@@ -234,13 +272,16 @@ class MapInfoFinalizationPass
/// fir::BoxOffsetOp we utilise to access the descriptor datas
/// base address can be utilised.
mlir::Value getDescriptorFromBoxMap(mlir::omp::MapInfoOp boxMap,
- fir::FirOpBuilder &builder) {
+ fir::FirOpBuilder &builder,
+ bool &canDescBeDeferred) {
mlir::Value descriptor = boxMap.getVarPtr();
if (!fir::isTypeWithDescriptor(boxMap.getVarType()))
if (auto addrOp = mlir::dyn_cast_if_present<fir::BoxAddrOp>(
boxMap.getVarPtr().getDefiningOp()))
descriptor = addrOp.getVal();
+ canDescBeDeferred = canDeferDescriptorMapping(descriptor);
+
if (!mlir::isa<fir::BaseBoxType>(descriptor.getType()) &&
!fir::factory::isOptionalArgument(descriptor.getDefiningOp()))
return descriptor;
@@ -391,8 +432,7 @@ class MapInfoFinalizationPass
/// Check if the mapOp is present in the HasDeviceAddr clause on
/// the userOp. Only applies to TargetOp.
- bool isHasDeviceAddr(mlir::omp::MapInfoOp mapOp, mlir::Operation *userOp) {
- assert(userOp && "Expecting non-null argument");
+ bool isHasDeviceAddr(mlir::omp::MapInfoOp mapOp, mlir::Operation &userOp) {
if (auto targetOp = llvm::dyn_cast<mlir::omp::TargetOp>(userOp)) {
for (mlir::Value hda : targetOp.getHasDeviceAddrVars()) {
if (hda.getDefiningOp() == mapOp)
@@ -402,6 +442,26 @@ class MapInfoFinalizationPass
return false;
}
+ bool isUseDeviceAddr(mlir::omp::MapInfoOp mapOp, mlir::Operation &userOp) {
+ if (auto targetDataOp = llvm::dyn_cast<mlir::omp::TargetDataOp>(userOp)) {
+ for (mlir::Value uda : targetDataOp.getUseDeviceAddrVars()) {
+ if (uda.getDefiningOp() == mapOp)
+ return true;
+ }
+ }
+ return false;
+ }
+
+ bool isUseDevicePtr(mlir::omp::MapInfoOp mapOp, mlir::Operation &userOp) {
+ if (auto targetDataOp = llvm::dyn_cast<mlir::omp::TargetDataOp>(userOp)) {
+ for (mlir::Value udp : targetDataOp.getUseDevicePtrVars()) {
+ if (udp.getDefiningOp() == mapOp)
+ return true;
+ }
+ }
+ return false;
+ }
+
mlir::omp::MapInfoOp genBoxcharMemberMap(mlir::omp::MapInfoOp op,
fir::FirOpBuilder &builder) {
if (!op.getMembers().empty())
@@ -466,12 +526,14 @@ class MapInfoFinalizationPass
// TODO: map the addendum segment of the descriptor, similarly to the
// base address/data pointer member.
- mlir::Value descriptor = getDescriptorFromBoxMap(op, builder);
+ bool descCanBeDeferred = false;
+ mlir::Value descriptor =
+ getDescriptorFromBoxMap(op, builder, descCanBeDeferred);
mlir::ArrayAttr newMembersAttr;
mlir::SmallVector<mlir::Value> newMembers;
llvm::SmallVector<llvm::SmallVector<int64_t>> memberIndices;
- bool isHasDeviceAddrFlag = isHasDeviceAddr(op, target);
+ bool isHasDeviceAddrFlag = isHasDeviceAddr(op, *target);
if (!mapMemberUsers.empty() || !op.getMembers().empty())
getMemberIndicesAsVectors(
@@ -553,6 +615,10 @@ class MapInfoFinalizationPass
/*partial_map=*/builder.getBoolAttr(false));
op.replaceAllUsesWith(newDescParentMapOp.getResult());
op->erase();
+
+ if (descCanBeDeferred)
+ deferrableDesc.push_back(newDescParentMapOp);
+
return newDescParentMapOp;
}
@@ -701,6 +767,124 @@ class MapInfoFinalizationPass
return nullptr;
}
+ void addImplicitDescriptorMapToTargetDataOp(mlir::omp::MapInfoOp op,
+ fir::FirOpBuilder &builder,
+ mlir::Operation &target) {
+    // Checks if the map is already present as an explicit map on the target
+    // data directive, and not just on a use_device_addr/ptr; if it is, we
+    // should not need to add an implicit map for the descriptor.
+    auto explicitMappingPresent = [](mlir::omp::MapInfoOp op,
+                                     mlir::omp::TargetDataOp tarData) {
+      // Verify that a top-level descriptor mapping with the same varPtr is
+      // present. The map type should always be To for a descriptor, which is
+      // all we really care about for this mapping, as we aim to make sure the
+      // descriptor is always present on the device if we expect to access
+      // the underlying data.
+ if (tarData.getMapVars().empty())
+ return false;
+
+ for (mlir::Value mapVar : tarData.getMapVars()) {
+ auto mapOp = llvm::cast<mlir::omp::MapInfoOp>(mapVar.getDefiningOp());
+ if (mapOp.getVarPtr() == op.getVarPtr() &&
+ mapOp.getVarPtrPtr() == op.getVarPtrPtr()) {
+ return true;
+ }
+ }
+
+ return false;
+ };
+
+    // If this is not a top-level descriptor with members (e.g. it is a member
+    // of a derived type), we do not want to perform this step.
+ if (!llvm::isa<mlir::omp::TargetDataOp>(target) || op.getMembers().empty())
+ return;
+
+ if (!isUseDeviceAddr(op, target) && !isUseDevicePtr(op, target))
+ return;
+
+ auto targetDataOp = llvm::cast<mlir::omp::TargetDataOp>(target);
+ if (explicitMappingPresent(op, targetDataOp))
+ return;
+
+ mlir::omp::MapInfoOp newDescParentMapOp =
+ builder.create<mlir::omp::MapInfoOp>(
+ op->getLoc(), op.getResult().getType(), op.getVarPtr(),
+ op.getVarTypeAttr(),
+ builder.getIntegerAttr(
+ builder.getIntegerType(64, false),
+ llvm::to_underlying(
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS)),
+ op.getMapCaptureTypeAttr(), /*varPtrPtr=*/mlir::Value{},
+ mlir::SmallVector<mlir::Value>{}, mlir::ArrayAttr{},
+ /*bounds=*/mlir::SmallVector<mlir::Value>{},
+ /*mapperId*/ mlir::FlatSymbolRefAttr(), op.getNameAttr(),
+ /*partial_map=*/builder.getBoolAttr(false));
+
+ targetDataOp.getMapVarsMutable().append({newDescParentMapOp});
+ }
+
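For orientation, a hedged sketch of the directive shape this function is meant to handle (illustrative only, not from this patch's tests): a deferrable descriptor that appears only in use_device_addr on a target data construct, with no explicit map on that same construct, so an implicit always/to map of the descriptor is appended.

  ! Hypothetical use_device_addr on an assumed-shape dummy with no explicit
  ! map on the same directive; the data is assumed to be mapped beforehand.
  subroutine use_device_sketch(x)
    real :: x(:)
    !$omp target enter data map(to: x)
    !$omp target data use_device_addr(x)
    !$omp target has_device_addr(x)
    x(1) = 1.0
    !$omp end target
    !$omp end target data
    !$omp target exit data map(release: x)
  end subroutine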
+ void removeTopLevelDescriptor(mlir::omp::MapInfoOp op,
+ fir::FirOpBuilder &builder,
+ mlir::Operation *target) {
+ if (llvm::isa<mlir::omp::TargetOp, mlir::omp::TargetDataOp,
+ mlir::omp::DeclareMapperInfoOp>(target))
+ return;
+
+    // If this is not a top-level descriptor with members (e.g. it is a member
+    // of a derived type), we do not want to perform this step.
+ if (op.getMembers().empty())
+ return;
+
+ mlir::SmallVector<mlir::Value> members = op.getMembers();
+ mlir::omp::MapInfoOp baseAddr =
+ mlir::dyn_cast_or_null<mlir::omp::MapInfoOp>(
+ members.front().getDefiningOp());
+ assert(baseAddr && "Expected member to be MapInfoOp");
+ members.erase(members.begin());
+
+ llvm::SmallVector<llvm::SmallVector<int64_t>> memberIndices;
+ getMemberIndicesAsVectors(op, memberIndices);
+
+ // Can skip the extra processing if there's only 1 member as it'd
+ // be the base addresses, which we're promoting to the parent.
+ mlir::ArrayAttr membersAttr;
+ if (memberIndices.size() > 1) {
+ memberIndices.erase(memberIndices.begin());
+ membersAttr = builder.create2DI64ArrayAttr(memberIndices);
+ }
+
+    // VarPtrPtr is currently tied to detecting whether something is a pointer
+    // in the later lowering; at the moment it comes tied with
+    // OMP_MAP_PTR_AND_OBJ being applied, which undermines what this function
+    // tries to achieve by emitting an 8-byte mapping tied to the descriptor
+    // address (even if we only emit a single map). So we circumvent this by
+    // removing the varPtrPtr mapping; however, a side effect of this is that
+    // we lose the additional load the backend ties to it, which is required
+    // for correctness and for getting the correct address of the data to
+    // perform our mapping. So we do our load at this stage.
+    // TODO/FIXME: Tidy up OMP_MAP_PTR_AND_OBJ and varPtrPtr being tied to
+    // whether something is a pointer, to try and tidy up the implementation
+    // a bit. This is an unfortunate complexity from push-back upstream. We
+    // could also emit a load at this level for all base addresses, which in
+    // turn would simplify the later lowering a bit as well. But first we need
+    // to see how well this alteration works.
+ auto loadBaseAddr =
+ builder.loadIfRef(op->getLoc(), baseAddr.getVarPtrPtr());
+ mlir::omp::MapInfoOp newBaseAddrMapOp =
+ builder.create<mlir::omp::MapInfoOp>(
+ op->getLoc(), loadBaseAddr.getType(), loadBaseAddr,
+ baseAddr.getVarTypeAttr(), baseAddr.getMapTypeAttr(),
+ baseAddr.getMapCaptureTypeAttr(), mlir::Value{}, members,
+ membersAttr, baseAddr.getBounds(),
+ /*mapperId*/ mlir::FlatSymbolRefAttr(), op.getNameAttr(),
+ /*partial_map=*/builder.getBoolAttr(false));
+ op.replaceAllUsesWith(newBaseAddrMapOp.getResult());
+ op->erase();
+ baseAddr.erase();
+ }
+
// This pass executes on omp::MapInfoOp's containing descriptor based types
// (allocatables, pointers, assumed shape etc.) and expanding them into
// multiple omp::MapInfoOp's for each pointer member contained within the
@@ -730,6 +914,7 @@ class MapInfoFinalizationPass
// clear all local allocations we made for any boxes in any prior
// iterations from previous function scopes.
localBoxAllocas.clear();
+ deferrableDesc.clear();
// First, walk `omp.map.info` ops to see if any of them have varPtrs
// with an underlying type of fir.char<k, ?>, i.e a character
@@ -1010,6 +1195,36 @@ class MapInfoFinalizationPass
}
});
+    // Now that we've expanded all of our boxes into a descriptor and base
+    // address map where necessary, we check if the map owner is an
+    // enter/exit/target data directive, and if so we drop the initial
+    // descriptor (top-level parent) and replace it with the
+    // base_address/data.
+    //
+    // This circumvents issues with stack-allocated descriptors bound to the
+    // device colliding, which in Flang is rather trivial for a user to do by
+    // accident due to the pervasive local intermediate descriptor generation
+    // that occurs whenever boxes are passed around different scopes.
+    // In OpenMP 6+, mapping these would be a user error, as the tools
+    // required to circumvent these issues are provided by the spec
+    // (ref_ptr/ptee map types), but in prior specifications these tools are
+    // not available and it becomes an implementation issue for us to solve.
+    //
+    // We do this by dropping the top-level descriptor, which will be the
+    // stack descriptor when we perform enter/exit maps, as we don't want it
+    // to be bound until necessary, that is, when the descriptor type is used
+    // within a target region. At that point we map the relevant descriptor
+    // data, and the runtime should correctly associate the data with the
+    // descriptor, bind them together, and allow clean mapping and execution.
+ for (auto *op : deferrableDesc) {
+ auto mapOp = llvm::dyn_cast<mlir::omp::MapInfoOp>(op);
+ mlir::Operation *targetUser = getFirstTargetUser(mapOp);
+ assert(targetUser && "expected user of map operation was not found");
+ builder.setInsertionPoint(mapOp);
+ removeTopLevelDescriptor(mapOp, builder, targetUser);
+ addImplicitDescriptorMapToTargetDataOp(mapOp, builder, *targetUser);
+ }
+
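A hedged end-to-end sketch of the deferral (in the spirit of the new map-descriptor-deferral.f90 test, not an excerpt from it): the enter data map keeps only the base data, and the descriptor is mapped with the later target region, where its device scope is clearly bounded.

  ! Hypothetical deferral scenario on an assumed-shape dummy.
  subroutine deferred_desc(x, n)
    integer :: n, i
    real :: x(:)
    !$omp target enter data map(to: x)     ! descriptor mapping deferred here
    !$omp target                           ! descriptor mapped for this region
    do i = 1, n
      x(i) = x(i) + 1.0
    end do
    !$omp end target
    !$omp target exit data map(from: x)
  end subroutine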
// Wait until after we have generated all of our maps to add them onto
// the target's block arguments, simplifying the process as there would be
// no need to avoid accidental duplicate additions.
diff --git a/flang/test/Examples/omp-atomic.f90 b/flang/test/Examples/omp-atomic.f90
index 5695b62..d7e0a1c 100644
--- a/flang/test/Examples/omp-atomic.f90
+++ b/flang/test/Examples/omp-atomic.f90
@@ -1,6 +1,6 @@
! REQUIRES: plugins, examples, shell
-! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s
+! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport%pluginext -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s
! Check OpenMP 2.13.6 atomic Construct
diff --git a/flang/test/Examples/omp-declarative-directive.f90 b/flang/test/Examples/omp-declarative-directive.f90
index 4a9ad91..6e1b0be 100644
--- a/flang/test/Examples/omp-declarative-directive.f90
+++ b/flang/test/Examples/omp-declarative-directive.f90
@@ -1,6 +1,6 @@
! REQUIRES: plugins, examples, shell
-! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s
+! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport%pluginext -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s
! Check OpenMP declarative directives
diff --git a/flang/test/Examples/omp-device-constructs.f90 b/flang/test/Examples/omp-device-constructs.f90
index 916f7c9..ae52f73 100644
--- a/flang/test/Examples/omp-device-constructs.f90
+++ b/flang/test/Examples/omp-device-constructs.f90
@@ -1,6 +1,6 @@
! REQUIRES: plugins, examples, shell
-!RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s
+!RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport%pluginext -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s
! Check OpenMP clause validity for the following directives:
! 2.10 Device constructs
diff --git a/flang/test/Examples/omp-in-reduction-clause.f90 b/flang/test/Examples/omp-in-reduction-clause.f90
index fc3fff5..ced6722 100644
--- a/flang/test/Examples/omp-in-reduction-clause.f90
+++ b/flang/test/Examples/omp-in-reduction-clause.f90
@@ -1,6 +1,6 @@
! REQUIRES: plugins, examples, shell
-! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
+! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport%pluginext -plugin flang-omp-report -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
! Check for IN_REDUCTION() clause on OpenMP constructs
diff --git a/flang/test/Examples/omp-nowait.f90 b/flang/test/Examples/omp-nowait.f90
index 091a952..1d8f9e0 100644
--- a/flang/test/Examples/omp-nowait.f90
+++ b/flang/test/Examples/omp-nowait.f90
@@ -1,6 +1,6 @@
! REQUIRES: plugins, examples, shell
-! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s
+! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport%pluginext -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s
subroutine sb(n)
implicit none
diff --git a/flang/test/Examples/omp-order-clause.f90 b/flang/test/Examples/omp-order-clause.f90
index 8d1c3f4..976c750 100644
--- a/flang/test/Examples/omp-order-clause.f90
+++ b/flang/test/Examples/omp-order-clause.f90
@@ -1,6 +1,6 @@
! REQUIRES: plugins, examples, shell
-! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
+! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport%pluginext -plugin flang-omp-report -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
! Check for ORDER([order-modifier :]concurrent) clause on OpenMP constructs
diff --git a/flang/test/Examples/omp-sections.f90 b/flang/test/Examples/omp-sections.f90
index a6d2806..96de363 100644
--- a/flang/test/Examples/omp-sections.f90
+++ b/flang/test/Examples/omp-sections.f90
@@ -1,6 +1,6 @@
! REQUIRES: plugins, examples, shell
-! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s
+! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport%pluginext -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s
subroutine omp_sections()
integer :: x
diff --git a/flang/test/Lower/OpenACC/acc-data-operands-remapping.f90 b/flang/test/Lower/OpenACC/acc-data-operands-remapping.f90
new file mode 100644
index 0000000..9d36f6a
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-data-operands-remapping.f90
@@ -0,0 +1,601 @@
+! Test remapping of variables appearing in OpenACC data clauses
+! to the related acc dialect data operation result.
+
+! This test checks how the hlfir.declare is recreated and used inside
+! the acc compute region.
+
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+module m
+interface
+subroutine takes_scalar(x)
+ real :: x
+end subroutine
+subroutine takes_scalar_character(c, l)
+ integer :: l
+ character(l) :: c
+end subroutine
+subroutine takes_explicit_cst_shape(x)
+ real :: x(100)
+end subroutine
+subroutine takes_explicit_shape(x, n)
+ real :: x(n)
+end subroutine
+subroutine takes_assumed_shape(x)
+ real :: x(:)
+end subroutine
+subroutine takes_pointer(x)
+ real, pointer :: x(:)
+end subroutine
+
+subroutine takes_optional_scalar(x)
+ real, optional :: x
+end subroutine
+subroutine takes_optional_explicit_cst_shape(x)
+ real, optional :: x(100)
+end subroutine
+subroutine takes_optional_explicit_shape(x, n)
+ real, optional :: x(n)
+end subroutine
+subroutine takes_optional_assumed_shape(x)
+ real, optional :: x(:)
+end subroutine
+subroutine takes_optional_pointer(x)
+ real, optional, pointer :: x(:)
+end subroutine
+end interface
+contains
+
+! ----------------------------- Test forwarding ------------------------------ !
+
+ subroutine test_scalar(x)
+ real :: x
+ !$acc parallel copy(x)
+ call takes_scalar(x)
+ !$acc end parallel
+ end subroutine
+
+ subroutine test_scalar_character(c, l)
+ integer :: l
+ character(l) :: c
+ !$acc parallel copy(x)
+ call takes_scalar_character(c, len(c))
+ !$acc end parallel
+ end subroutine
+
+ subroutine test_cst_shape(x)
+ real :: x(100)
+ !$acc parallel copy(x)
+ call takes_explicit_cst_shape(x)
+ !$acc end parallel
+ end subroutine
+
+ subroutine test_explicit_shape(x, n)
+ real :: x(n)
+ !$acc parallel copy(x)
+ call takes_explicit_shape(x, size(x,dim=1))
+ !$acc end parallel
+ end subroutine
+
+ subroutine test_assumed_shape(x, n)
+ real :: x(:)
+ !$acc parallel copy(x)
+ call takes_assumed_shape(x)
+ !$acc end parallel
+ end subroutine
+
+ subroutine test_contiguous_assumed_shape(x, n)
+ real, contiguous :: x(:)
+ !$acc parallel copy(x)
+ call takes_explicit_shape(x, size(x,dim=1))
+ !$acc end parallel
+ end subroutine
+
+ subroutine test_pointer(x, n)
+ real, pointer :: x(:)
+ !$acc parallel copy(x)
+ call takes_pointer(x)
+ !$acc end parallel
+ end subroutine
+
+ subroutine test_using_both_results(x, n)
+ real :: x(n)
+ !$acc parallel copy(x)
+ ! using hlfir.declare result #0
+ call takes_assumed_shape(x)
+ ! using hlfir.declare result #1
+ call takes_explicit_shape(x, size(x,dim=1))
+ !$acc end parallel
+ end subroutine
+
+! ------------------------- Test array addressing ---------------------------- !
+
+ subroutine addressing_cst_shape(x)
+ real :: x(10, 20)
+ !$acc parallel copy(x)
+ call takes_scalar(x(2,3))
+ !$acc end parallel
+ end subroutine
+
+ subroutine addressing_explicit_shape(x, n, m)
+ real :: x(n, m)
+ !$acc parallel copy(x)
+ call takes_scalar(x(2,3))
+ !$acc end parallel
+ end subroutine
+
+ subroutine addressing_assumed_shape(x, n)
+ real :: x(:, :)
+ !$acc parallel copy(x)
+ call takes_scalar(x(2,3))
+ !$acc end parallel
+ end subroutine
+
+ subroutine addressing_contiguous_assumed_shape(x, n)
+ real, contiguous :: x(:, :)
+ !$acc parallel copy(x)
+ call takes_scalar(x(2,3))
+ !$acc end parallel
+ end subroutine
+
+ subroutine addressing_pointer(x)
+ real, pointer :: x(:, :)
+ !$acc parallel copy(x)
+ call takes_scalar(x(2,3))
+ !$acc end parallel
+ end subroutine
+
+! ------------------------ Test OPTIONAL handling ---------------------------- !
+
+ subroutine test_optional_scalar(x)
+ real, optional :: x
+ !$acc parallel copy(x)
+ call takes_optional_scalar(x)
+ !$acc end parallel
+ end subroutine
+
+ subroutine test_optional_explicit_cst_shape(x)
+ real, optional :: x(100)
+ !$acc parallel copy(x)
+ call takes_optional_explicit_cst_shape(x)
+ !$acc end parallel
+ end subroutine
+
+ subroutine test_optional_explicit_shape(x, n)
+ real, optional :: x(n)
+ !$acc parallel copy(x)
+ call takes_optional_explicit_shape(x, n)
+ !$acc end parallel
+ end subroutine
+
+ subroutine test_optional_assumed_shape(x)
+ real, optional :: x(:)
+ !$acc parallel copy(x)
+ call takes_optional_assumed_shape(x)
+ !$acc end parallel
+ end subroutine
+
+ subroutine test_optional_pointer(x)
+ real, optional, pointer :: x(:)
+ !$acc parallel copy(x)
+ call takes_optional_pointer(x)
+ !$acc end parallel
+ end subroutine
+
+end module
+
+! CHECK-LABEL: func.func @_QMmPtest_scalar(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {fir.bindc_name = "x"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_scalarEx"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_2:.*]] = acc.copyin varPtr(%[[VAL_1]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_2]] : !fir.ref<f32>) {
+! CHECK: %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_2]] dummy_scope %[[VAL_3]] {uniq_name = "_QMmFtest_scalarEx"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: fir.call @_QPtakes_scalar(%[[VAL_4]]#0) fastmath<contract> : (!fir.ref<f32>) -> ()
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accPtr(%[[VAL_2]] : !fir.ref<f32>) to varPtr(%[[VAL_1]]#0 : !fir.ref<f32>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPtest_scalar_character(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "c"},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "l"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_scalar_characterEl"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QMmFtest_scalar_characterEx"}
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QMmFtest_scalar_characterEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_4:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+! CHECK: %[[VAL_5:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32
+! CHECK: %[[VAL_7:.*]] = arith.cmpi sgt, %[[VAL_5]], %[[VAL_6]] : i32
+! CHECK: %[[VAL_8:.*]] = arith.select %[[VAL_7]], %[[VAL_5]], %[[VAL_6]] : i32
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_4]]#0 typeparams %[[VAL_8]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_scalar_characterEc"} : (!fir.ref<!fir.char<1,?>>, i32, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>)
+! CHECK: %[[VAL_10:.*]] = acc.copyin varPtr(%[[VAL_3]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_10]] : !fir.ref<f32>) {
+! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QMmFtest_scalar_characterEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_12:.*]]:3 = hlfir.associate %[[VAL_8]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+! CHECK: fir.call @_QPtakes_scalar_character(%[[VAL_9]]#0, %[[VAL_12]]#0) fastmath<contract> : (!fir.boxchar<1>, !fir.ref<i32>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_12]]#1, %[[VAL_12]]#2 : !fir.ref<i32>, i1
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accPtr(%[[VAL_10]] : !fir.ref<f32>) to varPtr(%[[VAL_3]]#0 : !fir.ref<f32>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPtest_cst_shape(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "x"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]] = arith.constant 100 : index
+! CHECK: %[[VAL_2:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG0]](%[[VAL_2]]) dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_cst_shapeEx"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+! CHECK: %[[VAL_4:.*]] = acc.copyin varPtr(%[[VAL_3]]#0 : !fir.ref<!fir.array<100xf32>>) -> !fir.ref<!fir.array<100xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_4]] : !fir.ref<!fir.array<100xf32>>) {
+! CHECK: %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_4]](%[[VAL_2]]) dummy_scope %[[VAL_5]] {uniq_name = "_QMmFtest_cst_shapeEx"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+! CHECK: fir.call @_QPtakes_explicit_cst_shape(%[[VAL_6]]#0) fastmath<contract> : (!fir.ref<!fir.array<100xf32>>) -> ()
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accPtr(%[[VAL_4]] : !fir.ref<!fir.array<100xf32>>) to varPtr(%[[VAL_3]]#0 : !fir.ref<!fir.array<100xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPtest_explicit_shape(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x"},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_explicit_shapeEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64
+! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i64) -> index
+! CHECK: %[[VAL_5:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_6:.*]] = arith.cmpi sgt, %[[VAL_4]], %[[VAL_5]] : index
+! CHECK: %[[VAL_7:.*]] = arith.select %[[VAL_6]], %[[VAL_4]], %[[VAL_5]] : index
+! CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG0]](%[[VAL_8]]) dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_explicit_shapeEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[VAL_10:.*]] = acc.copyin var(%[[VAL_9]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_10]] : !fir.box<!fir.array<?xf32>>) {
+! CHECK: %[[VAL_11:.*]] = fir.box_addr %[[VAL_10]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[VAL_12:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_11]](%[[VAL_8]]) dummy_scope %[[VAL_12]] {uniq_name = "_QMmFtest_explicit_shapeEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_7]] : (index) -> i64
+! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i64) -> i32
+! CHECK: %[[VAL_16:.*]]:3 = hlfir.associate %[[VAL_15]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+! CHECK: fir.call @_QPtakes_explicit_shape(%[[VAL_13]]#1, %[[VAL_16]]#0) fastmath<contract> : (!fir.ref<!fir.array<?xf32>>, !fir.ref<i32>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_16]]#1, %[[VAL_16]]#2 : !fir.ref<i32>, i1
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accVar(%[[VAL_10]] : !fir.box<!fir.array<?xf32>>) to var(%[[VAL_9]]#0 : !fir.box<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPtest_assumed_shape(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_assumed_shapeEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_assumed_shapeEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+! CHECK: %[[VAL_3:.*]] = acc.copyin var(%[[VAL_2]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_3]] : !fir.box<!fir.array<?xf32>>) {
+! CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_3]] dummy_scope %[[VAL_4]] skip_rebox {uniq_name = "_QMmFtest_assumed_shapeEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+! CHECK: fir.call @_QPtakes_assumed_shape(%[[VAL_5]]#0) fastmath<contract> : (!fir.box<!fir.array<?xf32>>) -> ()
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accVar(%[[VAL_3]] : !fir.box<!fir.array<?xf32>>) to var(%[[VAL_2]]#0 : !fir.box<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPtest_contiguous_assumed_shape(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x", fir.contiguous},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_contiguous_assumed_shapeEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[ARG0]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[ARG0]], %[[VAL_3]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+! CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
+! CHECK: %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]], %[[VAL_4]]#1 : (index, index) -> !fir.shapeshift<1>
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_6]]) dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<contiguous>, uniq_name = "_QMmFtest_contiguous_assumed_shapeEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shapeshift<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[VAL_8:.*]] = acc.copyin var(%[[VAL_7]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_8]] : !fir.box<!fir.array<?xf32>>) {
+! CHECK: %[[VAL_9:.*]] = fir.box_addr %[[VAL_8]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[VAL_10:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_9]](%[[VAL_6]]) dummy_scope %[[VAL_10]] {fortran_attrs = #fir.var_attrs<contiguous>, uniq_name = "_QMmFtest_contiguous_assumed_shapeEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shapeshift<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_4]]#1 : (index) -> i64
+! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i64) -> i32
+! CHECK: %[[VAL_14:.*]]:3 = hlfir.associate %[[VAL_13]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+! CHECK: fir.call @_QPtakes_explicit_shape(%[[VAL_11]]#1, %[[VAL_14]]#0) fastmath<contract> : (!fir.ref<!fir.array<?xf32>>, !fir.ref<i32>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_14]]#1, %[[VAL_14]]#2 : !fir.ref<i32>, i1
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accVar(%[[VAL_8]] : !fir.box<!fir.array<?xf32>>) to var(%[[VAL_7]]#0 : !fir.box<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPtest_pointer(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> {fir.bindc_name = "x"},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_pointerEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMmFtest_pointerEx"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
+! CHECK: %[[VAL_3:.*]] = acc.copyin varPtr(%[[VAL_2]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_3]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) {
+! CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_3]] dummy_scope %[[VAL_4]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMmFtest_pointerEx"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
+! CHECK: fir.call @_QPtakes_pointer(%[[VAL_5]]#0) fastmath<contract> : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> ()
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accPtr(%[[VAL_3]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) to varPtr(%[[VAL_2]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPtest_using_both_results(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x"},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_using_both_resultsEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64
+! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i64) -> index
+! CHECK: %[[VAL_5:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_6:.*]] = arith.cmpi sgt, %[[VAL_4]], %[[VAL_5]] : index
+! CHECK: %[[VAL_7:.*]] = arith.select %[[VAL_6]], %[[VAL_4]], %[[VAL_5]] : index
+! CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG0]](%[[VAL_8]]) dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_using_both_resultsEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[VAL_10:.*]] = acc.copyin var(%[[VAL_9]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_10]] : !fir.box<!fir.array<?xf32>>) {
+! CHECK: %[[VAL_11:.*]] = fir.box_addr %[[VAL_10]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+! CHECK: %[[VAL_12:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_11]](%[[VAL_8]]) dummy_scope %[[VAL_12]] {uniq_name = "_QMmFtest_using_both_resultsEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: fir.call @_QPtakes_assumed_shape(%[[VAL_13]]#0) fastmath<contract> : (!fir.box<!fir.array<?xf32>>) -> ()
+! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_7]] : (index) -> i64
+! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i64) -> i32
+! CHECK: %[[VAL_16:.*]]:3 = hlfir.associate %[[VAL_15]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
+! CHECK: fir.call @_QPtakes_explicit_shape(%[[VAL_13]]#1, %[[VAL_16]]#0) fastmath<contract> : (!fir.ref<!fir.array<?xf32>>, !fir.ref<i32>) -> ()
+! CHECK: hlfir.end_associate %[[VAL_16]]#1, %[[VAL_16]]#2 : !fir.ref<i32>, i1
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accVar(%[[VAL_10]] : !fir.box<!fir.array<?xf32>>) to var(%[[VAL_9]]#0 : !fir.box<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPaddressing_cst_shape(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<10x20xf32>> {fir.bindc_name = "x"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]] = arith.constant 10 : index
+! CHECK: %[[VAL_2:.*]] = arith.constant 20 : index
+! CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]], %[[VAL_2]] : (index, index) -> !fir.shape<2>
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG0]](%[[VAL_3]]) dummy_scope %[[VAL_0]] {uniq_name = "_QMmFaddressing_cst_shapeEx"} : (!fir.ref<!fir.array<10x20xf32>>, !fir.shape<2>, !fir.dscope) -> (!fir.ref<!fir.array<10x20xf32>>, !fir.ref<!fir.array<10x20xf32>>)
+! CHECK: %[[VAL_5:.*]] = acc.copyin varPtr(%[[VAL_4]]#0 : !fir.ref<!fir.array<10x20xf32>>) -> !fir.ref<!fir.array<10x20xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_5]] : !fir.ref<!fir.array<10x20xf32>>) {
+! CHECK: %[[VAL_6:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) dummy_scope %[[VAL_6]] {uniq_name = "_QMmFaddressing_cst_shapeEx"} : (!fir.ref<!fir.array<10x20xf32>>, !fir.shape<2>, !fir.dscope) -> (!fir.ref<!fir.array<10x20xf32>>, !fir.ref<!fir.array<10x20xf32>>)
+! CHECK: %[[VAL_8:.*]] = arith.constant 2 : index
+! CHECK: %[[VAL_9:.*]] = arith.constant 3 : index
+! CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_8]], %[[VAL_9]]) : (!fir.ref<!fir.array<10x20xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: fir.call @_QPtakes_scalar(%[[VAL_10]]) fastmath<contract> : (!fir.ref<f32>) -> ()
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accPtr(%[[VAL_5]] : !fir.ref<!fir.array<10x20xf32>>) to varPtr(%[[VAL_4]]#0 : !fir.ref<!fir.array<10x20xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPaddressing_explicit_shape(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "x"},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"},
+! CHECK-SAME: %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "m"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFaddressing_explicit_shapeEm"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFaddressing_explicit_shapeEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i32) -> i64
+! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i64) -> index
+! CHECK: %[[VAL_6:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_7:.*]] = arith.cmpi sgt, %[[VAL_5]], %[[VAL_6]] : index
+! CHECK: %[[VAL_8:.*]] = arith.select %[[VAL_7]], %[[VAL_5]], %[[VAL_6]] : index
+! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> i64
+! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i64) -> index
+! CHECK: %[[VAL_12:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_12]] : index
+! CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_11]], %[[VAL_12]] : index
+! CHECK: %[[VAL_15:.*]] = fir.shape %[[VAL_8]], %[[VAL_14]] : (index, index) -> !fir.shape<2>
+! CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[ARG0]](%[[VAL_15]]) dummy_scope %[[VAL_0]] {uniq_name = "_QMmFaddressing_explicit_shapeEx"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
+! CHECK: %[[VAL_17:.*]] = acc.copyin var(%[[VAL_16]]#0 : !fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_17]] : !fir.box<!fir.array<?x?xf32>>) {
+! CHECK: %[[VAL_18:.*]] = fir.box_addr %[[VAL_17]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.array<?x?xf32>>
+! CHECK: %[[VAL_19:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_18]](%[[VAL_15]]) dummy_scope %[[VAL_19]] {uniq_name = "_QMmFaddressing_explicit_shapeEx"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
+! CHECK: %[[VAL_21:.*]] = arith.constant 2 : index
+! CHECK: %[[VAL_22:.*]] = arith.constant 3 : index
+! CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_21]], %[[VAL_22]]) : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: fir.call @_QPtakes_scalar(%[[VAL_23]]) fastmath<contract> : (!fir.ref<f32>) -> ()
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accVar(%[[VAL_17]] : !fir.box<!fir.array<?x?xf32>>) to var(%[[VAL_16]]#0 : !fir.box<!fir.array<?x?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPaddressing_assumed_shape(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x"},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFaddressing_assumed_shapeEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFaddressing_assumed_shapeEx"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+! CHECK: %[[VAL_3:.*]] = acc.copyin var(%[[VAL_2]]#0 : !fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_3]] : !fir.box<!fir.array<?x?xf32>>) {
+! CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_3]] dummy_scope %[[VAL_4]] skip_rebox {uniq_name = "_QMmFaddressing_assumed_shapeEx"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>)
+! CHECK: %[[VAL_6:.*]] = arith.constant 2 : index
+! CHECK: %[[VAL_7:.*]] = arith.constant 3 : index
+! CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_6]], %[[VAL_7]]) : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: fir.call @_QPtakes_scalar(%[[VAL_8]]) fastmath<contract> : (!fir.ref<f32>) -> ()
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accVar(%[[VAL_3]] : !fir.box<!fir.array<?x?xf32>>) to var(%[[VAL_2]]#0 : !fir.box<!fir.array<?x?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPaddressing_contiguous_assumed_shape(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x", fir.contiguous},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFaddressing_contiguous_assumed_shapeEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[ARG0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.array<?x?xf32>>
+! CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[ARG0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+! CHECK: %[[VAL_5:.*]] = arith.constant 1 : index
+! CHECK: %[[VAL_6:.*]] = arith.constant 1 : index
+! CHECK: %[[VAL_7:.*]]:3 = fir.box_dims %[[ARG0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index)
+! CHECK: %[[VAL_8:.*]] = arith.constant 1 : index
+! CHECK: %[[VAL_9:.*]] = fir.shape_shift %[[VAL_5]], %[[VAL_4]]#1, %[[VAL_8]], %[[VAL_7]]#1 : (index, index, index, index) -> !fir.shapeshift<2>
+! CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_9]]) dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<contiguous>, uniq_name = "_QMmFaddressing_contiguous_assumed_shapeEx"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shapeshift<2>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
+! CHECK: %[[VAL_11:.*]] = acc.copyin var(%[[VAL_10]]#0 : !fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_11]] : !fir.box<!fir.array<?x?xf32>>) {
+! CHECK: %[[VAL_12:.*]] = fir.box_addr %[[VAL_11]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.array<?x?xf32>>
+! CHECK: %[[VAL_13:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]](%[[VAL_9]]) dummy_scope %[[VAL_13]] {fortran_attrs = #fir.var_attrs<contiguous>, uniq_name = "_QMmFaddressing_contiguous_assumed_shapeEx"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shapeshift<2>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>)
+! CHECK: %[[VAL_15:.*]] = arith.constant 2 : index
+! CHECK: %[[VAL_16:.*]] = arith.constant 3 : index
+! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_14]]#0 (%[[VAL_15]], %[[VAL_16]]) : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32>
+! CHECK: fir.call @_QPtakes_scalar(%[[VAL_17]]) fastmath<contract> : (!fir.ref<f32>) -> ()
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accVar(%[[VAL_11]] : !fir.box<!fir.array<?x?xf32>>) to var(%[[VAL_10]]#0 : !fir.box<!fir.array<?x?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPaddressing_pointer(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> {fir.bindc_name = "x"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMmFaddressing_pointerEx"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>)
+! CHECK: %[[VAL_2:.*]] = acc.copyin varPtr(%[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_2]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) {
+! CHECK: %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_2]] dummy_scope %[[VAL_3]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMmFaddressing_pointerEx"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>)
+! CHECK: %[[VAL_5:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>
+! CHECK: %[[VAL_6:.*]] = arith.constant 2 : index
+! CHECK: %[[VAL_7:.*]] = arith.constant 3 : index
+! CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_5]] (%[[VAL_6]], %[[VAL_7]]) : (!fir.box<!fir.ptr<!fir.array<?x?xf32>>>, index, index) -> !fir.ref<f32>
+! CHECK: fir.call @_QPtakes_scalar(%[[VAL_8]]) fastmath<contract> : (!fir.ref<f32>) -> ()
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accPtr(%[[VAL_2]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) to varPtr(%[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPtest_optional_scalar(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {fir.bindc_name = "x", fir.optional}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_scalarEx"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_2:.*]] = acc.copyin varPtr(%[[VAL_1]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_2]] : !fir.ref<f32>) {
+! CHECK: %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_2]] dummy_scope %[[VAL_3]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_scalarEx"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>)
+! CHECK: %[[VAL_5:.*]] = fir.is_present %[[VAL_4]]#0 : (!fir.ref<f32>) -> i1
+! CHECK: %[[VAL_6:.*]] = fir.if %[[VAL_5]] -> (!fir.ref<f32>) {
+! CHECK: fir.result %[[VAL_4]]#0 : !fir.ref<f32>
+! CHECK: } else {
+! CHECK: %[[VAL_7:.*]] = fir.absent !fir.ref<f32>
+! CHECK: fir.result %[[VAL_7]] : !fir.ref<f32>
+! CHECK: }
+! CHECK: fir.call @_QPtakes_optional_scalar(%[[VAL_6]]) fastmath<contract> : (!fir.ref<f32>) -> ()
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accPtr(%[[VAL_2]] : !fir.ref<f32>) to varPtr(%[[VAL_1]]#0 : !fir.ref<f32>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPtest_optional_explicit_cst_shape(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "x", fir.optional}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]] = arith.constant 100 : index
+! CHECK: %[[VAL_2:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG0]](%[[VAL_2]]) dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_explicit_cst_shapeEx"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+! CHECK: %[[VAL_4:.*]] = acc.copyin varPtr(%[[VAL_3]]#0 : !fir.ref<!fir.array<100xf32>>) -> !fir.ref<!fir.array<100xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_4]] : !fir.ref<!fir.array<100xf32>>) {
+! CHECK: %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_4]](%[[VAL_2]]) dummy_scope %[[VAL_5]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_explicit_cst_shapeEx"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>)
+! CHECK: %[[VAL_7:.*]] = fir.is_present %[[VAL_6]]#0 : (!fir.ref<!fir.array<100xf32>>) -> i1
+! CHECK: %[[VAL_8:.*]] = fir.if %[[VAL_7]] -> (!fir.ref<!fir.array<100xf32>>) {
+! CHECK: fir.result %[[VAL_6]]#0 : !fir.ref<!fir.array<100xf32>>
+! CHECK: } else {
+! CHECK: %[[VAL_9:.*]] = fir.absent !fir.ref<!fir.array<100xf32>>
+! CHECK: fir.result %[[VAL_9]] : !fir.ref<!fir.array<100xf32>>
+! CHECK: }
+! CHECK: fir.call @_QPtakes_optional_explicit_cst_shape(%[[VAL_8]]) fastmath<contract> : (!fir.ref<!fir.array<100xf32>>) -> ()
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accPtr(%[[VAL_4]] : !fir.ref<!fir.array<100xf32>>) to varPtr(%[[VAL_3]]#0 : !fir.ref<!fir.array<100xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPtest_optional_explicit_shape(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x", fir.optional},
+! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_optional_explicit_shapeEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64
+! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i64) -> index
+! CHECK: %[[VAL_5:.*]] = arith.constant 0 : index
+! CHECK: %[[VAL_6:.*]] = arith.cmpi sgt, %[[VAL_4]], %[[VAL_5]] : index
+! CHECK: %[[VAL_7:.*]] = arith.select %[[VAL_6]], %[[VAL_4]], %[[VAL_5]] : index
+! CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1>
+! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG0]](%[[VAL_8]]) dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_explicit_shapeEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[VAL_10:.*]] = acc.copyin varPtr(%[[VAL_9]]#1 : !fir.ref<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_10]] : !fir.ref<!fir.array<?xf32>>) {
+! CHECK: %[[VAL_11:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]](%[[VAL_8]]) dummy_scope %[[VAL_11]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_explicit_shapeEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
+! CHECK: %[[VAL_13:.*]] = fir.is_present %[[VAL_12]]#0 : (!fir.box<!fir.array<?xf32>>) -> i1
+! CHECK: %[[VAL_14:.*]] = fir.if %[[VAL_13]] -> (!fir.ref<!fir.array<?xf32>>) {
+! CHECK: fir.result %[[VAL_12]]#1 : !fir.ref<!fir.array<?xf32>>
+! CHECK: } else {
+! CHECK: %[[VAL_15:.*]] = fir.absent !fir.ref<!fir.array<?xf32>>
+! CHECK: fir.result %[[VAL_15]] : !fir.ref<!fir.array<?xf32>>
+! CHECK: }
+! CHECK: fir.call @_QPtakes_optional_explicit_shape(%[[VAL_14]], %[[VAL_1]]#0) fastmath<contract> : (!fir.ref<!fir.array<?xf32>>, !fir.ref<i32>) -> ()
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accPtr(%[[VAL_10]] : !fir.ref<!fir.array<?xf32>>) to varPtr(%[[VAL_9]]#1 : !fir.ref<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPtest_optional_assumed_shape(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x", fir.optional}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_assumed_shapeEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+! CHECK: %[[VAL_2:.*]] = acc.copyin var(%[[VAL_1]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_2]] : !fir.box<!fir.array<?xf32>>) {
+! CHECK: %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_2]] dummy_scope %[[VAL_3]] skip_rebox {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_assumed_shapeEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>)
+! CHECK: %[[VAL_5:.*]] = fir.is_present %[[VAL_4]]#0 : (!fir.box<!fir.array<?xf32>>) -> i1
+! CHECK: %[[VAL_6:.*]] = fir.if %[[VAL_5]] -> (!fir.box<!fir.array<?xf32>>) {
+! CHECK: fir.result %[[VAL_4]]#0 : !fir.box<!fir.array<?xf32>>
+! CHECK: } else {
+! CHECK: %[[VAL_7:.*]] = fir.absent !fir.box<!fir.array<?xf32>>
+! CHECK: fir.result %[[VAL_7]] : !fir.box<!fir.array<?xf32>>
+! CHECK: }
+! CHECK: fir.call @_QPtakes_optional_assumed_shape(%[[VAL_6]]) fastmath<contract> : (!fir.box<!fir.array<?xf32>>) -> ()
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accVar(%[[VAL_2]] : !fir.box<!fir.array<?xf32>>) to var(%[[VAL_1]]#0 : !fir.box<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
+
+! CHECK-LABEL: func.func @_QMmPtest_optional_pointer(
+! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> {fir.bindc_name = "x", fir.optional}) {
+! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<optional, pointer>, uniq_name = "_QMmFtest_optional_pointerEx"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
+! CHECK: %[[VAL_2:.*]] = acc.copyin varPtr(%[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: acc.parallel dataOperands(%[[VAL_2]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) {
+! CHECK: %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_2]] dummy_scope %[[VAL_3]] {fortran_attrs = #fir.var_attrs<optional, pointer>, uniq_name = "_QMmFtest_optional_pointerEx"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>)
+! CHECK: fir.call @_QPtakes_optional_pointer(%[[VAL_4]]#0) fastmath<contract> : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> ()
+! CHECK: acc.yield
+! CHECK: }
+! CHECK: acc.copyout accPtr(%[[VAL_2]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) to varPtr(%[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) {dataClause = #acc<data_clause acc_copy>, name = "x"}
+! CHECK: return
+! CHECK: }
diff --git a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90
index bc94837b..429f207 100644
--- a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90
+++ b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90
@@ -41,19 +41,21 @@ module m_firstprivate_derived_alloc_comp
! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QMm_firstprivate_derived_alloc_compFtestEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_6:.*]] = acc.firstprivate varPtr(%[[VAL_1]]#0 : !fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>> {name = "a"}
! CHECK: acc.parallel combined(loop) firstprivate(@firstprivatization_ref_rec__QMm_firstprivate_derived_alloc_compTpoint -> %[[VAL_6]] : !fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) {
-! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
-! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_6]] dummy_scope %[[VAL_7]] {uniq_name = "_QMm_firstprivate_derived_alloc_compFtestEa"} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>, !fir.dscope) -> (!fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>, !fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>)
! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK: %[[VAL_10:.*]] = acc.private varPtr(%[[VAL_3]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QMm_firstprivate_derived_alloc_compFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_10]] : !fir.ref<i32>) control(%[[VAL_12:.*]] : i32) = (%[[VAL_7]] : i32) to (%[[VAL_8]] : i32) step (%[[VAL_9]] : i32) {
-! CHECK: fir.store %[[VAL_12]] to %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK: %[[VAL_13:.*]] = arith.constant 1.000000e+00 : f32
-! CHECK: %[[VAL_14:.*]] = hlfir.designate %[[VAL_1]]#0{"x"} {fortran_attrs = #fir.var_attrs<allocatable>} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
-! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
-! CHECK: %[[VAL_16:.*]] = arith.constant 10 : index
-! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_15]] (%[[VAL_16]]) : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> !fir.ref<f32>
-! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_17]] : f32, !fir.ref<f32>
+! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_12:.*]] = acc.private varPtr(%[[VAL_3]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_12]] : !fir.ref<i32>) control(%[[VAL_14:.*]] : i32) = (%[[VAL_9]] : i32) to (%[[VAL_10]] : i32) step (%[[VAL_11]] : i32) {
+! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QMm_firstprivate_derived_alloc_compFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: fir.store %[[VAL_14]] to %[[VAL_13]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_8]]#0{"x"} {fortran_attrs = #fir.var_attrs<allocatable>} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
+! CHECK: %[[VAL_18:.*]] = arith.constant 10 : index
+! CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]]) : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> !fir.ref<f32>
+! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_19]] : f32, !fir.ref<f32>
! CHECK: acc.yield
! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
! CHECK: acc.yield
diff --git a/flang/test/Lower/OpenACC/acc-firstprivate-derived-pointer-component.f90 b/flang/test/Lower/OpenACC/acc-firstprivate-derived-pointer-component.f90
index f18d722..9ef4fe6 100644
--- a/flang/test/Lower/OpenACC/acc-firstprivate-derived-pointer-component.f90
+++ b/flang/test/Lower/OpenACC/acc-firstprivate-derived-pointer-component.f90
@@ -41,19 +41,21 @@ module m_firstprivate_derived_ptr_comp
! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QMm_firstprivate_derived_ptr_compFtestEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_6:.*]] = acc.firstprivate varPtr(%[[VAL_1]]#0 : !fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>> {name = "a"}
! CHECK: acc.parallel combined(loop) firstprivate(@firstprivatization_ref_rec__QMm_firstprivate_derived_ptr_compTpoint -> %[[VAL_6]] : !fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) {
-! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32
-! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope
+! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_6]] dummy_scope %[[VAL_7]] {uniq_name = "_QMm_firstprivate_derived_ptr_compFtestEa"} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>, !fir.dscope) -> (!fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>, !fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>)
! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
-! CHECK: %[[VAL_10:.*]] = acc.private varPtr(%[[VAL_3]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QMm_firstprivate_derived_ptr_compFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_10]] : !fir.ref<i32>) control(%[[VAL_12:.*]] : i32) = (%[[VAL_7]] : i32) to (%[[VAL_8]] : i32) step (%[[VAL_9]] : i32) {
-! CHECK: fir.store %[[VAL_12]] to %[[VAL_11]]#0 : !fir.ref<i32>
-! CHECK: %[[VAL_13:.*]] = arith.constant 1.000000e+00 : f32
-! CHECK: %[[VAL_14:.*]] = hlfir.designate %[[VAL_1]]#0{"x"} {fortran_attrs = #fir.var_attrs<pointer>} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
-! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
-! CHECK: %[[VAL_16:.*]] = arith.constant 10 : index
-! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_15]] (%[[VAL_16]]) : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index) -> !fir.ref<f32>
-! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_17]] : f32, !fir.ref<f32>
+! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_12:.*]] = acc.private varPtr(%[[VAL_3]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_12]] : !fir.ref<i32>) control(%[[VAL_14:.*]] : i32) = (%[[VAL_9]] : i32) to (%[[VAL_10]] : i32) step (%[[VAL_11]] : i32) {
+! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QMm_firstprivate_derived_ptr_compFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: fir.store %[[VAL_14]] to %[[VAL_13]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_8]]#0{"x"} {fortran_attrs = #fir.var_attrs<pointer>} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>
+! CHECK: %[[VAL_18:.*]] = arith.constant 10 : index
+! CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]]) : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index) -> !fir.ref<f32>
+! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_19]] : f32, !fir.ref<f32>
! CHECK: acc.yield
! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
! CHECK: acc.yield
diff --git a/flang/test/Lower/OpenACC/acc-firstprivate-derived-user-assign.f90 b/flang/test/Lower/OpenACC/acc-firstprivate-derived-user-assign.f90
index f389c46..e90ec32 100644
--- a/flang/test/Lower/OpenACC/acc-firstprivate-derived-user-assign.f90
+++ b/flang/test/Lower/OpenACC/acc-firstprivate-derived-user-assign.f90
@@ -55,16 +55,17 @@ module m_firstprivate_derived_user_def
! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QMm_firstprivate_derived_user_defFtestEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_7:.*]] = acc.firstprivate varPtr(%[[VAL_2]]#0 : !fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>>) -> !fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>> {name = "a"}
! CHECK: acc.parallel combined(loop) firstprivate(@firstprivatization_ref_rec__QMm_firstprivate_derived_user_defTpoint -> %[[VAL_7]] : !fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>>) {
-! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
-! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK: %[[VAL_11:.*]] = acc.private varPtr(%[[VAL_4]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QMm_firstprivate_derived_user_defFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_11]] : !fir.ref<i32>) control(%[[VAL_13:.*]] : i32) = (%[[VAL_8]] : i32) to (%[[VAL_9]] : i32) step (%[[VAL_10]] : i32) {
-! CHECK: fir.store %[[VAL_13]] to %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK: %[[VAL_14:.*]] = arith.constant 1.000000e+00 : f32
-! CHECK: %[[VAL_15:.*]] = hlfir.designate %[[VAL_2]]#0{"x"} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>>) -> !fir.ref<f32>
-! CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_15]] : f32, !fir.ref<f32>
+! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QMm_firstprivate_derived_user_defFtestEa"} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>>) -> (!fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>>, !fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>>)
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_12:.*]] = acc.private varPtr(%[[VAL_4]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_12]] : !fir.ref<i32>) control(%[[VAL_14:.*]] : i32) = (%[[VAL_9]] : i32) to (%[[VAL_10]] : i32) step (%[[VAL_11]] : i32) {
+! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QMm_firstprivate_derived_user_defFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: fir.store %[[VAL_14]] to %[[VAL_13]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_8]]#0{"x"} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>>) -> !fir.ref<f32>
+! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_16]] : f32, !fir.ref<f32>
! CHECK: acc.yield
! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
! CHECK: acc.yield
diff --git a/flang/test/Lower/OpenACC/acc-firstprivate-derived.f90 b/flang/test/Lower/OpenACC/acc-firstprivate-derived.f90
index 677c3ae..e91fc9b 100644
--- a/flang/test/Lower/OpenACC/acc-firstprivate-derived.f90
+++ b/flang/test/Lower/OpenACC/acc-firstprivate-derived.f90
@@ -41,16 +41,17 @@ module m_firstprivate_derived
! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QMm_firstprivate_derivedFtestEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_7:.*]] = acc.firstprivate varPtr(%[[VAL_2]]#0 : !fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>>) -> !fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>> {name = "a"}
! CHECK: acc.parallel combined(loop) firstprivate(@firstprivatization_ref_rec__QMm_firstprivate_derivedTpoint -> %[[VAL_7]] : !fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>>) {
-! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32
-! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
-! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32
-! CHECK: %[[VAL_11:.*]] = acc.private varPtr(%[[VAL_4]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QMm_firstprivate_derivedFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_11]] : !fir.ref<i32>) control(%[[VAL_13:.*]] : i32) = (%[[VAL_8]] : i32) to (%[[VAL_9]] : i32) step (%[[VAL_10]] : i32) {
-! CHECK: fir.store %[[VAL_13]] to %[[VAL_12]]#0 : !fir.ref<i32>
-! CHECK: %[[VAL_14:.*]] = arith.constant 1.000000e+00 : f32
-! CHECK: %[[VAL_15:.*]] = hlfir.designate %[[VAL_2]]#0{"x"} : (!fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>>) -> !fir.ref<f32>
-! CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_15]] : f32, !fir.ref<f32>
+! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QMm_firstprivate_derivedFtestEa"} : (!fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>>) -> (!fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>>, !fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>>)
+! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32
+! CHECK: %[[VAL_12:.*]] = acc.private varPtr(%[[VAL_4]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_12]] : !fir.ref<i32>) control(%[[VAL_14:.*]] : i32) = (%[[VAL_9]] : i32) to (%[[VAL_10]] : i32) step (%[[VAL_11]] : i32) {
+! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QMm_firstprivate_derivedFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: fir.store %[[VAL_14]] to %[[VAL_13]]#0 : !fir.ref<i32>
+! CHECK: %[[VAL_15:.*]] = arith.constant 1.000000e+00 : f32
+! CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_8]]#0{"x"} : (!fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>>) -> !fir.ref<f32>
+! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_16]] : f32, !fir.ref<f32>
! CHECK: acc.yield
! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
! CHECK: acc.yield
diff --git a/flang/test/Lower/OpenACC/acc-loop-collapse-force-lowering.f90 b/flang/test/Lower/OpenACC/acc-loop-collapse-force-lowering.f90
new file mode 100644
index 0000000..ca932c1
--- /dev/null
+++ b/flang/test/Lower/OpenACC/acc-loop-collapse-force-lowering.f90
@@ -0,0 +1,41 @@
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
+
+! Verify that collapse(force:2) sinks the prologue (code between the loops) and the
+! epilogue (code after the inner loop) into the acc.loop region body.
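+! With the force modifier, intervening code between the collapsed loops is
+! permitted; it stays inside the collapsed acc.loop region instead of
+! blocking the collapse.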
+
+subroutine collapse_force_sink(n, m)
+ integer, intent(in) :: n, m
+ real, dimension(n,m) :: a
+ real, dimension(n) :: bb, cc
+ integer :: i, j
+
+ !$acc parallel loop collapse(force:2)
+ do i = 1, n
+ bb(i) = 4.2 ! prologue (between loops)
+ do j = 1, m
+ a(i,j) = a(i,j) + 2.0
+ end do
+ cc(i) = 7.3 ! epilogue (after inner loop)
+ end do
+ !$acc end parallel loop
+end subroutine
+
+! CHECK: func.func @_QPcollapse_force_sink(
+! CHECK: acc.parallel
+! Ensure outer acc.loop is combined(parallel)
+! CHECK: acc.loop combined(parallel)
+! Prologue: constant 4.2 and an assign before inner loop
+! CHECK: arith.constant 4.200000e+00
+! CHECK: hlfir.assign
+! Inner loop and its body include 2.0 add and an assign
+! CHECK: acc.loop
+! CHECK: arith.constant 2.000000e+00
+! CHECK: arith.addf
+! CHECK: hlfir.assign
+! Epilogue: constant 7.3 and an assign after inner loop
+! CHECK: arith.constant 7.300000e+00
+! CHECK: hlfir.assign
+! And the outer acc.loop has collapse = [2]
+! CHECK: } attributes {collapse = [2]
diff --git a/flang/test/Lower/OpenACC/acc-loop-exit.f90 b/flang/test/Lower/OpenACC/acc-loop-exit.f90
index 85394e4..af11b34 100644
--- a/flang/test/Lower/OpenACC/acc-loop-exit.f90
+++ b/flang/test/Lower/OpenACC/acc-loop-exit.f90
@@ -16,8 +16,8 @@ end
! CHECK-LABEL: func.func @_QPsub1
! CHECK: %[[A:.*]]:2 = hlfir.declare %arg1 dummy_scope %{{[0-9]+}} {uniq_name = "_QFsub1Ea"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[I:.*]]:2 = hlfir.declare %{{[0-9]+}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: %[[I:.*]]:2 = hlfir.declare %{{[0-9]+}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[EXIT_COND:.*]] = acc.loop
+! CHECK: %[[I:.*]]:2 = hlfir.declare %{{[0-9]+}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: ^bb{{.*}}:
! CHECK: ^bb{{.*}}:
! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenACC/acc-private.f90 b/flang/test/Lower/OpenACC/acc-private.f90
index 5ca08a3..d37eb8d 100644
--- a/flang/test/Lower/OpenACC/acc-private.f90
+++ b/flang/test/Lower/OpenACC/acc-private.f90
@@ -426,7 +426,7 @@ end
! CHECK: %[[DECL_I:.*]]:2 = hlfir.declare %[[I]] {uniq_name = "_QFacc_private_useEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: acc.parallel
! CHECK: %[[PRIV_I:.*]] = acc.private varPtr(%[[DECL_I]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK: %[[DECL_PRIV_I:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFacc_private_useEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: acc.loop {{.*}} private(@privatization_ref_i32 -> %[[PRIV_I]] : !fir.ref<i32>) control(%[[IV0:.*]] : i32) = (%c1{{.*}} : i32) to (%c10{{.*}} : i32) step (%c1{{.*}} : i32)
+! CHECK: %[[DECL_PRIV_I:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFacc_private_useEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.store %[[IV0]] to %[[DECL_PRIV_I]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[DECL_PRIV_I]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90 b/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90
index a75a022..eaf734f 100644
--- a/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90
+++ b/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90
@@ -19,8 +19,8 @@ subroutine basic_do_loop()
! CHECK: acc.kernels {
! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
@@ -48,8 +48,8 @@ subroutine basic_do_concurrent()
! CHECK: acc.kernels {
! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrentEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrentEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
@@ -77,8 +77,8 @@ subroutine basic_do_loop_parallel()
! CHECK: acc.parallel {
! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loop_parallelEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loop_parallelEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
@@ -106,8 +106,8 @@ subroutine basic_do_loop_serial()
! CHECK: acc.serial {
! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loop_serialEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loop_serialEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
@@ -135,8 +135,8 @@ subroutine basic_do_concurrent_parallel()
! CHECK: acc.parallel {
! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrent_parallelEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrent_parallelEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
@@ -164,8 +164,8 @@ subroutine basic_do_concurrent_serial()
! CHECK: acc.serial {
! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrent_serialEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrent_serialEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
@@ -195,10 +195,10 @@ subroutine multi_dimension_do_concurrent()
! CHECK-DAG: %[[PRIVATE_I:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
! CHECK-DAG: %[[PRIVATE_J:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "j"}
! CHECK-DAG: %[[PRIVATE_K:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "k"}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[PRIVATE_K]] : !fir.ref<i32>) control(%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32) = (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) to (%{{.*}}, %{{.*}}, %{{.*}} : i32, i32, i32) step (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32)
! CHECK-DAG: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFmulti_dimension_do_concurrentEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK-DAG: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFmulti_dimension_do_concurrentEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK-DAG: %[[PRIVATE_K_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_K]] {uniq_name = "_QFmulti_dimension_do_concurrentEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
-! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[PRIVATE_K]] : !fir.ref<i32>) control(%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32) = (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) to (%{{.*}}, %{{.*}}, %{{.*}} : i32, i32, i32) step (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32)
! CHECK: fir.store %{{.*}} to %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32>
! CHECK: fir.store %{{.*}} to %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32>
! CHECK: fir.store %{{.*}} to %[[PRIVATE_K_DECLARE]]#0 : !fir.ref<i32>
@@ -235,12 +235,12 @@ subroutine nested_do_loops()
! CHECK: acc.kernels {
! CHECK-DAG: %[[PRIVATE_I:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK-DAG: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFnested_do_loopsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK-DAG: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFnested_do_loopsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.store %{{.*}} to %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32>
! CHECK-DAG: %[[PRIVATE_J:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "j"}
-! CHECK-DAG: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFnested_do_loopsEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK-DAG: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFnested_do_loopsEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.store %{{.*}} to %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32>
@@ -272,8 +272,8 @@ subroutine variable_bounds_and_step(n, start_val, step_val)
! CHECK: acc.kernels {
! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFvariable_bounds_and_stepEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFvariable_bounds_and_stepEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32>
@@ -315,22 +315,22 @@ subroutine different_iv_types()
! CHECK: acc.kernels {
! CHECK: %[[PRIVATE_I8:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i64>) -> !fir.ref<i64> {implicit = true, name = "i8"}
-! CHECK: %[[PRIVATE_I8_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I8]] {uniq_name = "_QFdifferent_iv_typesEi8"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
! CHECK: acc.loop private(@privatization_ref_i64 -> %[[PRIVATE_I8]] : !fir.ref<i64>) control(%{{.*}} : i64) = (%{{.*}} : i64) to (%{{.*}} : i64) step (%{{.*}} : i64)
+! CHECK: %[[PRIVATE_I8_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I8]] {uniq_name = "_QFdifferent_iv_typesEi8"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
! CHECK: fir.store %{{.*}} to %[[PRIVATE_I8_DECLARE]]#0 : !fir.ref<i64>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_I8_DECLARE]]#0 : !fir.ref<i64>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_I8_DECLARE]]#0 : !fir.ref<i64>
! CHECK: acc.kernels {
! CHECK: %[[PRIVATE_I4:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i4"}
-! CHECK: %[[PRIVATE_I4_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I4]] {uniq_name = "_QFdifferent_iv_typesEi4"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I4]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_I4_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I4]] {uniq_name = "_QFdifferent_iv_typesEi4"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.store %{{.*}} to %[[PRIVATE_I4_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_I4_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_I4_DECLARE]]#0 : !fir.ref<i32>
! CHECK: acc.kernels {
! CHECK: %[[PRIVATE_I2:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i16>) -> !fir.ref<i16> {implicit = true, name = "i2"}
-! CHECK: %[[PRIVATE_I2_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I2]] {uniq_name = "_QFdifferent_iv_typesEi2"} : (!fir.ref<i16>) -> (!fir.ref<i16>, !fir.ref<i16>)
! CHECK: acc.loop private(@privatization_ref_i16 -> %[[PRIVATE_I2]] : !fir.ref<i16>) control(%{{.*}} : i16) = (%{{.*}} : i16) to (%{{.*}} : i16) step (%{{.*}} : i16)
+! CHECK: %[[PRIVATE_I2_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I2]] {uniq_name = "_QFdifferent_iv_typesEi2"} : (!fir.ref<i16>) -> (!fir.ref<i16>, !fir.ref<i16>)
! CHECK: fir.store %{{.*}} to %[[PRIVATE_I2_DECLARE]]#0 : !fir.ref<i16>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_I2_DECLARE]]#0 : !fir.ref<i16>
! CHECK: %{{.*}} = fir.load %[[PRIVATE_I2_DECLARE]]#0 : !fir.ref<i16>
@@ -362,12 +362,12 @@ subroutine nested_loop_with_reduction(x, y)
! CHECK: %[[REDUCTION_X:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "x"}
! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "y"}
! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
-! CHECK: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFnested_loop_with_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) reduction(@reduction_add_ref_i32 -> %[[REDUCTION_X]] : !fir.ref<i32>, @reduction_add_ref_i32 -> %[[REDUCTION_Y]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFnested_loop_with_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.store %{{.*}} to %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %[[PRIVATE_J:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "j"}
-! CHECK: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFnested_loop_with_reductionEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32)
+! CHECK: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFnested_loop_with_reductionEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: fir.store %{{.*}} to %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32>
! CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref<i32>
! CHECK: %{{.*}} = arith.addi %{{.*}}, %{{.*}} : i32
diff --git a/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90 b/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90
index 142bc02a..c769152 100644
--- a/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90
+++ b/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90
@@ -112,7 +112,7 @@ integer :: i,j
! CHECK: omp.distribute {
! CHECK: omp.wsloop {
! CHECK: omp.simd private({{.*}}) {
-! CHECK: omp.loop_nest (%[[I_IV:.*]], %[[J_IV:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) {
+! CHECK: omp.loop_nest (%[[I_IV:.*]], %[[J_IV:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) collapse(2) {
! CHECK: %[[Y_MAX_PRIV:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "{{.*}}y_max"}
! CHECK: %[[I_UB:.*]] = fir.load %[[X_MAX_MAPPED]]#0 : !fir.ref<i32>
diff --git a/flang/test/Lower/OpenMP/map-descriptor-deferral.f90 b/flang/test/Lower/OpenMP/map-descriptor-deferral.f90
new file mode 100644
index 0000000..daea2f3
--- /dev/null
+++ b/flang/test/Lower/OpenMP/map-descriptor-deferral.f90
@@ -0,0 +1,96 @@
+!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
+
+! This test checks that the descriptor deferral behaviour of the
+! MapInfoFinalization pass is preserved. Descriptor deferral removes the
+! mapping of the descriptor when a descriptor-carrying type is mapped;
+! it applies only to assumed-shape and assumed-size dummy arguments that
+! are not allocatable or pointers.
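+!
+! In the checks below, the plain assumed-shape dummy defers the descriptor
+! on target enter/exit data (only the loaded base address is mapped), while
+! the allocatable and pointer dummies, and the target data construct, keep
+! the descriptor map alongside its base-address member.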
+
+subroutine assume_map_target_enter_exit(assumed_arr)
+ integer :: assumed_arr(:)
+ !$omp target enter data map(to: assumed_arr)
+ !$omp target
+ assumed_arr(1) = 10
+ !$omp end target
+ !$omp target exit data map(from: assumed_arr)
+end subroutine
+
+!CHECK-LABEL: func.func @_QPassume_map_target_enter_exit(
+!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+!CHECK: %[[LOAD_BOX:.*]] = fir.load %[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+!CHECK: %[[MAP_ADDR:.*]] = omp.map.info var_ptr(%[[LOAD_BOX]] : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(to) capture(ByRef) bounds(%{{.*}}) -> !fir.ref<!fir.array<?xi32>> {name = "assumed_arr"}
+!CHECK: omp.target_enter_data map_entries(%[[MAP_ADDR]] : !fir.ref<!fir.array<?xi32>>)
+!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+!CHECK: %[[MAP_ADDR:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
+!CHECK: %[[MAP_BOX:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.box<!fir.array<?xi32>>) map_clauses(implicit, to) capture(ByRef) members(%{{.*}} : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>> {name = "assumed_arr"}
+!CHECK: omp.target map_entries(%[[MAP_BOX]] -> %{{.*}}, %[[MAP_ADDR]] -> %{{.*}} : !fir.ref<!fir.array<?xi32>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) {
+!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+!CHECK: %[[LOAD_BOX:.*]] = fir.load %[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+!CHECK: %[[MAP_ADDR:.*]] = omp.map.info var_ptr(%[[LOAD_BOX]] : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(from) capture(ByRef) bounds(%{{.*}}) -> !fir.ref<!fir.array<?xi32>> {name = "assumed_arr"}
+!CHECK: omp.target_exit_data map_entries(%[[MAP_ADDR]] : !fir.ref<!fir.array<?xi32>>)
+
+subroutine assume_alloca_map_target_enter_exit(assumed_arr)
+ integer, allocatable :: assumed_arr(:)
+ !$omp target enter data map(to: assumed_arr)
+ !$omp target
+ assumed_arr(1) = 10
+ !$omp end target
+ !$omp target exit data map(from: assumed_arr)
+end subroutine
+
+!CHECK-LABEL: func.func @_QPassume_alloca_map_target_enter_exit(
+!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+!CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i32) map_clauses(to) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
+!CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(to) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "assumed_arr"}
+!CHECK: omp.target_enter_data map_entries(%[[DESC_MAP]], %[[BOX_ADDR_MAP]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>)
+!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+!CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
+!CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(implicit, to) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "assumed_arr"}
+!CHECK: omp.target map_entries(%[[DESC_MAP]] -> %[[VAL_28:.*]], %[[BOX_ADDR_MAP]] -> %[[VAL_29:.*]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) {
+!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+!CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i32) map_clauses(from) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
+!CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(from) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "assumed_arr"}
+!CHECK: omp.target_exit_data map_entries(%[[DESC_MAP]], %[[BOX_ADDR_MAP]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>)
+
+subroutine assume_pointer_map_target_enter_exit(assumed_arr)
+ integer, pointer :: assumed_arr(:)
+ !$omp target enter data map(to: assumed_arr)
+ !$omp target
+ assumed_arr(1) = 10
+ !$omp end target
+ !$omp target exit data map(from: assumed_arr)
+end subroutine
+
+!CHECK-LABEL: func.func @_QPassume_pointer_map_target_enter_exit(
+!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+!CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, i32) map_clauses(to) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
+!CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.box<!fir.ptr<!fir.array<?xi32>>>) map_clauses(to) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {name = "assumed_arr"}
+!CHECK: omp.target_enter_data map_entries(%[[DESC_MAP]], %[[BOX_ADDR_MAP]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>)
+!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+!CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
+!CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.box<!fir.ptr<!fir.array<?xi32>>>) map_clauses(implicit, to) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {name = "assumed_arr"}
+!CHECK: omp.target map_entries(%[[DESC_MAP]] -> %[[VAL_28:.*]], %[[BOX_ADDR_MAP]] -> %[[VAL_29:.*]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) {
+!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+!CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, i32) map_clauses(from) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
+!CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.box<!fir.ptr<!fir.array<?xi32>>>) map_clauses(from) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {name = "assumed_arr"}
+!CHECK: omp.target_exit_data map_entries(%[[DESC_MAP]], %[[BOX_ADDR_MAP]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>)
+
+subroutine assume_map_target_data(assumed_arr)
+ integer :: assumed_arr(:)
+ !$omp target data map(to: assumed_arr)
+ !$omp target
+ assumed_arr(1) = 10
+ !$omp end target
+ !$omp end target data
+end subroutine
+
+!CHECK-LABEL: func.func @_QPassume_map_target_data(
+!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+!CHECK: %[[MAP_ADDR:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>, i32) map_clauses(to) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
+!CHECK: %[[MAP_BOX:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.box<!fir.array<?xi32>>) map_clauses(to) capture(ByRef) members(%[[MAP_ADDR]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>> {name = "assumed_arr"}
+!CHECK: omp.target_data map_entries(%[[MAP_BOX]], %[[MAP_ADDR]] : !fir.ref<!fir.array<?xi32>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) {
+!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>
+!CHECK: %[[MAP_ADDR:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""}
+!CHECK: %[[MAP_BOX:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.box<!fir.array<?xi32>>) map_clauses(implicit, to) capture(ByRef) members(%[[MAP_ADDR]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>> {name = "assumed_arr"}
+!CHECK: omp.target map_entries(%[[MAP_BOX]] -> %{{.*}}, %[[MAP_ADDR]] -> %{{.*}} : !fir.ref<!fir.array<?xi32>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) {
diff --git a/flang/test/Lower/generic-shadows-specific.F90 b/flang/test/Lower/generic-shadows-specific.F90
new file mode 100644
index 0000000..e721908
--- /dev/null
+++ b/flang/test/Lower/generic-shadows-specific.F90
@@ -0,0 +1,40 @@
+
+#if STEP == 1
+! these modules must be read from module files
+module generic_shadows_specific_m1
+ interface f ! reference must be to generic
+ module procedure f ! must have same name as generic interface
+ end interface
+ contains
+ character function f() ! must be character
+ f = 'q'
+ end
+end
+module generic_shadows_specific_m2
+ use generic_shadows_specific_m1
+end
+module generic_shadows_specific_m3
+ use generic_shadows_specific_m2 ! must be generic_shadows_specific_m2, not generic_shadows_specific_m1
+ contains
+ subroutine mustExist() ! not called, but must exist
+ character x
+ x = f()
+ end
+end
+
+#else
+! Check that the expected code is produced with no crash.
+subroutine reproducer()
+ use generic_shadows_specific_m2
+ use generic_shadows_specific_m3
+ character x
+ x = f()
+end
+#endif
+
+!RUN: rm -rf %t && mkdir -p %t
+!RUN: %flang_fc1 -fsyntax-only -DSTEP=1 -J%t %s
+!RUN: %flang_fc1 -emit-fir -J%t -o - %s | FileCheck %s
+
+!CHECK-LABEL: func.func @_QPreproducer
+!CHECK: fir.call @_QMgeneric_shadows_specific_m1Pf
diff --git a/flang/test/Transforms/omp-map-info-finalization.fir b/flang/test/Transforms/omp-map-info-finalization.fir
index ed814cd..7bc0ae4 100644
--- a/flang/test/Transforms/omp-map-info-finalization.fir
+++ b/flang/test/Transforms/omp-map-info-finalization.fir
@@ -326,15 +326,15 @@ func.func @_QPreuse_alloca(%arg0: !fir.box<!fir.array<?xf64>> {fir.bindc_name =
// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
// CHECK: omp.target_data map_entries
-// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
-// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]]
+// CHECK: %[[BOX_OFFSET:.*]] = fir.box_offset %[[ALLOCA]]
+// CHECK: %[[LOAD_OFFSET:.*]] = fir.load %[[BOX_OFFSET]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf64>>>
+// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[LOAD_OFFSET]]
// CHECK: omp.target_update map_entries
// CHECK: omp.terminator
// CHECK: }
// CHECK: return
-
omp.private {type = firstprivate} @boxchar.privatizer : !fir.boxchar<1> copy {
^bb0(%arg0: !fir.boxchar<1>, %arg1: !fir.boxchar<1>):
omp.yield(%arg0 : !fir.boxchar<1>)