diff options
Diffstat (limited to 'flang')
29 files changed, 1463 insertions, 183 deletions
diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index 0ffe27e..f8322a5 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -123,6 +123,10 @@ public: virtual Fortran::lower::SymMap::StorageDesc getSymbolStorage(SymbolRef sym) = 0; + /// Return the Symbol Map used to map semantics::Symbol to their SSA + /// values in the generated MLIR. + virtual Fortran::lower::SymMap &getSymbolMap() = 0; + /// Override lowering of expression with pre-lowered values. /// Associate mlir::Value to evaluate::Expr. All subsequent call to /// genExprXXX() will replace any occurrence of an overridden diff --git a/flang/include/flang/Lower/OpenACC.h b/flang/include/flang/Lower/OpenACC.h index 4622dbc..69f1f5b 100644 --- a/flang/include/flang/Lower/OpenACC.h +++ b/flang/include/flang/Lower/OpenACC.h @@ -122,6 +122,11 @@ void genOpenACCTerminator(fir::FirOpBuilder &, mlir::Operation *, /// clause. uint64_t getLoopCountForCollapseAndTile(const Fortran::parser::AccClauseList &); +/// Parse collapse clause and return {size, force}. If absent, returns +/// {1,false}. +std::pair<uint64_t, bool> +getCollapseSizeAndForce(const Fortran::parser::AccClauseList &); + /// Checks whether the current insertion point is inside OpenACC loop. 
bool isInOpenACCLoop(fir::FirOpBuilder &); diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index 1443e93..d919b77 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3517,7 +3517,9 @@ struct OmpObject { std::variant<Designator, /*common block*/ Name, Invalid> u; }; -WRAPPER_CLASS(OmpObjectList, std::list<OmpObject>); +struct OmpObjectList { + WRAPPER_CLASS_BOILERPLATE(OmpObjectList, std::list<OmpObject>); +}; // Ref: [4.5:201-207], [5.0:293-299], [5.1:325-331], [5.2:124] // @@ -3547,14 +3549,18 @@ struct OmpTypeSpecifier { std::variant<TypeSpec, DeclarationTypeSpec> u; }; -WRAPPER_CLASS(OmpTypeNameList, std::list<OmpTypeSpecifier>); +struct OmpTypeNameList { + WRAPPER_CLASS_BOILERPLATE(OmpTypeNameList, std::list<OmpTypeSpecifier>); +}; struct OmpLocator { UNION_CLASS_BOILERPLATE(OmpLocator); std::variant<OmpObject, FunctionReference> u; }; -WRAPPER_CLASS(OmpLocatorList, std::list<OmpLocator>); +struct OmpLocatorList { + WRAPPER_CLASS_BOILERPLATE(OmpLocatorList, std::list<OmpLocator>); +}; // Ref: [4.5:58-60], [5.0:58-60], [5.1:63-68], [5.2:197-198], [6.0:334-336] // @@ -4324,7 +4330,9 @@ struct OmpIteration { // // iteration-vector -> // [iteration...] 
// since 4.5 -WRAPPER_CLASS(OmpIterationVector, std::list<OmpIteration>); +struct OmpIterationVector { + WRAPPER_CLASS_BOILERPLATE(OmpIterationVector, std::list<OmpIteration>); +}; // Extract this into a separate structure (instead of having it directly in // OmpDoacrossClause), so that the context in TYPE_CONTEXT_PARSER can be set @@ -4364,14 +4372,18 @@ struct OmpDependClause { // // doacross-clause -> // DOACROSS(dependence-type: iteration-vector) // since 5.2 -WRAPPER_CLASS(OmpDoacrossClause, OmpDoacross); +struct OmpDoacrossClause { + WRAPPER_CLASS_BOILERPLATE(OmpDoacrossClause, OmpDoacross); +}; // Ref: [5.0:254-255], [5.1:287-288], [5.2:73] // // destroy-clause -> // DESTROY | // since 5.0, until 5.1 // DESTROY(variable) // since 5.2 -WRAPPER_CLASS(OmpDestroyClause, OmpObject); +struct OmpDestroyClause { + WRAPPER_CLASS_BOILERPLATE(OmpDestroyClause, OmpObject); +}; // Ref: [5.0:135-140], [5.1:161-166], [5.2:265-266] // @@ -4785,7 +4797,9 @@ struct OmpInitClause { // REF: [5.1:217-220], [5.2:294] // // 14.1.3 use-clause -> USE (interop-var) -WRAPPER_CLASS(OmpUseClause, OmpObject); +struct OmpUseClause { + WRAPPER_CLASS_BOILERPLATE(OmpUseClause, OmpObject); +}; // OpenMP Clauses struct OmpClause { diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index 975423b..98e2a5f 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -1126,6 +1126,9 @@ inline const DeclTypeSpec *Symbol::GetTypeImpl(int depth) const { [&](const HostAssocDetails &x) { return x.symbol().GetTypeImpl(depth); }, + [&](const GenericDetails &x) { + return x.specific() ? 
x.specific()->GetTypeImpl(depth) : nullptr; + }, [](const auto &) -> const DeclTypeSpec * { return nullptr; }, }, details_); diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 780d56f..50a687c 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -643,6 +643,8 @@ public: return localSymbols.lookupStorage(sym); } + Fortran::lower::SymMap &getSymbolMap() override final { return localSymbols; } + void overrideExprValues(const Fortran::lower::ExprToValueMap *map) override final { exprValueOverrides = map; @@ -3190,15 +3192,20 @@ private: std::get_if<Fortran::parser::OpenACCCombinedConstruct>(&acc.u); Fortran::lower::pft::Evaluation *curEval = &getEval(); + // Determine collapse depth/force and loopCount + bool collapseForce = false; + uint64_t collapseDepth = 1; + uint64_t loopCount = 1; if (accLoop || accCombined) { - uint64_t loopCount; if (accLoop) { const Fortran::parser::AccBeginLoopDirective &beginLoopDir = std::get<Fortran::parser::AccBeginLoopDirective>(accLoop->t); const Fortran::parser::AccClauseList &clauseList = std::get<Fortran::parser::AccClauseList>(beginLoopDir.t); loopCount = Fortran::lower::getLoopCountForCollapseAndTile(clauseList); + std::tie(collapseDepth, collapseForce) = + Fortran::lower::getCollapseSizeAndForce(clauseList); } else if (accCombined) { const Fortran::parser::AccBeginCombinedDirective &beginCombinedDir = std::get<Fortran::parser::AccBeginCombinedDirective>( @@ -3206,6 +3213,8 @@ private: const Fortran::parser::AccClauseList &clauseList = std::get<Fortran::parser::AccClauseList>(beginCombinedDir.t); loopCount = Fortran::lower::getLoopCountForCollapseAndTile(clauseList); + std::tie(collapseDepth, collapseForce) = + Fortran::lower::getCollapseSizeAndForce(clauseList); } if (curEval->lowerAsStructured()) { @@ -3215,8 +3224,63 @@ private: } } - for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations()) - genFIR(e); + const bool isStructured = curEval && 
curEval->lowerAsStructured(); + if (isStructured && collapseForce && collapseDepth > 1) { + // force: collect prologue/epilogue for the first collapseDepth nested + // loops and sink them into the innermost loop body at that depth + llvm::SmallVector<Fortran::lower::pft::Evaluation *> prologue, epilogue; + Fortran::lower::pft::Evaluation *parent = &getEval(); + Fortran::lower::pft::Evaluation *innermostLoopEval = nullptr; + for (uint64_t lvl = 0; lvl + 1 < collapseDepth; ++lvl) { + epilogue.clear(); + auto &kids = parent->getNestedEvaluations(); + // Collect all non-loop statements before the next inner loop as + // prologue, then mark remaining siblings as epilogue and descend into + // the inner loop. + Fortran::lower::pft::Evaluation *childLoop = nullptr; + for (auto it = kids.begin(); it != kids.end(); ++it) { + if (it->getIf<Fortran::parser::DoConstruct>()) { + childLoop = &*it; + for (auto it2 = std::next(it); it2 != kids.end(); ++it2) + epilogue.push_back(&*it2); + break; + } + prologue.push_back(&*it); + } + // Semantics guarantees collapseDepth does not exceed nest depth + // so childLoop must be found here. 
+ assert(childLoop && "Expected inner DoConstruct for collapse"); + parent = childLoop; + innermostLoopEval = childLoop; + } + + // Track sunk evaluations (avoid double-lowering) + llvm::SmallPtrSet<const Fortran::lower::pft::Evaluation *, 16> sunk; + for (auto *e : prologue) + sunk.insert(e); + for (auto *e : epilogue) + sunk.insert(e); + + auto sink = + [&](llvm::SmallVector<Fortran::lower::pft::Evaluation *> &lst) { + for (auto *e : lst) + genFIR(*e); + }; + + sink(prologue); + + // Lower innermost loop body, skipping sunk + for (Fortran::lower::pft::Evaluation &e : + innermostLoopEval->getNestedEvaluations()) + if (!sunk.contains(&e)) + genFIR(e); + + sink(epilogue); + } else { + // Normal lowering + for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations()) + genFIR(e); + } localSymbols.popScope(); builder->restoreInsertionPoint(insertPt); diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 4a9e494..62e5c0c 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -20,6 +20,7 @@ #include "flang/Lower/PFTBuilder.h" #include "flang/Lower/StatementContext.h" #include "flang/Lower/Support/Utils.h" +#include "flang/Lower/SymbolMap.h" #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/Complex.h" #include "flang/Optimizer/Builder/FIRBuilder.h" @@ -33,6 +34,7 @@ #include "flang/Semantics/scope.h" #include "flang/Semantics/tools.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" +#include "mlir/IR/IRMapping.h" #include "mlir/IR/MLIRContext.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/STLExtras.h" @@ -60,6 +62,16 @@ static llvm::cl::opt<bool> lowerDoLoopToAccLoop( llvm::cl::desc("Whether to lower do loops as `acc.loop` operations."), llvm::cl::init(true)); +static llvm::cl::opt<bool> enableSymbolRemapping( + "openacc-remap-symbols", + llvm::cl::desc("Whether to remap symbols that appears in data clauses."), + llvm::cl::init(true)); + +static llvm::cl::opt<bool> 
enableDevicePtrRemap( + "openacc-remap-device-ptr-symbols", + llvm::cl::desc("sub-option of openacc-remap-symbols for deviceptr clause"), + llvm::cl::init(false)); + // Special value for * passed in device_type or gang clauses. static constexpr std::int64_t starCst = -1; @@ -624,17 +636,19 @@ void genAtomicCapture(Fortran::lower::AbstractConverter &converter, } template <typename Op> -static void -genDataOperandOperations(const Fortran::parser::AccObjectList &objectList, - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semanticsContext, - Fortran::lower::StatementContext &stmtCtx, - llvm::SmallVectorImpl<mlir::Value> &dataOperands, - mlir::acc::DataClause dataClause, bool structured, - bool implicit, llvm::ArrayRef<mlir::Value> async, - llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes, - llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes, - bool setDeclareAttr = false) { +static void genDataOperandOperations( + const Fortran::parser::AccObjectList &objectList, + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semanticsContext, + Fortran::lower::StatementContext &stmtCtx, + llvm::SmallVectorImpl<mlir::Value> &dataOperands, + mlir::acc::DataClause dataClause, bool structured, bool implicit, + llvm::ArrayRef<mlir::Value> async, + llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes, + llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes, + bool setDeclareAttr = false, + llvm::SmallVectorImpl<std::pair<mlir::Value, Fortran::semantics::SymbolRef>> + *symbolPairs = nullptr) { fir::FirOpBuilder &builder = converter.getFirOpBuilder(); Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext}; const bool unwrapBoxAddr = true; @@ -655,6 +669,9 @@ genDataOperandOperations(const Fortran::parser::AccObjectList &objectList, /*strideIncludeLowerExtent=*/strideIncludeLowerExtent); LLVM_DEBUG(llvm::dbgs() << __func__ << "\n"; info.dump(llvm::dbgs())); + bool isWholeSymbol = + !designator || 
Fortran::evaluate::UnwrapWholeSymbolDataRef(*designator); + // If the input value is optional and is not a descriptor, we use the // rawInput directly. mlir::Value baseAddr = ((fir::unwrapRefType(info.addr.getType()) != @@ -668,6 +685,11 @@ genDataOperandOperations(const Fortran::parser::AccObjectList &objectList, asyncOnlyDeviceTypes, unwrapBoxAddr, info.isPresent); dataOperands.push_back(op.getAccVar()); + // Track the symbol and its corresponding mlir::Value if requested + if (symbolPairs && isWholeSymbol) + symbolPairs->emplace_back(op.getAccVar(), + Fortran::semantics::SymbolRef(symbol)); + // For UseDeviceOp, if operand is one of a pair resulting from a // declare operation, create a UseDeviceOp for the other operand as well. if constexpr (std::is_same_v<Op, mlir::acc::UseDeviceOp>) { @@ -681,6 +703,8 @@ genDataOperandOperations(const Fortran::parser::AccObjectList &objectList, asyncDeviceTypes, asyncOnlyDeviceTypes, unwrapBoxAddr, info.isPresent); dataOperands.push_back(op.getAccVar()); + // Not adding this to symbolPairs because it only make sense to + // map the symbol to a single value. 
} } } @@ -1264,7 +1288,9 @@ static void genPrivatizationRecipes( llvm::SmallVector<mlir::Attribute> &privatizationRecipes, llvm::ArrayRef<mlir::Value> async, llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes, - llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes) { + llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes, + llvm::SmallVectorImpl<std::pair<mlir::Value, Fortran::semantics::SymbolRef>> + *symbolPairs = nullptr) { fir::FirOpBuilder &builder = converter.getFirOpBuilder(); Fortran::evaluate::ExpressionAnalyzer ea{semanticsContext}; for (const auto &accObject : objectList.v) { @@ -1284,6 +1310,9 @@ static void genPrivatizationRecipes( /*strideIncludeLowerExtent=*/strideIncludeLowerExtent); LLVM_DEBUG(llvm::dbgs() << __func__ << "\n"; info.dump(llvm::dbgs())); + bool isWholeSymbol = + !designator || Fortran::evaluate::UnwrapWholeSymbolDataRef(*designator); + RecipeOp recipe; mlir::Type retTy = getTypeFromBounds(bounds, info.addr.getType()); if constexpr (std::is_same_v<RecipeOp, mlir::acc::PrivateRecipeOp>) { @@ -1297,6 +1326,11 @@ static void genPrivatizationRecipes( /*implicit=*/false, mlir::acc::DataClause::acc_private, retTy, async, asyncDeviceTypes, asyncOnlyDeviceTypes, /*unwrapBoxAddr=*/true); dataOperands.push_back(op.getAccVar()); + + // Track the symbol and its corresponding mlir::Value if requested + if (symbolPairs && isWholeSymbol) + symbolPairs->emplace_back(op.getAccVar(), + Fortran::semantics::SymbolRef(symbol)); } else { std::string suffix = areAllBoundConstant(bounds) ? 
getBoundsString(bounds) : ""; @@ -1310,6 +1344,11 @@ static void genPrivatizationRecipes( async, asyncDeviceTypes, asyncOnlyDeviceTypes, /*unwrapBoxAddr=*/true); dataOperands.push_back(op.getAccVar()); + + // Track the symbol and its corresponding mlir::Value if requested + if (symbolPairs && isWholeSymbol) + symbolPairs->emplace_back(op.getAccVar(), + Fortran::semantics::SymbolRef(symbol)); } privatizationRecipes.push_back(mlir::SymbolRefAttr::get( builder.getContext(), recipe.getSymName().str())); @@ -1949,15 +1988,16 @@ mlir::Type getTypeFromIvTypeSize(fir::FirOpBuilder &builder, return builder.getIntegerType(ivTypeSize * 8); } -static void -privatizeIv(Fortran::lower::AbstractConverter &converter, - const Fortran::semantics::Symbol &sym, mlir::Location loc, - llvm::SmallVector<mlir::Type> &ivTypes, - llvm::SmallVector<mlir::Location> &ivLocs, - llvm::SmallVector<mlir::Value> &privateOperands, - llvm::SmallVector<mlir::Value> &ivPrivate, - llvm::SmallVector<mlir::Attribute> &privatizationRecipes, - bool isDoConcurrent = false) { +static void privatizeIv( + Fortran::lower::AbstractConverter &converter, + const Fortran::semantics::Symbol &sym, mlir::Location loc, + llvm::SmallVector<mlir::Type> &ivTypes, + llvm::SmallVector<mlir::Location> &ivLocs, + llvm::SmallVector<mlir::Value> &privateOperands, + llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>> + &ivPrivate, + llvm::SmallVector<mlir::Attribute> &privatizationRecipes, + bool isDoConcurrent = false) { fir::FirOpBuilder &builder = converter.getFirOpBuilder(); mlir::Type ivTy = getTypeFromIvTypeSize(builder, sym); @@ -2001,15 +2041,8 @@ privatizeIv(Fortran::lower::AbstractConverter &converter, builder.getContext(), recipe.getSymName().str())); } - // Map the new private iv to its symbol for the scope of the loop. bindSymbol - // might create a hlfir.declare op, if so, we map its result in order to - // use the sym value in the scope. 
- converter.bindSymbol(sym, mlir::acc::getAccVar(privateOp)); - auto privateValue = converter.getSymbolAddress(sym); - if (auto declareOp = - mlir::dyn_cast<hlfir::DeclareOp>(privateValue.getDefiningOp())) - privateValue = declareOp.getResults()[0]; - ivPrivate.push_back(privateValue); + ivPrivate.emplace_back(mlir::acc::getAccVar(privateOp), + Fortran::semantics::SymbolRef(sym)); } static void determineDefaultLoopParMode( @@ -2088,7 +2121,8 @@ static void processDoLoopBounds( llvm::SmallVector<mlir::Value> &upperbounds, llvm::SmallVector<mlir::Value> &steps, llvm::SmallVector<mlir::Value> &privateOperands, - llvm::SmallVector<mlir::Value> &ivPrivate, + llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>> + &ivPrivate, llvm::SmallVector<mlir::Attribute> &privatizationRecipes, llvm::SmallVector<mlir::Type> &ivTypes, llvm::SmallVector<mlir::Location> &ivLocs, @@ -2144,11 +2178,25 @@ static void processDoLoopBounds( locs.push_back(converter.genLocation( Fortran::parser::FindSourceLocation(outerDoConstruct))); } else { - auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>(); - assert(doCons && "expect do construct"); - loopControl = &*doCons->GetLoopControl(); + // Safely locate the next inner DoConstruct within this eval. + const Fortran::parser::DoConstruct *innerDo = nullptr; + if (crtEval && crtEval->hasNestedEvaluations()) { + for (Fortran::lower::pft::Evaluation &child : + crtEval->getNestedEvaluations()) { + if (auto *stmt = child.getIf<Fortran::parser::DoConstruct>()) { + innerDo = stmt; + // Prepare to descend for the next iteration + crtEval = &child; + break; + } + } + } + if (!innerDo) + break; // No deeper loop; stop collecting collapsed bounds. 
+ + loopControl = &*innerDo->GetLoopControl(); locs.push_back(converter.genLocation( - Fortran::parser::FindSourceLocation(*doCons))); + Fortran::parser::FindSourceLocation(*innerDo))); } const Fortran::parser::LoopControl::Bounds *bounds = @@ -2172,32 +2220,127 @@ static void processDoLoopBounds( inclusiveBounds.push_back(true); - if (i < loopsToProcess - 1) - crtEval = &*std::next(crtEval->getNestedEvaluations().begin()); + // crtEval already updated when descending; no blind increment here. } } } -static mlir::acc::LoopOp -buildACCLoopOp(Fortran::lower::AbstractConverter &converter, - mlir::Location currentLocation, - Fortran::semantics::SemanticsContext &semanticsContext, - Fortran::lower::StatementContext &stmtCtx, - const Fortran::parser::DoConstruct &outerDoConstruct, - Fortran::lower::pft::Evaluation &eval, - llvm::SmallVector<mlir::Value> &privateOperands, - llvm::SmallVector<mlir::Attribute> &privatizationRecipes, - llvm::SmallVector<mlir::Value> &gangOperands, - llvm::SmallVector<mlir::Value> &workerNumOperands, - llvm::SmallVector<mlir::Value> &vectorOperands, - llvm::SmallVector<mlir::Value> &tileOperands, - llvm::SmallVector<mlir::Value> &cacheOperands, - llvm::SmallVector<mlir::Value> &reductionOperands, - llvm::SmallVector<mlir::Type> &retTy, mlir::Value yieldValue, - uint64_t loopsToProcess) { +/// Remap symbols that appeared in OpenACC data clauses to use the results of +/// the corresponding data operations. This allows isolating symbol accesses +/// inside the OpenACC region from accesses in the host and other regions while +/// preserving Fortran information about the symbols for optimizations. 
+template <typename RegionOp> +static void remapDataOperandSymbols( + Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &builder, + RegionOp ®ionOp, + const llvm::SmallVector< + std::pair<mlir::Value, Fortran::semantics::SymbolRef>> + &dataOperandSymbolPairs) { + if (!enableSymbolRemapping || dataOperandSymbolPairs.empty()) + return; + + // Map Symbols that appeared inside data clauses to a new hlfir.declare whose + // input is the acc data operation result. + // This allows isolating all the symbol accesses inside the compute region + // from accesses in the host and other regions while preserving the Fortran + // information about the symbols for Fortran specific optimizations inside the + // region. + Fortran::lower::SymMap &symbolMap = converter.getSymbolMap(); + mlir::OpBuilder::InsertionGuard insertGuard(builder); + builder.setInsertionPointToStart(®ionOp.getRegion().front()); + llvm::SmallPtrSet<const Fortran::semantics::Symbol *, 8> seenSymbols; + mlir::IRMapping mapper; + for (auto [value, symbol] : dataOperandSymbolPairs) { + + // If A symbol appears on several data clause, just map it to the first + // result (all data operations results for a symbol are pointing same + // memory, so it does not matter which one is used). + if (seenSymbols.contains(&symbol.get())) + continue; + seenSymbols.insert(&symbol.get()); + std::optional<fir::FortranVariableOpInterface> hostDef = + symbolMap.lookupVariableDefinition(symbol); + assert(hostDef.has_value() && llvm::isa<hlfir::DeclareOp>(*hostDef) && + "expected symbol to be mapped to hlfir.declare"); + auto hostDeclare = llvm::cast<hlfir::DeclareOp>(*hostDef); + // Replace base input and DummyScope inputs. 
+ mlir::Value hostInput = hostDeclare.getMemref(); + mlir::Type hostType = hostInput.getType(); + mlir::Type computeType = value.getType(); + if (hostType == computeType) { + mapper.map(hostInput, value); + } else if (llvm::isa<fir::BaseBoxType>(computeType)) { + assert(!llvm::isa<fir::BaseBoxType>(hostType) && + "box type mismatch between compute region variable and " + "hlfir.declare input unexpected"); + if (Fortran::semantics::IsOptional(symbol)) + TODO(regionOp.getLoc(), + "remapping OPTIONAL symbol in OpenACC compute region"); + auto rawValue = + fir::BoxAddrOp::create(builder, regionOp.getLoc(), hostType, value); + mapper.map(hostInput, rawValue); + } else { + assert(!llvm::isa<fir::BaseBoxType>(hostType) && + "compute region variable should not be raw address when host " + "hlfir.declare input was a box"); + assert(fir::isBoxAddress(hostType) == fir::isBoxAddress(computeType) && + "compute region variable should be a pointer/allocatable if and " + "only if host is"); + assert(fir::isa_ref_type(hostType) && fir::isa_ref_type(computeType) && + "compute region variable and host variable should both be raw " + "addresses"); + mlir::Value cast = + builder.createConvert(regionOp.getLoc(), hostType, value); + mapper.map(hostInput, cast); + } + if (mlir::Value dummyScope = hostDeclare.getDummyScope()) { + // Copy the dummy scope into the region so that aliasing rules about + // Fortran dummies are understood inside the region and the abstract dummy + // scope type does not have to cross the OpenACC compute region boundary. + if (!mapper.contains(dummyScope)) { + mlir::Operation *hostDummyScopeOp = dummyScope.getDefiningOp(); + assert(hostDummyScopeOp && + "dummyScope defining operation must be visible in lowering"); + (void)builder.clone(*hostDummyScopeOp, mapper); + } + } + + mlir::Operation *computeDef = + builder.clone(*hostDeclare.getOperation(), mapper); + + // The input box already went through an hlfir.declare. 
It has the correct + // local lower bounds and attribute. Do not generate a new fir.rebox. + if (llvm::isa<fir::BaseBoxType>(hostDeclare.getMemref().getType())) + llvm::cast<hlfir::DeclareOp>(*computeDef).setSkipRebox(true); + + symbolMap.addVariableDefinition( + symbol, llvm::cast<fir::FortranVariableOpInterface>(computeDef)); + } +} + +static mlir::acc::LoopOp buildACCLoopOp( + Fortran::lower::AbstractConverter &converter, + mlir::Location currentLocation, + Fortran::semantics::SemanticsContext &semanticsContext, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::DoConstruct &outerDoConstruct, + Fortran::lower::pft::Evaluation &eval, + llvm::SmallVector<mlir::Value> &privateOperands, + llvm::SmallVector<mlir::Attribute> &privatizationRecipes, + llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>> + &dataOperandSymbolPairs, + llvm::SmallVector<mlir::Value> &gangOperands, + llvm::SmallVector<mlir::Value> &workerNumOperands, + llvm::SmallVector<mlir::Value> &vectorOperands, + llvm::SmallVector<mlir::Value> &tileOperands, + llvm::SmallVector<mlir::Value> &cacheOperands, + llvm::SmallVector<mlir::Value> &reductionOperands, + llvm::SmallVector<mlir::Type> &retTy, mlir::Value yieldValue, + uint64_t loopsToProcess) { fir::FirOpBuilder &builder = converter.getFirOpBuilder(); - llvm::SmallVector<mlir::Value> ivPrivate; + llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>> + ivPrivate; llvm::SmallVector<mlir::Type> ivTypes; llvm::SmallVector<mlir::Location> ivLocs; llvm::SmallVector<bool> inclusiveBounds; @@ -2231,10 +2374,22 @@ buildACCLoopOp(Fortran::lower::AbstractConverter &converter, builder, builder.getFusedLoc(locs), currentLocation, eval, operands, operandSegments, /*outerCombined=*/false, retTy, yieldValue, ivTypes, ivLocs); - - for (auto [arg, value] : llvm::zip( - loopOp.getLoopRegions().front()->front().getArguments(), ivPrivate)) - fir::StoreOp::create(builder, currentLocation, arg, value); + // Ensure 
the iv symbol is mapped to private iv SSA value for the scope of + // the loop even if it did not appear explicitly in a PRIVATE clause (if it + // appeared explicitly in such clause, that is also fine because duplicates + // in the list are ignored). + dataOperandSymbolPairs.append(ivPrivate.begin(), ivPrivate.end()); + // Remap symbols from data clauses to use data operation results + remapDataOperandSymbols(converter, builder, loopOp, dataOperandSymbolPairs); + + for (auto [arg, iv] : + llvm::zip(loopOp.getLoopRegions().front()->front().getArguments(), + ivPrivate)) { + // Store block argument to the related iv private variable. + mlir::Value privateValue = + converter.getSymbolAddress(std::get<Fortran::semantics::SymbolRef>(iv)); + fir::StoreOp::create(builder, currentLocation, arg, privateValue); + } loopOp.setInclusiveUpperbound(inclusiveBounds); @@ -2260,6 +2415,10 @@ static mlir::acc::LoopOp createLoopOp( llvm::SmallVector<int32_t> tileOperandsSegments, gangOperandsSegments; llvm::SmallVector<int64_t> collapseValues; + // Vector to track mlir::Value results and their corresponding Fortran symbols + llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>> + dataOperandSymbolPairs; + llvm::SmallVector<mlir::Attribute> gangArgTypes; llvm::SmallVector<mlir::Attribute> seqDeviceTypes, independentDeviceTypes, autoDeviceTypes, vectorOperandsDeviceTypes, workerNumOperandsDeviceTypes, @@ -2380,7 +2539,8 @@ static mlir::acc::LoopOp createLoopOp( genPrivatizationRecipes<mlir::acc::PrivateRecipeOp>( privateClause->v, converter, semanticsContext, stmtCtx, privateOperands, privatizationRecipes, /*async=*/{}, - /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{}); + /*asyncDeviceTypes=*/{}, /*asyncOnlyDeviceTypes=*/{}, + &dataOperandSymbolPairs); } else if (const auto *reductionClause = std::get_if<Fortran::parser::AccClause::Reduction>( &clause.u)) { @@ -2406,10 +2566,6 @@ static mlir::acc::LoopOp createLoopOp( 
std::get_if<Fortran::parser::AccClause::Collapse>( &clause.u)) { const Fortran::parser::AccCollapseArg &arg = collapseClause->v; - const auto &force = std::get<bool>(arg.t); - if (force) - TODO(clauseLocation, "OpenACC collapse force modifier"); - const auto &intExpr = std::get<Fortran::parser::ScalarIntConstantExpr>(arg.t); const auto *expr = Fortran::semantics::GetExpr(intExpr); @@ -2436,9 +2592,9 @@ static mlir::acc::LoopOp createLoopOp( Fortran::lower::getLoopCountForCollapseAndTile(accClauseList); auto loopOp = buildACCLoopOp( converter, currentLocation, semanticsContext, stmtCtx, outerDoConstruct, - eval, privateOperands, privatizationRecipes, gangOperands, - workerNumOperands, vectorOperands, tileOperands, cacheOperands, - reductionOperands, retTy, yieldValue, loopsToProcess); + eval, privateOperands, privatizationRecipes, dataOperandSymbolPairs, + gangOperands, workerNumOperands, vectorOperands, tileOperands, + cacheOperands, reductionOperands, retTy, yieldValue, loopsToProcess); if (!gangDeviceTypes.empty()) loopOp.setGangAttr(builder.getArrayAttr(gangDeviceTypes)); @@ -2568,7 +2724,9 @@ static void genDataOperandOperationsWithModifier( llvm::ArrayRef<mlir::Value> async, llvm::ArrayRef<mlir::Attribute> asyncDeviceTypes, llvm::ArrayRef<mlir::Attribute> asyncOnlyDeviceTypes, - bool setDeclareAttr = false) { + bool setDeclareAttr = false, + llvm::SmallVectorImpl<std::pair<mlir::Value, Fortran::semantics::SymbolRef>> + *symbolPairs = nullptr) { const Fortran::parser::AccObjectListWithModifier &listWithModifier = x->v; const auto &accObjectList = std::get<Fortran::parser::AccObjectList>(listWithModifier.t); @@ -2581,7 +2739,7 @@ static void genDataOperandOperationsWithModifier( stmtCtx, dataClauseOperands, dataClause, /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes, asyncOnlyDeviceTypes, - setDeclareAttr); + setDeclareAttr, symbolPairs); } template <typename Op> @@ -2612,6 +2770,10 @@ static Op createComputeOp( 
llvm::SmallVector<mlir::Attribute> privatizationRecipes, firstPrivatizationRecipes, reductionRecipes; + // Vector to track mlir::Value results and their corresponding Fortran symbols + llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>> + dataOperandSymbolPairs; + // Self clause has optional values but can be present with // no value as well. When there is no value, the op has an attribute to // represent the clause. @@ -2732,7 +2894,8 @@ static Op createComputeOp( copyClause->v, converter, semanticsContext, stmtCtx, dataClauseOperands, mlir::acc::DataClause::acc_copy, /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes, - asyncOnlyDeviceTypes); + asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, + &dataOperandSymbolPairs); copyEntryOperands.append(dataClauseOperands.begin() + crtDataStart, dataClauseOperands.end()); } else if (const auto *copyinClause = @@ -2744,7 +2907,8 @@ static Op createComputeOp( Fortran::parser::AccDataModifier::Modifier::ReadOnly, dataClauseOperands, mlir::acc::DataClause::acc_copyin, mlir::acc::DataClause::acc_copyin_readonly, async, asyncDeviceTypes, - asyncOnlyDeviceTypes); + asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, + &dataOperandSymbolPairs); copyinEntryOperands.append(dataClauseOperands.begin() + crtDataStart, dataClauseOperands.end()); } else if (const auto *copyoutClause = @@ -2757,7 +2921,8 @@ static Op createComputeOp( Fortran::parser::AccDataModifier::Modifier::ReadOnly, dataClauseOperands, mlir::acc::DataClause::acc_copyout, mlir::acc::DataClause::acc_copyout_zero, async, asyncDeviceTypes, - asyncOnlyDeviceTypes); + asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, + &dataOperandSymbolPairs); copyoutEntryOperands.append(dataClauseOperands.begin() + crtDataStart, dataClauseOperands.end()); } else if (const auto *createClause = @@ -2769,7 +2934,8 @@ static Op createComputeOp( Fortran::parser::AccDataModifier::Modifier::Zero, dataClauseOperands, mlir::acc::DataClause::acc_create, 
mlir::acc::DataClause::acc_create_zero, async, asyncDeviceTypes, - asyncOnlyDeviceTypes); + asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, + &dataOperandSymbolPairs); createEntryOperands.append(dataClauseOperands.begin() + crtDataStart, dataClauseOperands.end()); } else if (const auto *noCreateClause = @@ -2780,7 +2946,8 @@ static Op createComputeOp( noCreateClause->v, converter, semanticsContext, stmtCtx, dataClauseOperands, mlir::acc::DataClause::acc_no_create, /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes, - asyncOnlyDeviceTypes); + asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, + &dataOperandSymbolPairs); nocreateEntryOperands.append(dataClauseOperands.begin() + crtDataStart, dataClauseOperands.end()); } else if (const auto *presentClause = @@ -2791,17 +2958,21 @@ static Op createComputeOp( presentClause->v, converter, semanticsContext, stmtCtx, dataClauseOperands, mlir::acc::DataClause::acc_present, /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes, - asyncOnlyDeviceTypes); + asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, + &dataOperandSymbolPairs); presentEntryOperands.append(dataClauseOperands.begin() + crtDataStart, dataClauseOperands.end()); } else if (const auto *devicePtrClause = std::get_if<Fortran::parser::AccClause::Deviceptr>( &clause.u)) { + llvm::SmallVectorImpl< + std::pair<mlir::Value, Fortran::semantics::SymbolRef>> *symPairs = + enableDevicePtrRemap ? 
&dataOperandSymbolPairs : nullptr; genDataOperandOperations<mlir::acc::DevicePtrOp>( devicePtrClause->v, converter, semanticsContext, stmtCtx, dataClauseOperands, mlir::acc::DataClause::acc_deviceptr, /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes, - asyncOnlyDeviceTypes); + asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, symPairs); } else if (const auto *attachClause = std::get_if<Fortran::parser::AccClause::Attach>(&clause.u)) { auto crtDataStart = dataClauseOperands.size(); @@ -2809,7 +2980,8 @@ static Op createComputeOp( attachClause->v, converter, semanticsContext, stmtCtx, dataClauseOperands, mlir::acc::DataClause::acc_attach, /*structured=*/true, /*implicit=*/false, async, asyncDeviceTypes, - asyncOnlyDeviceTypes); + asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, + &dataOperandSymbolPairs); attachEntryOperands.append(dataClauseOperands.begin() + crtDataStart, dataClauseOperands.end()); } else if (const auto *privateClause = @@ -2819,14 +2991,14 @@ static Op createComputeOp( genPrivatizationRecipes<mlir::acc::PrivateRecipeOp>( privateClause->v, converter, semanticsContext, stmtCtx, privateOperands, privatizationRecipes, async, asyncDeviceTypes, - asyncOnlyDeviceTypes); + asyncOnlyDeviceTypes, &dataOperandSymbolPairs); } else if (const auto *firstprivateClause = std::get_if<Fortran::parser::AccClause::Firstprivate>( &clause.u)) { genPrivatizationRecipes<mlir::acc::FirstprivateRecipeOp>( firstprivateClause->v, converter, semanticsContext, stmtCtx, firstprivateOperands, firstPrivatizationRecipes, async, - asyncDeviceTypes, asyncOnlyDeviceTypes); + asyncDeviceTypes, asyncOnlyDeviceTypes, &dataOperandSymbolPairs); } else if (const auto *reductionClause = std::get_if<Fortran::parser::AccClause::Reduction>( &clause.u)) { @@ -2846,7 +3018,8 @@ static Op createComputeOp( converter, semanticsContext, stmtCtx, dataClauseOperands, mlir::acc::DataClause::acc_reduction, /*structured=*/true, /*implicit=*/true, async, asyncDeviceTypes, - 
asyncOnlyDeviceTypes); + asyncOnlyDeviceTypes, /*setDeclareAttr=*/false, + &dataOperandSymbolPairs); copyEntryOperands.append(dataClauseOperands.begin() + crtDataStart, dataClauseOperands.end()); } @@ -2945,6 +3118,11 @@ static Op createComputeOp( computeOp.setCombinedAttr(builder.getUnitAttr()); auto insPt = builder.saveInsertionPoint(); + + // Remap symbols from data clauses to use data operation results + remapDataOperandSymbols(converter, builder, computeOp, + dataOperandSymbolPairs); + builder.setInsertionPointAfter(computeOp); // Create the exit operations after the region. @@ -4860,25 +5038,34 @@ void Fortran::lower::genEarlyReturnInOpenACCLoop(fir::FirOpBuilder &builder, uint64_t Fortran::lower::getLoopCountForCollapseAndTile( const Fortran::parser::AccClauseList &clauseList) { - uint64_t collapseLoopCount = 1; + uint64_t collapseLoopCount = getCollapseSizeAndForce(clauseList).first; uint64_t tileLoopCount = 1; for (const Fortran::parser::AccClause &clause : clauseList.v) { - if (const auto *collapseClause = - std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) { - const parser::AccCollapseArg &arg = collapseClause->v; - const auto &collapseValue{std::get<parser::ScalarIntConstantExpr>(arg.t)}; - collapseLoopCount = *Fortran::semantics::GetIntValue(collapseValue); - } if (const auto *tileClause = std::get_if<Fortran::parser::AccClause::Tile>(&clause.u)) { const parser::AccTileExprList &tileExprList = tileClause->v; - const std::list<parser::AccTileExpr> &listTileExpr = tileExprList.v; - tileLoopCount = listTileExpr.size(); + tileLoopCount = tileExprList.v.size(); } } - if (tileLoopCount > collapseLoopCount) - return tileLoopCount; - return collapseLoopCount; + return tileLoopCount > collapseLoopCount ? 
tileLoopCount : collapseLoopCount; +} + +std::pair<uint64_t, bool> Fortran::lower::getCollapseSizeAndForce( + const Fortran::parser::AccClauseList &clauseList) { + uint64_t size = 1; + bool force = false; + for (const Fortran::parser::AccClause &clause : clauseList.v) { + if (const auto *collapseClause = + std::get_if<Fortran::parser::AccClause::Collapse>(&clause.u)) { + const Fortran::parser::AccCollapseArg &arg = collapseClause->v; + force = std::get<bool>(arg.t); + const auto &collapseValue = + std::get<Fortran::parser::ScalarIntConstantExpr>(arg.t); + size = *Fortran::semantics::GetIntValue(collapseValue); + break; + } + } + return {size, force}; } /// Create an ACC loop operation for a DO construct when inside ACC compute @@ -4921,6 +5108,8 @@ mlir::Operation *Fortran::lower::genOpenACCLoopFromDoConstruct( reductionOperands; llvm::SmallVector<mlir::Attribute> privatizationRecipes; llvm::SmallVector<mlir::Type> retTy; + llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>> + dataOperandSymbolPairs; mlir::Value yieldValue; uint64_t loopsToProcess = 1; // Single loop construct @@ -4929,9 +5118,10 @@ mlir::Operation *Fortran::lower::genOpenACCLoopFromDoConstruct( Fortran::lower::StatementContext stmtCtx; auto loopOp = buildACCLoopOp( converter, converter.getCurrentLocation(), semanticsContext, stmtCtx, - doConstruct, eval, privateOperands, privatizationRecipes, gangOperands, - workerNumOperands, vectorOperands, tileOperands, cacheOperands, - reductionOperands, retTy, yieldValue, loopsToProcess); + doConstruct, eval, privateOperands, privatizationRecipes, + dataOperandSymbolPairs, gangOperands, workerNumOperands, vectorOperands, + tileOperands, cacheOperands, reductionOperands, retTy, yieldValue, + loopsToProcess); fir::FirOpBuilder &builder = converter.getFirOpBuilder(); if (!privatizationRecipes.empty()) diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 9e56c2b..bd94651 100644 --- 
a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -153,6 +153,7 @@ public: clauseOps.loopLowerBounds = ops.loopLowerBounds; clauseOps.loopUpperBounds = ops.loopUpperBounds; clauseOps.loopSteps = ops.loopSteps; + clauseOps.collapseNumLoops = ops.collapseNumLoops; ivOut.append(iv); return true; } diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 50603cb..4a05cd9 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -3348,7 +3348,7 @@ struct DoConcurrentSpecifierOpConversion : public fir::FIROpConversion<OpTy> { mlir::ConversionPatternRewriter &rewriter) const override { #ifdef EXPENSIVE_CHECKS auto uses = mlir::SymbolTable::getSymbolUses( - specifier, specifier->getParentOfType<mlir::ModuleOp>()); + specifier, specifier->template getParentOfType<mlir::ModuleOp>()); // `fir.local|fir.declare_reduction` ops are not supposed to have any uses // at this point (i.e. during lowering to LLVM). In case of serialization, diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp index e595e61..260e525 100644 --- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp @@ -77,6 +77,10 @@ class MapInfoFinalizationPass /// | | std::map<mlir::Operation *, mlir::Value> localBoxAllocas; + // List of deferrable descriptors to process at the end of + // the pass. + llvm::SmallVector<mlir::Operation *> deferrableDesc; + /// Return true if the given path exists in a list of paths. static bool containsPath(const llvm::SmallVectorImpl<llvm::SmallVector<int64_t>> &paths, @@ -183,6 +187,40 @@ class MapInfoFinalizationPass newMemberIndexPaths.emplace_back(indexPath.begin(), indexPath.end()); } + // Check if the declaration operation we have refers to a dummy + // function argument. 
+ bool isDummyArgument(mlir::Value mappedValue) { + if (auto declareOp = mlir::dyn_cast_if_present<hlfir::DeclareOp>( + mappedValue.getDefiningOp())) + if (auto dummyScope = declareOp.getDummyScope()) + return true; + return false; + } + + // Relevant for OpenMP < 5.2, where attach semantics and rules don't exist. + // As descriptors were an unspoken implementation detail in these versions + // there are certain cases where the user (and the compiler implementation) + // can create data mapping errors by having temporary descriptors stuck + // in memory. The main example is calling a 'target enter data map' + // without a corresponding exit on an assumed shape or size dummy + // argument, a local stack descriptor is generated, gets mapped and + // is then left on device. A user doesn't realize what they've done as + // the OpenMP specification isn't explicit on descriptor handling in + // earlier versions and as far as Fortran is concerned this is something + // hidden from a user. To avoid this we can defer the descriptor mapping + // in these cases until target or target data regions, when we can be + // sure they have a clear limited scope on device. + bool canDeferDescriptorMapping(mlir::Value descriptor) { + if (fir::isAllocatableType(descriptor.getType()) || + fir::isPointerType(descriptor.getType())) + return false; + if (isDummyArgument(descriptor) && + (fir::isAssumedType(descriptor.getType()) || + fir::isAssumedShape(descriptor.getType()))) + return true; + return false; + } + /// getMemberUserList gathers all users of a particular MapInfoOp that are /// other MapInfoOp's and places them into the mapMemberUsers list, which /// records the map that the current argument MapInfoOp "op" is part of @@ -234,13 +272,16 @@ class MapInfoFinalizationPass /// fir::BoxOffsetOp we utilise to access the descriptor datas /// base address can be utilised. 
mlir::Value getDescriptorFromBoxMap(mlir::omp::MapInfoOp boxMap, - fir::FirOpBuilder &builder) { + fir::FirOpBuilder &builder, + bool &canDescBeDeferred) { mlir::Value descriptor = boxMap.getVarPtr(); if (!fir::isTypeWithDescriptor(boxMap.getVarType())) if (auto addrOp = mlir::dyn_cast_if_present<fir::BoxAddrOp>( boxMap.getVarPtr().getDefiningOp())) descriptor = addrOp.getVal(); + canDescBeDeferred = canDeferDescriptorMapping(descriptor); + if (!mlir::isa<fir::BaseBoxType>(descriptor.getType()) && !fir::factory::isOptionalArgument(descriptor.getDefiningOp())) return descriptor; @@ -391,8 +432,7 @@ class MapInfoFinalizationPass /// Check if the mapOp is present in the HasDeviceAddr clause on /// the userOp. Only applies to TargetOp. - bool isHasDeviceAddr(mlir::omp::MapInfoOp mapOp, mlir::Operation *userOp) { - assert(userOp && "Expecting non-null argument"); + bool isHasDeviceAddr(mlir::omp::MapInfoOp mapOp, mlir::Operation &userOp) { if (auto targetOp = llvm::dyn_cast<mlir::omp::TargetOp>(userOp)) { for (mlir::Value hda : targetOp.getHasDeviceAddrVars()) { if (hda.getDefiningOp() == mapOp) @@ -402,6 +442,26 @@ class MapInfoFinalizationPass return false; } + bool isUseDeviceAddr(mlir::omp::MapInfoOp mapOp, mlir::Operation &userOp) { + if (auto targetDataOp = llvm::dyn_cast<mlir::omp::TargetDataOp>(userOp)) { + for (mlir::Value uda : targetDataOp.getUseDeviceAddrVars()) { + if (uda.getDefiningOp() == mapOp) + return true; + } + } + return false; + } + + bool isUseDevicePtr(mlir::omp::MapInfoOp mapOp, mlir::Operation &userOp) { + if (auto targetDataOp = llvm::dyn_cast<mlir::omp::TargetDataOp>(userOp)) { + for (mlir::Value udp : targetDataOp.getUseDevicePtrVars()) { + if (udp.getDefiningOp() == mapOp) + return true; + } + } + return false; + } + mlir::omp::MapInfoOp genBoxcharMemberMap(mlir::omp::MapInfoOp op, fir::FirOpBuilder &builder) { if (!op.getMembers().empty()) @@ -466,12 +526,14 @@ class MapInfoFinalizationPass // TODO: map the addendum segment of the 
descriptor, similarly to the // base address/data pointer member. - mlir::Value descriptor = getDescriptorFromBoxMap(op, builder); + bool descCanBeDeferred = false; + mlir::Value descriptor = + getDescriptorFromBoxMap(op, builder, descCanBeDeferred); mlir::ArrayAttr newMembersAttr; mlir::SmallVector<mlir::Value> newMembers; llvm::SmallVector<llvm::SmallVector<int64_t>> memberIndices; - bool isHasDeviceAddrFlag = isHasDeviceAddr(op, target); + bool isHasDeviceAddrFlag = isHasDeviceAddr(op, *target); if (!mapMemberUsers.empty() || !op.getMembers().empty()) getMemberIndicesAsVectors( @@ -553,6 +615,10 @@ class MapInfoFinalizationPass /*partial_map=*/builder.getBoolAttr(false)); op.replaceAllUsesWith(newDescParentMapOp.getResult()); op->erase(); + + if (descCanBeDeferred) + deferrableDesc.push_back(newDescParentMapOp); + return newDescParentMapOp; } @@ -701,6 +767,124 @@ class MapInfoFinalizationPass return nullptr; } + void addImplicitDescriptorMapToTargetDataOp(mlir::omp::MapInfoOp op, + fir::FirOpBuilder &builder, + mlir::Operation &target) { + // Checks if the map is present as an explicit map already on the target + // data directive, and not just present on a use_device_addr/ptr, as if + // that's the case, we should not need to add an implicit map for the + // descriptor. + auto explicitMappingPresent = [](mlir::omp::MapInfoOp op, + mlir::omp::TargetDataOp tarData) { + // Verify top-level descriptor mapping is at least equal with same + // varPtr, the map type should always be To for a descriptor, which is + // all we really care about for this mapping as we aim to make sure the + // descriptor is always present on device if we're expecting to access + // the underlying data. 
+ if (tarData.getMapVars().empty()) + return false; + + for (mlir::Value mapVar : tarData.getMapVars()) { + auto mapOp = llvm::cast<mlir::omp::MapInfoOp>(mapVar.getDefiningOp()); + if (mapOp.getVarPtr() == op.getVarPtr() && + mapOp.getVarPtrPtr() == op.getVarPtrPtr()) { + return true; + } + } + + return false; + }; + + // if we're not a top level descriptor with members (e.g. member of a + // derived type), we do not want to perform this step. + if (!llvm::isa<mlir::omp::TargetDataOp>(target) || op.getMembers().empty()) + return; + + if (!isUseDeviceAddr(op, target) && !isUseDevicePtr(op, target)) + return; + + auto targetDataOp = llvm::cast<mlir::omp::TargetDataOp>(target); + if (explicitMappingPresent(op, targetDataOp)) + return; + + mlir::omp::MapInfoOp newDescParentMapOp = + builder.create<mlir::omp::MapInfoOp>( + op->getLoc(), op.getResult().getType(), op.getVarPtr(), + op.getVarTypeAttr(), + builder.getIntegerAttr( + builder.getIntegerType(64, false), + llvm::to_underlying( + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO | + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS)), + op.getMapCaptureTypeAttr(), /*varPtrPtr=*/mlir::Value{}, + mlir::SmallVector<mlir::Value>{}, mlir::ArrayAttr{}, + /*bounds=*/mlir::SmallVector<mlir::Value>{}, + /*mapperId*/ mlir::FlatSymbolRefAttr(), op.getNameAttr(), + /*partial_map=*/builder.getBoolAttr(false)); + + targetDataOp.getMapVarsMutable().append({newDescParentMapOp}); + } + + void removeTopLevelDescriptor(mlir::omp::MapInfoOp op, + fir::FirOpBuilder &builder, + mlir::Operation *target) { + if (llvm::isa<mlir::omp::TargetOp, mlir::omp::TargetDataOp, + mlir::omp::DeclareMapperInfoOp>(target)) + return; + + // if we're not a top level descriptor with members (e.g. member of a + // derived type), we do not want to perform this step. 
+ if (op.getMembers().empty()) + return; + + mlir::SmallVector<mlir::Value> members = op.getMembers(); + mlir::omp::MapInfoOp baseAddr = + mlir::dyn_cast_or_null<mlir::omp::MapInfoOp>( + members.front().getDefiningOp()); + assert(baseAddr && "Expected member to be MapInfoOp"); + members.erase(members.begin()); + + llvm::SmallVector<llvm::SmallVector<int64_t>> memberIndices; + getMemberIndicesAsVectors(op, memberIndices); + + // Can skip the extra processing if there's only 1 member as it'd + // be the base addresses, which we're promoting to the parent. + mlir::ArrayAttr membersAttr; + if (memberIndices.size() > 1) { + memberIndices.erase(memberIndices.begin()); + membersAttr = builder.create2DI64ArrayAttr(memberIndices); + } + + // VarPtrPtr is tied to detecting if something is a pointer in the later + // lowering currently, this at the moment comes tied with + // OMP_MAP_PTR_AND_OBJ being applied which breaks the problem this tries to + // solve by emitting an 8-byte mapping tied to the descriptor address (even + // if we only emit a single map). So we circumvent this by removing the + // varPtrPtr mapping, however, a side effect of this is we lose the + // additional load from the backend tied to this which is required for + // correctness and getting the correct address of the data to perform our + // mapping. So we do our load at this stage. + // TODO/FIXME: Tidy up the OMP_MAP_PTR_AND_OBJ and varPtrPtr being tied to + // if something is a pointer to try and tidy up the implementation a bit. + // This is an unfortunate complexity from push-back from upstream. We + // could also emit a load at this level for all base addresses as well, + // which in turn will simplify the later lowering a bit as well. But first + // need to see how well this alteration works. 
+ auto loadBaseAddr = + builder.loadIfRef(op->getLoc(), baseAddr.getVarPtrPtr()); + mlir::omp::MapInfoOp newBaseAddrMapOp = + builder.create<mlir::omp::MapInfoOp>( + op->getLoc(), loadBaseAddr.getType(), loadBaseAddr, + baseAddr.getVarTypeAttr(), baseAddr.getMapTypeAttr(), + baseAddr.getMapCaptureTypeAttr(), mlir::Value{}, members, + membersAttr, baseAddr.getBounds(), + /*mapperId*/ mlir::FlatSymbolRefAttr(), op.getNameAttr(), + /*partial_map=*/builder.getBoolAttr(false)); + op.replaceAllUsesWith(newBaseAddrMapOp.getResult()); + op->erase(); + baseAddr.erase(); + } + // This pass executes on omp::MapInfoOp's containing descriptor based types // (allocatables, pointers, assumed shape etc.) and expanding them into // multiple omp::MapInfoOp's for each pointer member contained within the @@ -730,6 +914,7 @@ class MapInfoFinalizationPass // clear all local allocations we made for any boxes in any prior // iterations from previous function scopes. localBoxAllocas.clear(); + deferrableDesc.clear(); // First, walk `omp.map.info` ops to see if any of them have varPtrs // with an underlying type of fir.char<k, ?>, i.e a character @@ -1010,6 +1195,36 @@ class MapInfoFinalizationPass } }); + // Now that we've expanded all of our boxes into a descriptor and base + // address map where necessary, we check if the map owner is an + // enter/exit/target data directive, and if they are we drop the initial + // descriptor (top-level parent) and replace it with the + // base_address/data. + // + // This circumvents issues with stack allocated descriptors bound to + // device colliding which in Flang is rather trivial for a user to do by + // accident due to the rather pervasive local intermediate descriptor + // generation that occurs whenever you pass boxes around different scopes. 
+ // In OpenMP 6+ mapping these would be a user error as the tools required + // to circumvent these issues are provided by the spec (ref_ptr/ptee map + // types), but in prior specifications these tools are not available and + // it becomes an implementation issue for us to solve. + // + // We do this by dropping the top-level descriptor which will be the stack + // descriptor when we perform enter/exit maps, as we don't want these to + // be bound until necessary which is when we utilise the descriptor type + // within a target region. At which point we map the relevant descriptor + // data and the runtime should correctly associate the data with the + // descriptor and bind together and allow clean mapping and execution. + for (auto *op : deferrableDesc) { + auto mapOp = llvm::dyn_cast<mlir::omp::MapInfoOp>(op); + mlir::Operation *targetUser = getFirstTargetUser(mapOp); + assert(targetUser && "expected user of map operation was not found"); + builder.setInsertionPoint(mapOp); + removeTopLevelDescriptor(mapOp, builder, targetUser); + addImplicitDescriptorMapToTargetDataOp(mapOp, builder, *targetUser); + } + // Wait until after we have generated all of our maps to add them onto // the target's block arguments, simplifying the process as there would be // no need to avoid accidental duplicate additions. diff --git a/flang/test/Examples/omp-atomic.f90 b/flang/test/Examples/omp-atomic.f90 index 5695b62..d7e0a1c 100644 --- a/flang/test/Examples/omp-atomic.f90 +++ b/flang/test/Examples/omp-atomic.f90 @@ -1,6 +1,6 @@ ! REQUIRES: plugins, examples, shell -! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s +! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport%pluginext -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s ! 
Check OpenMP 2.13.6 atomic Construct diff --git a/flang/test/Examples/omp-declarative-directive.f90 b/flang/test/Examples/omp-declarative-directive.f90 index 4a9ad91..6e1b0be 100644 --- a/flang/test/Examples/omp-declarative-directive.f90 +++ b/flang/test/Examples/omp-declarative-directive.f90 @@ -1,6 +1,6 @@ ! REQUIRES: plugins, examples, shell -! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s +! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport%pluginext -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s ! Check OpenMP declarative directives diff --git a/flang/test/Examples/omp-device-constructs.f90 b/flang/test/Examples/omp-device-constructs.f90 index 916f7c9..ae52f73 100644 --- a/flang/test/Examples/omp-device-constructs.f90 +++ b/flang/test/Examples/omp-device-constructs.f90 @@ -1,6 +1,6 @@ ! REQUIRES: plugins, examples, shell -!RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s +!RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport%pluginext -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s ! Check OpenMP clause validity for the following directives: ! 2.10 Device constructs diff --git a/flang/test/Examples/omp-in-reduction-clause.f90 b/flang/test/Examples/omp-in-reduction-clause.f90 index fc3fff5..ced6722 100644 --- a/flang/test/Examples/omp-in-reduction-clause.f90 +++ b/flang/test/Examples/omp-in-reduction-clause.f90 @@ -1,6 +1,6 @@ ! REQUIRES: plugins, examples, shell -! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s +! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport%pluginext -plugin flang-omp-report -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s ! 
Check for IN_REDUCTION() clause on OpenMP constructs diff --git a/flang/test/Examples/omp-nowait.f90 b/flang/test/Examples/omp-nowait.f90 index 091a952..1d8f9e0 100644 --- a/flang/test/Examples/omp-nowait.f90 +++ b/flang/test/Examples/omp-nowait.f90 @@ -1,6 +1,6 @@ ! REQUIRES: plugins, examples, shell -! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s +! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport%pluginext -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s subroutine sb(n) implicit none diff --git a/flang/test/Examples/omp-order-clause.f90 b/flang/test/Examples/omp-order-clause.f90 index 8d1c3f4..976c750 100644 --- a/flang/test/Examples/omp-order-clause.f90 +++ b/flang/test/Examples/omp-order-clause.f90 @@ -1,6 +1,6 @@ ! REQUIRES: plugins, examples, shell -! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s +! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport%pluginext -plugin flang-omp-report -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s ! Check for ORDER([order-modifier :]concurrent) clause on OpenMP constructs diff --git a/flang/test/Examples/omp-sections.f90 b/flang/test/Examples/omp-sections.f90 index a6d2806..96de363 100644 --- a/flang/test/Examples/omp-sections.f90 +++ b/flang/test/Examples/omp-sections.f90 @@ -1,6 +1,6 @@ ! REQUIRES: plugins, examples, shell -! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport.so -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s +! RUN: %flang_fc1 -load %llvmshlibdir/flangOmpReport%pluginext -plugin flang-omp-report -fopenmp %s -o - | FileCheck %s subroutine omp_sections() integer :: x diff --git a/flang/test/Lower/OpenACC/acc-data-operands-remapping.f90 b/flang/test/Lower/OpenACC/acc-data-operands-remapping.f90 new file mode 100644 index 0000000..9d36f6a --- /dev/null +++ b/flang/test/Lower/OpenACC/acc-data-operands-remapping.f90 @@ -0,0 +1,601 @@ +! 
Test remapping of variables appearing in OpenACC data clauses +! to the related acc dialect data operation result. + +! This test checks how the hlfir.declare is recreated and used inside +! the acc compute region. + +! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s + +module m +interface +subroutine takes_scalar(x) + real :: x +end subroutine +subroutine takes_scalar_character(c, l) + integer :: l + character(l) :: c +end subroutine +subroutine takes_explicit_cst_shape(x) + real :: x(100) +end subroutine +subroutine takes_explicit_shape(x, n) + real :: x(n) +end subroutine +subroutine takes_assumed_shape(x) + real :: x(:) +end subroutine +subroutine takes_pointer(x) + real, pointer :: x(:) +end subroutine + +subroutine takes_optional_scalar(x) + real, optional :: x +end subroutine +subroutine takes_optional_explicit_cst_shape(x) + real, optional :: x(100) +end subroutine +subroutine takes_optional_explicit_shape(x, n) + real, optional :: x(n) +end subroutine +subroutine takes_optional_assumed_shape(x) + real, optional :: x(:) +end subroutine +subroutine takes_optional_pointer(x) + real, optional, pointer :: x(:) +end subroutine +end interface +contains + +! ----------------------------- Test forwarding ------------------------------ ! 
+ + subroutine test_scalar(x) + real :: x + !$acc parallel copy(x) + call takes_scalar(x) + !$acc end parallel + end subroutine + + subroutine test_scalar_character(c, l) + integer :: l + character(l) :: c + !$acc parallel copy(x) + call takes_scalar_character(c, len(c)) + !$acc end parallel + end subroutine + + subroutine test_cst_shape(x) + real :: x(100) + !$acc parallel copy(x) + call takes_explicit_cst_shape(x) + !$acc end parallel + end subroutine + + subroutine test_explicit_shape(x, n) + real :: x(n) + !$acc parallel copy(x) + call takes_explicit_shape(x, size(x,dim=1)) + !$acc end parallel + end subroutine + + subroutine test_assumed_shape(x, n) + real :: x(:) + !$acc parallel copy(x) + call takes_assumed_shape(x) + !$acc end parallel + end subroutine + + subroutine test_contiguous_assumed_shape(x, n) + real, contiguous :: x(:) + !$acc parallel copy(x) + call takes_explicit_shape(x, size(x,dim=1)) + !$acc end parallel + end subroutine + + subroutine test_pointer(x, n) + real, pointer :: x(:) + !$acc parallel copy(x) + call takes_pointer(x) + !$acc end parallel + end subroutine + + subroutine test_using_both_results(x, n) + real :: x(n) + !$acc parallel copy(x) + ! using hlfir.declare result #0 + call takes_assumed_shape(x) + ! using hlfir.declare result #1 + call takes_explicit_shape(x, size(x,dim=1)) + !$acc end parallel + end subroutine + +! ------------------------- Test array addressing ---------------------------- ! 
+ + subroutine addressing_cst_shape(x) + real :: x(10, 20) + !$acc parallel copy(x) + call takes_scalar(x(2,3)) + !$acc end parallel + end subroutine + + subroutine addressing_explicit_shape(x, n, m) + real :: x(n, m) + !$acc parallel copy(x) + call takes_scalar(x(2,3)) + !$acc end parallel + end subroutine + + subroutine addressing_assumed_shape(x, n) + real :: x(:, :) + !$acc parallel copy(x) + call takes_scalar(x(2,3)) + !$acc end parallel + end subroutine + + subroutine addressing_contiguous_assumed_shape(x, n) + real, contiguous :: x(:, :) + !$acc parallel copy(x) + call takes_scalar(x(2,3)) + !$acc end parallel + end subroutine + + subroutine addressing_pointer(x) + real, pointer :: x(:, :) + !$acc parallel copy(x) + call takes_scalar(x(2,3)) + !$acc end parallel + end subroutine + +! ------------------------ Test OPTIONAL handling ---------------------------- ! + + subroutine test_optional_scalar(x) + real, optional :: x + !$acc parallel copy(x) + call takes_optional_scalar(x) + !$acc end parallel + end subroutine + + subroutine test_optional_explicit_cst_shape(x) + real, optional :: x(100) + !$acc parallel copy(x) + call takes_optional_explicit_cst_shape(x) + !$acc end parallel + end subroutine + + subroutine test_optional_explicit_shape(x, n) + real, optional :: x(n) + !$acc parallel copy(x) + call takes_optional_explicit_shape(x, n) + !$acc end parallel + end subroutine + + subroutine test_optional_assumed_shape(x) + real, optional :: x(:) + !$acc parallel copy(x) + call takes_optional_assumed_shape(x) + !$acc end parallel + end subroutine + + subroutine test_optional_pointer(x) + real, optional, pointer :: x(:) + !$acc parallel copy(x) + call takes_optional_pointer(x) + !$acc end parallel + end subroutine + +end module + +! CHECK-LABEL: func.func @_QMmPtest_scalar( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {fir.bindc_name = "x"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! 
CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_scalarEx"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>) +! CHECK: %[[VAL_2:.*]] = acc.copyin varPtr(%[[VAL_1]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_2]] : !fir.ref<f32>) { +! CHECK: %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_2]] dummy_scope %[[VAL_3]] {uniq_name = "_QMmFtest_scalarEx"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>) +! CHECK: fir.call @_QPtakes_scalar(%[[VAL_4]]#0) fastmath<contract> : (!fir.ref<f32>) -> () +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accPtr(%[[VAL_2]] : !fir.ref<f32>) to varPtr(%[[VAL_1]]#0 : !fir.ref<f32>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPtest_scalar_character( +! CHECK-SAME: %[[ARG0:.*]]: !fir.boxchar<1> {fir.bindc_name = "c"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "l"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_scalar_characterEl"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", uniq_name = "_QMmFtest_scalar_characterEx"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QMmFtest_scalar_characterEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>) +! CHECK: %[[VAL_4:.*]]:2 = fir.unboxchar %[[ARG0]] : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index) +! CHECK: %[[VAL_5:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_7:.*]] = arith.cmpi sgt, %[[VAL_5]], %[[VAL_6]] : i32 +! CHECK: %[[VAL_8:.*]] = arith.select %[[VAL_7]], %[[VAL_5]], %[[VAL_6]] : i32 +! 
CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[VAL_4]]#0 typeparams %[[VAL_8]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_scalar_characterEc"} : (!fir.ref<!fir.char<1,?>>, i32, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref<!fir.char<1,?>>) +! CHECK: %[[VAL_10:.*]] = acc.copyin varPtr(%[[VAL_3]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_10]] : !fir.ref<f32>) { +! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QMmFtest_scalar_characterEx"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>) +! CHECK: %[[VAL_12:.*]]:3 = hlfir.associate %[[VAL_8]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1) +! CHECK: fir.call @_QPtakes_scalar_character(%[[VAL_9]]#0, %[[VAL_12]]#0) fastmath<contract> : (!fir.boxchar<1>, !fir.ref<i32>) -> () +! CHECK: hlfir.end_associate %[[VAL_12]]#1, %[[VAL_12]]#2 : !fir.ref<i32>, i1 +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accPtr(%[[VAL_10]] : !fir.ref<f32>) to varPtr(%[[VAL_3]]#0 : !fir.ref<f32>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPtest_cst_shape( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "x"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]] = arith.constant 100 : index +! CHECK: %[[VAL_2:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG0]](%[[VAL_2]]) dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_cst_shapeEx"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>) +! CHECK: %[[VAL_4:.*]] = acc.copyin varPtr(%[[VAL_3]]#0 : !fir.ref<!fir.array<100xf32>>) -> !fir.ref<!fir.array<100xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_4]] : !fir.ref<!fir.array<100xf32>>) { +! 
CHECK: %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_4]](%[[VAL_2]]) dummy_scope %[[VAL_5]] {uniq_name = "_QMmFtest_cst_shapeEx"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>) +! CHECK: fir.call @_QPtakes_explicit_cst_shape(%[[VAL_6]]#0) fastmath<contract> : (!fir.ref<!fir.array<100xf32>>) -> () +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accPtr(%[[VAL_4]] : !fir.ref<!fir.array<100xf32>>) to varPtr(%[[VAL_3]]#0 : !fir.ref<!fir.array<100xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPtest_explicit_shape( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_explicit_shapeEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64 +! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i64) -> index +! CHECK: %[[VAL_5:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_6:.*]] = arith.cmpi sgt, %[[VAL_4]], %[[VAL_5]] : index +! CHECK: %[[VAL_7:.*]] = arith.select %[[VAL_6]], %[[VAL_4]], %[[VAL_5]] : index +! CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG0]](%[[VAL_8]]) dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_explicit_shapeEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>) +! 
CHECK: %[[VAL_10:.*]] = acc.copyin var(%[[VAL_9]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_10]] : !fir.box<!fir.array<?xf32>>) { +! CHECK: %[[VAL_11:.*]] = fir.box_addr %[[VAL_10]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>> +! CHECK: %[[VAL_12:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_11]](%[[VAL_8]]) dummy_scope %[[VAL_12]] {uniq_name = "_QMmFtest_explicit_shapeEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>) +! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_7]] : (index) -> i64 +! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i64) -> i32 +! CHECK: %[[VAL_16:.*]]:3 = hlfir.associate %[[VAL_15]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1) +! CHECK: fir.call @_QPtakes_explicit_shape(%[[VAL_13]]#1, %[[VAL_16]]#0) fastmath<contract> : (!fir.ref<!fir.array<?xf32>>, !fir.ref<i32>) -> () +! CHECK: hlfir.end_associate %[[VAL_16]]#1, %[[VAL_16]]#2 : !fir.ref<i32>, i1 +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accVar(%[[VAL_10]] : !fir.box<!fir.array<?xf32>>) to var(%[[VAL_9]]#0 : !fir.box<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPtest_assumed_shape( +! CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_assumed_shapeEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! 
CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_assumed_shapeEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>) +! CHECK: %[[VAL_3:.*]] = acc.copyin var(%[[VAL_2]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_3]] : !fir.box<!fir.array<?xf32>>) { +! CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_3]] dummy_scope %[[VAL_4]] skip_rebox {uniq_name = "_QMmFtest_assumed_shapeEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>) +! CHECK: fir.call @_QPtakes_assumed_shape(%[[VAL_5]]#0) fastmath<contract> : (!fir.box<!fir.array<?xf32>>) -> () +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accVar(%[[VAL_3]] : !fir.box<!fir.array<?xf32>>) to var(%[[VAL_2]]#0 : !fir.box<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPtest_contiguous_assumed_shape( +! CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x", fir.contiguous}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_contiguous_assumed_shapeEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[ARG0]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>> +! CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[ARG0]], %[[VAL_3]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index) +! CHECK: %[[VAL_5:.*]] = arith.constant 1 : index +! 
CHECK: %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]], %[[VAL_4]]#1 : (index, index) -> !fir.shapeshift<1> +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_6]]) dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<contiguous>, uniq_name = "_QMmFtest_contiguous_assumed_shapeEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shapeshift<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>) +! CHECK: %[[VAL_8:.*]] = acc.copyin var(%[[VAL_7]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_8]] : !fir.box<!fir.array<?xf32>>) { +! CHECK: %[[VAL_9:.*]] = fir.box_addr %[[VAL_8]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>> +! CHECK: %[[VAL_10:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_9]](%[[VAL_6]]) dummy_scope %[[VAL_10]] {fortran_attrs = #fir.var_attrs<contiguous>, uniq_name = "_QMmFtest_contiguous_assumed_shapeEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shapeshift<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>) +! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_4]]#1 : (index) -> i64 +! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_12]] : (i64) -> i32 +! CHECK: %[[VAL_14:.*]]:3 = hlfir.associate %[[VAL_13]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1) +! CHECK: fir.call @_QPtakes_explicit_shape(%[[VAL_11]]#1, %[[VAL_14]]#0) fastmath<contract> : (!fir.ref<!fir.array<?xf32>>, !fir.ref<i32>) -> () +! CHECK: hlfir.end_associate %[[VAL_14]]#1, %[[VAL_14]]#2 : !fir.ref<i32>, i1 +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accVar(%[[VAL_8]] : !fir.box<!fir.array<?xf32>>) to var(%[[VAL_7]]#0 : !fir.box<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPtest_pointer( +! 
CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> {fir.bindc_name = "x"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_pointerEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMmFtest_pointerEx"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) +! CHECK: %[[VAL_3:.*]] = acc.copyin varPtr(%[[VAL_2]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_3]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) { +! CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_3]] dummy_scope %[[VAL_4]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMmFtest_pointerEx"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) +! CHECK: fir.call @_QPtakes_pointer(%[[VAL_5]]#0) fastmath<contract> : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> () +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accPtr(%[[VAL_3]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) to varPtr(%[[VAL_2]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPtest_using_both_results( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x"}, +! 
CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_using_both_resultsEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64 +! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i64) -> index +! CHECK: %[[VAL_5:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_6:.*]] = arith.cmpi sgt, %[[VAL_4]], %[[VAL_5]] : index +! CHECK: %[[VAL_7:.*]] = arith.select %[[VAL_6]], %[[VAL_4]], %[[VAL_5]] : index +! CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG0]](%[[VAL_8]]) dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_using_both_resultsEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>) +! CHECK: %[[VAL_10:.*]] = acc.copyin var(%[[VAL_9]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_10]] : !fir.box<!fir.array<?xf32>>) { +! CHECK: %[[VAL_11:.*]] = fir.box_addr %[[VAL_10]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>> +! CHECK: %[[VAL_12:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_11]](%[[VAL_8]]) dummy_scope %[[VAL_12]] {uniq_name = "_QMmFtest_using_both_resultsEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>) +! CHECK: fir.call @_QPtakes_assumed_shape(%[[VAL_13]]#0) fastmath<contract> : (!fir.box<!fir.array<?xf32>>) -> () +! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_7]] : (index) -> i64 +! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_14]] : (i64) -> i32 +! 
CHECK: %[[VAL_16:.*]]:3 = hlfir.associate %[[VAL_15]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1) +! CHECK: fir.call @_QPtakes_explicit_shape(%[[VAL_13]]#1, %[[VAL_16]]#0) fastmath<contract> : (!fir.ref<!fir.array<?xf32>>, !fir.ref<i32>) -> () +! CHECK: hlfir.end_associate %[[VAL_16]]#1, %[[VAL_16]]#2 : !fir.ref<i32>, i1 +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accVar(%[[VAL_10]] : !fir.box<!fir.array<?xf32>>) to var(%[[VAL_9]]#0 : !fir.box<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPaddressing_cst_shape( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<10x20xf32>> {fir.bindc_name = "x"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]] = arith.constant 10 : index +! CHECK: %[[VAL_2:.*]] = arith.constant 20 : index +! CHECK: %[[VAL_3:.*]] = fir.shape %[[VAL_1]], %[[VAL_2]] : (index, index) -> !fir.shape<2> +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[ARG0]](%[[VAL_3]]) dummy_scope %[[VAL_0]] {uniq_name = "_QMmFaddressing_cst_shapeEx"} : (!fir.ref<!fir.array<10x20xf32>>, !fir.shape<2>, !fir.dscope) -> (!fir.ref<!fir.array<10x20xf32>>, !fir.ref<!fir.array<10x20xf32>>) +! CHECK: %[[VAL_5:.*]] = acc.copyin varPtr(%[[VAL_4]]#0 : !fir.ref<!fir.array<10x20xf32>>) -> !fir.ref<!fir.array<10x20xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_5]] : !fir.ref<!fir.array<10x20xf32>>) { +! CHECK: %[[VAL_6:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_5]](%[[VAL_3]]) dummy_scope %[[VAL_6]] {uniq_name = "_QMmFaddressing_cst_shapeEx"} : (!fir.ref<!fir.array<10x20xf32>>, !fir.shape<2>, !fir.dscope) -> (!fir.ref<!fir.array<10x20xf32>>, !fir.ref<!fir.array<10x20xf32>>) +! CHECK: %[[VAL_8:.*]] = arith.constant 2 : index +! CHECK: %[[VAL_9:.*]] = arith.constant 3 : index +! 
CHECK: %[[VAL_10:.*]] = hlfir.designate %[[VAL_7]]#0 (%[[VAL_8]], %[[VAL_9]]) : (!fir.ref<!fir.array<10x20xf32>>, index, index) -> !fir.ref<f32> +! CHECK: fir.call @_QPtakes_scalar(%[[VAL_10]]) fastmath<contract> : (!fir.ref<f32>) -> () +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accPtr(%[[VAL_5]] : !fir.ref<!fir.array<10x20xf32>>) to varPtr(%[[VAL_4]]#0 : !fir.ref<!fir.array<10x20xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPaddressing_explicit_shape( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?x?xf32>> {fir.bindc_name = "x"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}, +! CHECK-SAME: %[[ARG2:.*]]: !fir.ref<i32> {fir.bindc_name = "m"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG2]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFaddressing_explicit_shapeEm"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFaddressing_explicit_shapeEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_2]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i32) -> i64 +! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i64) -> index +! CHECK: %[[VAL_6:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_7:.*]] = arith.cmpi sgt, %[[VAL_5]], %[[VAL_6]] : index +! CHECK: %[[VAL_8:.*]] = arith.select %[[VAL_7]], %[[VAL_5]], %[[VAL_6]] : index +! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> i64 +! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_10]] : (i64) -> index +! CHECK: %[[VAL_12:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_13:.*]] = arith.cmpi sgt, %[[VAL_11]], %[[VAL_12]] : index +! 
CHECK: %[[VAL_14:.*]] = arith.select %[[VAL_13]], %[[VAL_11]], %[[VAL_12]] : index +! CHECK: %[[VAL_15:.*]] = fir.shape %[[VAL_8]], %[[VAL_14]] : (index, index) -> !fir.shape<2> +! CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[ARG0]](%[[VAL_15]]) dummy_scope %[[VAL_0]] {uniq_name = "_QMmFaddressing_explicit_shapeEx"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>) +! CHECK: %[[VAL_17:.*]] = acc.copyin var(%[[VAL_16]]#0 : !fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_17]] : !fir.box<!fir.array<?x?xf32>>) { +! CHECK: %[[VAL_18:.*]] = fir.box_addr %[[VAL_17]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.array<?x?xf32>> +! CHECK: %[[VAL_19:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_20:.*]]:2 = hlfir.declare %[[VAL_18]](%[[VAL_15]]) dummy_scope %[[VAL_19]] {uniq_name = "_QMmFaddressing_explicit_shapeEx"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>) +! CHECK: %[[VAL_21:.*]] = arith.constant 2 : index +! CHECK: %[[VAL_22:.*]] = arith.constant 3 : index +! CHECK: %[[VAL_23:.*]] = hlfir.designate %[[VAL_20]]#0 (%[[VAL_21]], %[[VAL_22]]) : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32> +! CHECK: fir.call @_QPtakes_scalar(%[[VAL_23]]) fastmath<contract> : (!fir.ref<f32>) -> () +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accVar(%[[VAL_17]] : !fir.box<!fir.array<?x?xf32>>) to var(%[[VAL_16]]#0 : !fir.box<!fir.array<?x?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPaddressing_assumed_shape( +! CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x"}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) { +! 
CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFaddressing_assumed_shapeEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFaddressing_assumed_shapeEx"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>) +! CHECK: %[[VAL_3:.*]] = acc.copyin var(%[[VAL_2]]#0 : !fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_3]] : !fir.box<!fir.array<?x?xf32>>) { +! CHECK: %[[VAL_4:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_3]] dummy_scope %[[VAL_4]] skip_rebox {uniq_name = "_QMmFaddressing_assumed_shapeEx"} : (!fir.box<!fir.array<?x?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.box<!fir.array<?x?xf32>>) +! CHECK: %[[VAL_6:.*]] = arith.constant 2 : index +! CHECK: %[[VAL_7:.*]] = arith.constant 3 : index +! CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_5]]#0 (%[[VAL_6]], %[[VAL_7]]) : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32> +! CHECK: fir.call @_QPtakes_scalar(%[[VAL_8]]) fastmath<contract> : (!fir.ref<f32>) -> () +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accVar(%[[VAL_3]] : !fir.box<!fir.array<?x?xf32>>) to var(%[[VAL_2]]#0 : !fir.box<!fir.array<?x?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPaddressing_contiguous_assumed_shape( +! CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?x?xf32>> {fir.bindc_name = "x", fir.contiguous}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! 
CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFaddressing_contiguous_assumed_shapeEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[ARG0]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.array<?x?xf32>> +! CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[ARG0]], %[[VAL_3]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index) +! CHECK: %[[VAL_5:.*]] = arith.constant 1 : index +! CHECK: %[[VAL_6:.*]] = arith.constant 1 : index +! CHECK: %[[VAL_7:.*]]:3 = fir.box_dims %[[ARG0]], %[[VAL_6]] : (!fir.box<!fir.array<?x?xf32>>, index) -> (index, index, index) +! CHECK: %[[VAL_8:.*]] = arith.constant 1 : index +! CHECK: %[[VAL_9:.*]] = fir.shape_shift %[[VAL_5]], %[[VAL_4]]#1, %[[VAL_8]], %[[VAL_7]]#1 : (index, index, index, index) -> !fir.shapeshift<2> +! CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_2]](%[[VAL_9]]) dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<contiguous>, uniq_name = "_QMmFaddressing_contiguous_assumed_shapeEx"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shapeshift<2>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>) +! CHECK: %[[VAL_11:.*]] = acc.copyin var(%[[VAL_10]]#0 : !fir.box<!fir.array<?x?xf32>>) -> !fir.box<!fir.array<?x?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_11]] : !fir.box<!fir.array<?x?xf32>>) { +! CHECK: %[[VAL_12:.*]] = fir.box_addr %[[VAL_11]] : (!fir.box<!fir.array<?x?xf32>>) -> !fir.ref<!fir.array<?x?xf32>> +! CHECK: %[[VAL_13:.*]] = fir.dummy_scope : !fir.dscope +! 
CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]](%[[VAL_9]]) dummy_scope %[[VAL_13]] {fortran_attrs = #fir.var_attrs<contiguous>, uniq_name = "_QMmFaddressing_contiguous_assumed_shapeEx"} : (!fir.ref<!fir.array<?x?xf32>>, !fir.shapeshift<2>, !fir.dscope) -> (!fir.box<!fir.array<?x?xf32>>, !fir.ref<!fir.array<?x?xf32>>) +! CHECK: %[[VAL_15:.*]] = arith.constant 2 : index +! CHECK: %[[VAL_16:.*]] = arith.constant 3 : index +! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_14]]#0 (%[[VAL_15]], %[[VAL_16]]) : (!fir.box<!fir.array<?x?xf32>>, index, index) -> !fir.ref<f32> +! CHECK: fir.call @_QPtakes_scalar(%[[VAL_17]]) fastmath<contract> : (!fir.ref<f32>) -> () +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accVar(%[[VAL_11]] : !fir.box<!fir.array<?x?xf32>>) to var(%[[VAL_10]]#0 : !fir.box<!fir.array<?x?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPaddressing_pointer( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> {fir.bindc_name = "x"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMmFaddressing_pointerEx"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) +! CHECK: %[[VAL_2:.*]] = acc.copyin varPtr(%[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_2]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) { +! CHECK: %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope +! 
CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_2]] dummy_scope %[[VAL_3]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QMmFaddressing_pointerEx"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) +! CHECK: %[[VAL_5:.*]] = fir.load %[[VAL_4]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>> +! CHECK: %[[VAL_6:.*]] = arith.constant 2 : index +! CHECK: %[[VAL_7:.*]] = arith.constant 3 : index +! CHECK: %[[VAL_8:.*]] = hlfir.designate %[[VAL_5]] (%[[VAL_6]], %[[VAL_7]]) : (!fir.box<!fir.ptr<!fir.array<?x?xf32>>>, index, index) -> !fir.ref<f32> +! CHECK: fir.call @_QPtakes_scalar(%[[VAL_8]]) fastmath<contract> : (!fir.ref<f32>) -> () +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accPtr(%[[VAL_2]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) to varPtr(%[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x?xf32>>>>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPtest_optional_scalar( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<f32> {fir.bindc_name = "x", fir.optional}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_scalarEx"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>) +! CHECK: %[[VAL_2:.*]] = acc.copyin varPtr(%[[VAL_1]]#0 : !fir.ref<f32>) -> !fir.ref<f32> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_2]] : !fir.ref<f32>) { +! CHECK: %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_2]] dummy_scope %[[VAL_3]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_scalarEx"} : (!fir.ref<f32>, !fir.dscope) -> (!fir.ref<f32>, !fir.ref<f32>) +! 
CHECK: %[[VAL_5:.*]] = fir.is_present %[[VAL_4]]#0 : (!fir.ref<f32>) -> i1 +! CHECK: %[[VAL_6:.*]] = fir.if %[[VAL_5]] -> (!fir.ref<f32>) { +! CHECK: fir.result %[[VAL_4]]#0 : !fir.ref<f32> +! CHECK: } else { +! CHECK: %[[VAL_7:.*]] = fir.absent !fir.ref<f32> +! CHECK: fir.result %[[VAL_7]] : !fir.ref<f32> +! CHECK: } +! CHECK: fir.call @_QPtakes_optional_scalar(%[[VAL_6]]) fastmath<contract> : (!fir.ref<f32>) -> () +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accPtr(%[[VAL_2]] : !fir.ref<f32>) to varPtr(%[[VAL_1]]#0 : !fir.ref<f32>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPtest_optional_explicit_cst_shape( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xf32>> {fir.bindc_name = "x", fir.optional}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]] = arith.constant 100 : index +! CHECK: %[[VAL_2:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[ARG0]](%[[VAL_2]]) dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_explicit_cst_shapeEx"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>) +! CHECK: %[[VAL_4:.*]] = acc.copyin varPtr(%[[VAL_3]]#0 : !fir.ref<!fir.array<100xf32>>) -> !fir.ref<!fir.array<100xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_4]] : !fir.ref<!fir.array<100xf32>>) { +! CHECK: %[[VAL_5:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_4]](%[[VAL_2]]) dummy_scope %[[VAL_5]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_explicit_cst_shapeEx"} : (!fir.ref<!fir.array<100xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.ref<!fir.array<100xf32>>, !fir.ref<!fir.array<100xf32>>) +! 
CHECK: %[[VAL_7:.*]] = fir.is_present %[[VAL_6]]#0 : (!fir.ref<!fir.array<100xf32>>) -> i1 +! CHECK: %[[VAL_8:.*]] = fir.if %[[VAL_7]] -> (!fir.ref<!fir.array<100xf32>>) { +! CHECK: fir.result %[[VAL_6]]#0 : !fir.ref<!fir.array<100xf32>> +! CHECK: } else { +! CHECK: %[[VAL_9:.*]] = fir.absent !fir.ref<!fir.array<100xf32>> +! CHECK: fir.result %[[VAL_9]] : !fir.ref<!fir.array<100xf32>> +! CHECK: } +! CHECK: fir.call @_QPtakes_optional_explicit_cst_shape(%[[VAL_8]]) fastmath<contract> : (!fir.ref<!fir.array<100xf32>>) -> () +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accPtr(%[[VAL_4]] : !fir.ref<!fir.array<100xf32>>) to varPtr(%[[VAL_3]]#0 : !fir.ref<!fir.array<100xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPtest_optional_explicit_shape( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<?xf32>> {fir.bindc_name = "x", fir.optional}, +! CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32> {fir.bindc_name = "n"}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG1]] dummy_scope %[[VAL_0]] {uniq_name = "_QMmFtest_optional_explicit_shapeEn"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (i32) -> i64 +! CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (i64) -> index +! CHECK: %[[VAL_5:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_6:.*]] = arith.cmpi sgt, %[[VAL_4]], %[[VAL_5]] : index +! CHECK: %[[VAL_7:.*]] = arith.select %[[VAL_6]], %[[VAL_4]], %[[VAL_5]] : index +! CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1> +! 
CHECK: %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG0]](%[[VAL_8]]) dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_explicit_shapeEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>) +! CHECK: %[[VAL_10:.*]] = acc.copyin varPtr(%[[VAL_9]]#1 : !fir.ref<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_10]] : !fir.ref<!fir.array<?xf32>>) { +! CHECK: %[[VAL_11:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]](%[[VAL_8]]) dummy_scope %[[VAL_11]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_explicit_shapeEx"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>) +! CHECK: %[[VAL_13:.*]] = fir.is_present %[[VAL_12]]#0 : (!fir.box<!fir.array<?xf32>>) -> i1 +! CHECK: %[[VAL_14:.*]] = fir.if %[[VAL_13]] -> (!fir.ref<!fir.array<?xf32>>) { +! CHECK: fir.result %[[VAL_12]]#1 : !fir.ref<!fir.array<?xf32>> +! CHECK: } else { +! CHECK: %[[VAL_15:.*]] = fir.absent !fir.ref<!fir.array<?xf32>> +! CHECK: fir.result %[[VAL_15]] : !fir.ref<!fir.array<?xf32>> +! CHECK: } +! CHECK: fir.call @_QPtakes_optional_explicit_shape(%[[VAL_14]], %[[VAL_1]]#0) fastmath<contract> : (!fir.ref<!fir.array<?xf32>>, !fir.ref<i32>) -> () +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accPtr(%[[VAL_10]] : !fir.ref<!fir.array<?xf32>>) to varPtr(%[[VAL_9]]#1 : !fir.ref<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPtest_optional_assumed_shape( +! CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x", fir.optional}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! 
CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_assumed_shapeEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>) +! CHECK: %[[VAL_2:.*]] = acc.copyin var(%[[VAL_1]]#0 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_2]] : !fir.box<!fir.array<?xf32>>) { +! CHECK: %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_2]] dummy_scope %[[VAL_3]] skip_rebox {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QMmFtest_optional_assumed_shapeEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.box<!fir.array<?xf32>>) +! CHECK: %[[VAL_5:.*]] = fir.is_present %[[VAL_4]]#0 : (!fir.box<!fir.array<?xf32>>) -> i1 +! CHECK: %[[VAL_6:.*]] = fir.if %[[VAL_5]] -> (!fir.box<!fir.array<?xf32>>) { +! CHECK: fir.result %[[VAL_4]]#0 : !fir.box<!fir.array<?xf32>> +! CHECK: } else { +! CHECK: %[[VAL_7:.*]] = fir.absent !fir.box<!fir.array<?xf32>> +! CHECK: fir.result %[[VAL_7]] : !fir.box<!fir.array<?xf32>> +! CHECK: } +! CHECK: fir.call @_QPtakes_optional_assumed_shape(%[[VAL_6]]) fastmath<contract> : (!fir.box<!fir.array<?xf32>>) -> () +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accVar(%[[VAL_2]] : !fir.box<!fir.array<?xf32>>) to var(%[[VAL_1]]#0 : !fir.box<!fir.array<?xf32>>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } + +! CHECK-LABEL: func.func @_QMmPtest_optional_pointer( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> {fir.bindc_name = "x", fir.optional}) { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! 
CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope %[[VAL_0]] {fortran_attrs = #fir.var_attrs<optional, pointer>, uniq_name = "_QMmFtest_optional_pointerEx"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) +! CHECK: %[[VAL_2:.*]] = acc.copyin varPtr(%[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: acc.parallel dataOperands(%[[VAL_2]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) { +! CHECK: %[[VAL_3:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_2]] dummy_scope %[[VAL_3]] {fortran_attrs = #fir.var_attrs<optional, pointer>, uniq_name = "_QMmFtest_optional_pointerEx"} : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) +! CHECK: fir.call @_QPtakes_optional_pointer(%[[VAL_4]]#0) fastmath<contract> : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) -> () +! CHECK: acc.yield +! CHECK: } +! CHECK: acc.copyout accPtr(%[[VAL_2]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) to varPtr(%[[VAL_1]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>>) {dataClause = #acc<data_clause acc_copy>, name = "x"} +! CHECK: return +! CHECK: } diff --git a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 index bc94837b..429f207 100644 --- a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 +++ b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 @@ -41,19 +41,21 @@ module m_firstprivate_derived_alloc_comp ! 
CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QMm_firstprivate_derived_alloc_compFtestEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[VAL_6:.*]] = acc.firstprivate varPtr(%[[VAL_1]]#0 : !fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>> {name = "a"} ! CHECK: acc.parallel combined(loop) firstprivate(@firstprivatization_ref_rec__QMm_firstprivate_derived_alloc_compTpoint -> %[[VAL_6]] : !fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) { -! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_6]] dummy_scope %[[VAL_7]] {uniq_name = "_QMm_firstprivate_derived_alloc_compFtestEa"} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>, !fir.dscope) -> (!fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>, !fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_10:.*]] = acc.private varPtr(%[[VAL_3]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} -! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QMm_firstprivate_derived_alloc_compFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) -! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_10]] : !fir.ref<i32>) control(%[[VAL_12:.*]] : i32) = (%[[VAL_7]] : i32) to (%[[VAL_8]] : i32) step (%[[VAL_9]] : i32) { -! CHECK: fir.store %[[VAL_12]] to %[[VAL_11]]#0 : !fir.ref<i32> -! 
CHECK: %[[VAL_13:.*]] = arith.constant 1.000000e+00 : f32 -! CHECK: %[[VAL_14:.*]] = hlfir.designate %[[VAL_1]]#0{"x"} {fortran_attrs = #fir.var_attrs<allocatable>} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> -! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> -! CHECK: %[[VAL_16:.*]] = arith.constant 10 : index -! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_15]] (%[[VAL_16]]) : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> !fir.ref<f32> -! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_17]] : f32, !fir.ref<f32> +! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_12:.*]] = acc.private varPtr(%[[VAL_3]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} +! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_12]] : !fir.ref<i32>) control(%[[VAL_14:.*]] : i32) = (%[[VAL_9]] : i32) to (%[[VAL_10]] : i32) step (%[[VAL_11]] : i32) { +! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QMm_firstprivate_derived_alloc_compFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: fir.store %[[VAL_14]] to %[[VAL_13]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_15:.*]] = arith.constant 1.000000e+00 : f32 +! CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_8]]#0{"x"} {fortran_attrs = #fir.var_attrs<allocatable>} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_alloc_compTpoint{x:!fir.box<!fir.heap<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> +! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>> +! CHECK: %[[VAL_18:.*]] = arith.constant 10 : index +! CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]]) : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> !fir.ref<f32> +! 
CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_19]] : f32, !fir.ref<f32> ! CHECK: acc.yield ! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]} ! CHECK: acc.yield diff --git a/flang/test/Lower/OpenACC/acc-firstprivate-derived-pointer-component.f90 b/flang/test/Lower/OpenACC/acc-firstprivate-derived-pointer-component.f90 index f18d722..9ef4fe6 100644 --- a/flang/test/Lower/OpenACC/acc-firstprivate-derived-pointer-component.f90 +++ b/flang/test/Lower/OpenACC/acc-firstprivate-derived-pointer-component.f90 @@ -41,19 +41,21 @@ module m_firstprivate_derived_ptr_comp ! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QMm_firstprivate_derived_ptr_compFtestEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[VAL_6:.*]] = acc.firstprivate varPtr(%[[VAL_1]]#0 : !fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>> {name = "a"} ! CHECK: acc.parallel combined(loop) firstprivate(@firstprivatization_ref_rec__QMm_firstprivate_derived_ptr_compTpoint -> %[[VAL_6]] : !fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) { -! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_8:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_7:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_6]] dummy_scope %[[VAL_7]] {uniq_name = "_QMm_firstprivate_derived_ptr_compFtestEa"} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>, !fir.dscope) -> (!fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>, !fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 -! 
CHECK: %[[VAL_10:.*]] = acc.private varPtr(%[[VAL_3]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} -! CHECK: %[[VAL_11:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QMm_firstprivate_derived_ptr_compFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) -! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_10]] : !fir.ref<i32>) control(%[[VAL_12:.*]] : i32) = (%[[VAL_7]] : i32) to (%[[VAL_8]] : i32) step (%[[VAL_9]] : i32) { -! CHECK: fir.store %[[VAL_12]] to %[[VAL_11]]#0 : !fir.ref<i32> -! CHECK: %[[VAL_13:.*]] = arith.constant 1.000000e+00 : f32 -! CHECK: %[[VAL_14:.*]] = hlfir.designate %[[VAL_1]]#0{"x"} {fortran_attrs = #fir.var_attrs<pointer>} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> -! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_14]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> -! CHECK: %[[VAL_16:.*]] = arith.constant 10 : index -! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_15]] (%[[VAL_16]]) : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index) -> !fir.ref<f32> -! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_17]] : f32, !fir.ref<f32> +! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_12:.*]] = acc.private varPtr(%[[VAL_3]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} +! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_12]] : !fir.ref<i32>) control(%[[VAL_14:.*]] : i32) = (%[[VAL_9]] : i32) to (%[[VAL_10]] : i32) step (%[[VAL_11]] : i32) { +! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QMm_firstprivate_derived_ptr_compFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: fir.store %[[VAL_14]] to %[[VAL_13]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_15:.*]] = arith.constant 1.000000e+00 : f32 +! 
CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_8]]#0{"x"} {fortran_attrs = #fir.var_attrs<pointer>} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_ptr_compTpoint{x:!fir.box<!fir.ptr<!fir.array<?xf32>>>}>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> +! CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_16]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xf32>>>> +! CHECK: %[[VAL_18:.*]] = arith.constant 10 : index +! CHECK: %[[VAL_19:.*]] = hlfir.designate %[[VAL_17]] (%[[VAL_18]]) : (!fir.box<!fir.ptr<!fir.array<?xf32>>>, index) -> !fir.ref<f32> +! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_19]] : f32, !fir.ref<f32> ! CHECK: acc.yield ! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]} ! CHECK: acc.yield diff --git a/flang/test/Lower/OpenACC/acc-firstprivate-derived-user-assign.f90 b/flang/test/Lower/OpenACC/acc-firstprivate-derived-user-assign.f90 index f389c46..e90ec32 100644 --- a/flang/test/Lower/OpenACC/acc-firstprivate-derived-user-assign.f90 +++ b/flang/test/Lower/OpenACC/acc-firstprivate-derived-user-assign.f90 @@ -55,16 +55,17 @@ module m_firstprivate_derived_user_def ! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QMm_firstprivate_derived_user_defFtestEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[VAL_7:.*]] = acc.firstprivate varPtr(%[[VAL_2]]#0 : !fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>>) -> !fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>> {name = "a"} ! CHECK: acc.parallel combined(loop) firstprivate(@firstprivatization_ref_rec__QMm_firstprivate_derived_user_defTpoint -> %[[VAL_7]] : !fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>>) { -! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32> -! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! 
CHECK: %[[VAL_11:.*]] = acc.private varPtr(%[[VAL_4]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} -! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QMm_firstprivate_derived_user_defFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) -! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_11]] : !fir.ref<i32>) control(%[[VAL_13:.*]] : i32) = (%[[VAL_8]] : i32) to (%[[VAL_9]] : i32) step (%[[VAL_10]] : i32) { -! CHECK: fir.store %[[VAL_13]] to %[[VAL_12]]#0 : !fir.ref<i32> -! CHECK: %[[VAL_14:.*]] = arith.constant 1.000000e+00 : f32 -! CHECK: %[[VAL_15:.*]] = hlfir.designate %[[VAL_2]]#0{"x"} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>>) -> !fir.ref<f32> -! CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_15]] : f32, !fir.ref<f32> +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QMm_firstprivate_derived_user_defFtestEa"} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>>) -> (!fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>>, !fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>>) +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_12:.*]] = acc.private varPtr(%[[VAL_4]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} +! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_12]] : !fir.ref<i32>) control(%[[VAL_14:.*]] : i32) = (%[[VAL_9]] : i32) to (%[[VAL_10]] : i32) step (%[[VAL_11]] : i32) { + ! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QMm_firstprivate_derived_user_defFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: fir.store %[[VAL_14]] to %[[VAL_13]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_15:.*]] = arith.constant 1.000000e+00 : f32 +! 
CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_8]]#0{"x"} : (!fir.ref<!fir.type<_QMm_firstprivate_derived_user_defTpoint{x:f32,y:f32,z:f32}>>) -> !fir.ref<f32> +! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_16]] : f32, !fir.ref<f32> ! CHECK: acc.yield ! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]} ! CHECK: acc.yield diff --git a/flang/test/Lower/OpenACC/acc-firstprivate-derived.f90 b/flang/test/Lower/OpenACC/acc-firstprivate-derived.f90 index 677c3ae..e91fc9b 100644 --- a/flang/test/Lower/OpenACC/acc-firstprivate-derived.f90 +++ b/flang/test/Lower/OpenACC/acc-firstprivate-derived.f90 @@ -41,16 +41,17 @@ module m_firstprivate_derived ! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QMm_firstprivate_derivedFtestEn"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[VAL_7:.*]] = acc.firstprivate varPtr(%[[VAL_2]]#0 : !fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>>) -> !fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>> {name = "a"} ! CHECK: acc.parallel combined(loop) firstprivate(@firstprivatization_ref_rec__QMm_firstprivate_derivedTpoint -> %[[VAL_7]] : !fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>>) { -! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_9:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32> -! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_11:.*]] = acc.private varPtr(%[[VAL_4]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} -! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_11]] {uniq_name = "_QMm_firstprivate_derivedFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) -! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_11]] : !fir.ref<i32>) control(%[[VAL_13:.*]] : i32) = (%[[VAL_8]] : i32) to (%[[VAL_9]] : i32) step (%[[VAL_10]] : i32) { -! CHECK: fir.store %[[VAL_13]] to %[[VAL_12]]#0 : !fir.ref<i32> -! 
CHECK: %[[VAL_14:.*]] = arith.constant 1.000000e+00 : f32 -! CHECK: %[[VAL_15:.*]] = hlfir.designate %[[VAL_2]]#0{"x"} : (!fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>>) -> !fir.ref<f32> -! CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_15]] : f32, !fir.ref<f32> +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QMm_firstprivate_derivedFtestEa"} : (!fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>>) -> (!fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>>, !fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>>) +! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_10:.*]] = fir.load %[[VAL_6]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_11:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_12:.*]] = acc.private varPtr(%[[VAL_4]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} +! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %[[VAL_12]] : !fir.ref<i32>) control(%[[VAL_14:.*]] : i32) = (%[[VAL_9]] : i32) to (%[[VAL_10]] : i32) step (%[[VAL_11]] : i32) { + ! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QMm_firstprivate_derivedFtestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: fir.store %[[VAL_14]] to %[[VAL_13]]#0 : !fir.ref<i32> +! CHECK: %[[VAL_15:.*]] = arith.constant 1.000000e+00 : f32 +! CHECK: %[[VAL_16:.*]] = hlfir.designate %[[VAL_8]]#0{"x"} : (!fir.ref<!fir.type<_QMm_firstprivate_derivedTpoint{x:f32,y:f32,z:f32}>>) -> !fir.ref<f32> +! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_16]] : f32, !fir.ref<f32> ! CHECK: acc.yield ! CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]} ! 
CHECK: acc.yield diff --git a/flang/test/Lower/OpenACC/acc-loop-collapse-force-lowering.f90 b/flang/test/Lower/OpenACC/acc-loop-collapse-force-lowering.f90 new file mode 100644 index 0000000..ca932c1 --- /dev/null +++ b/flang/test/Lower/OpenACC/acc-loop-collapse-force-lowering.f90 @@ -0,0 +1,41 @@ +! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s + +! Verify collapse(force:2) sinks prologue (between loops) and epilogue (after inner loop) +! into the acc.loop region body. + +subroutine collapse_force_sink(n, m) + integer, intent(in) :: n, m + real, dimension(n,m) :: a + real, dimension(n) :: bb, cc + integer :: i, j + + !$acc parallel loop collapse(force:2) + do i = 1, n + bb(i) = 4.2 ! prologue (between loops) + do j = 1, m + a(i,j) = a(i,j) + 2.0 + end do + cc(i) = 7.3 ! epilogue (after inner loop) + end do + !$acc end parallel loop +end subroutine + +! CHECK: func.func @_QPcollapse_force_sink( +! CHECK: acc.parallel +! Ensure outer acc.loop is combined(parallel) +! CHECK: acc.loop combined(parallel) +! Prologue: constant 4.2 and an assign before inner loop +! CHECK: arith.constant 4.200000e+00 +! CHECK: hlfir.assign +! Inner loop and its body include 2.0 add and an assign +! CHECK: acc.loop +! CHECK: arith.constant 2.000000e+00 +! CHECK: arith.addf +! CHECK: hlfir.assign +! Epilogue: constant 7.3 and an assign after inner loop +! CHECK: arith.constant 7.300000e+00 +! CHECK: hlfir.assign +! And the outer acc.loop has collapse = [2] +! CHECK: } attributes {collapse = [2] + + diff --git a/flang/test/Lower/OpenACC/acc-loop-exit.f90 b/flang/test/Lower/OpenACC/acc-loop-exit.f90 index 85394e4..af11b34 100644 --- a/flang/test/Lower/OpenACC/acc-loop-exit.f90 +++ b/flang/test/Lower/OpenACC/acc-loop-exit.f90 @@ -16,8 +16,8 @@ end ! CHECK-LABEL: func.func @_QPsub1 ! CHECK: %[[A:.*]]:2 = hlfir.declare %arg1 dummy_scope %{{[0-9]+}} {uniq_name = "_QFsub1Ea"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) ! 
CHECK: %[[I:.*]]:2 = hlfir.declare %{{[0-9]+}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) -! CHECK: %[[I:.*]]:2 = hlfir.declare %{{[0-9]+}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: %[[EXIT_COND:.*]] = acc.loop +! CHECK: %[[I:.*]]:2 = hlfir.declare %{{[0-9]+}} {uniq_name = "_QFsub1Ei"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: ^bb{{.*}}: ! CHECK: ^bb{{.*}}: ! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#0 : !fir.ref<i32> diff --git a/flang/test/Lower/OpenACC/acc-private.f90 b/flang/test/Lower/OpenACC/acc-private.f90 index 5ca08a3..d37eb8d 100644 --- a/flang/test/Lower/OpenACC/acc-private.f90 +++ b/flang/test/Lower/OpenACC/acc-private.f90 @@ -426,7 +426,7 @@ end ! CHECK: %[[DECL_I:.*]]:2 = hlfir.declare %[[I]] {uniq_name = "_QFacc_private_useEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: acc.parallel ! CHECK: %[[PRIV_I:.*]] = acc.private varPtr(%[[DECL_I]]#0 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} -! CHECK: %[[DECL_PRIV_I:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFacc_private_useEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: acc.loop {{.*}} private(@privatization_ref_i32 -> %[[PRIV_I]] : !fir.ref<i32>) control(%[[IV0:.*]] : i32) = (%c1{{.*}} : i32) to (%c10{{.*}} : i32) step (%c1{{.*}} : i32) +! CHECK: %[[DECL_PRIV_I:.*]]:2 = hlfir.declare %[[PRIV_I]] {uniq_name = "_QFacc_private_useEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: fir.store %[[IV0]] to %[[DECL_PRIV_I]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[DECL_PRIV_I]]#0 : !fir.ref<i32> diff --git a/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90 b/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90 index a75a022..eaf734f 100644 --- a/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90 +++ b/flang/test/Lower/OpenACC/do-loops-to-acc-loops.f90 @@ -19,8 +19,8 @@ subroutine basic_do_loop() ! CHECK: acc.kernels { ! 
CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} -! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> @@ -48,8 +48,8 @@ subroutine basic_do_concurrent() ! CHECK: acc.kernels { ! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} -! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrentEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrentEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> @@ -77,8 +77,8 @@ subroutine basic_do_loop_parallel() ! CHECK: acc.parallel { ! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} -! 
CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loop_parallelEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loop_parallelEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> @@ -106,8 +106,8 @@ subroutine basic_do_loop_serial() ! CHECK: acc.serial { ! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} -! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loop_serialEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_loop_serialEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> @@ -135,8 +135,8 @@ subroutine basic_do_concurrent_parallel() ! CHECK: acc.parallel { ! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} -! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrent_parallelEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! 
CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrent_parallelEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> @@ -164,8 +164,8 @@ subroutine basic_do_concurrent_serial() ! CHECK: acc.serial { ! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} -! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrent_serialEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFbasic_do_concurrent_serialEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> @@ -195,10 +195,10 @@ subroutine multi_dimension_do_concurrent() ! CHECK-DAG: %[[PRIVATE_I:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} ! CHECK-DAG: %[[PRIVATE_J:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "j"} ! CHECK-DAG: %[[PRIVATE_K:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "k"} +! 
CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[PRIVATE_K]] : !fir.ref<i32>) control(%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32) = (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) to (%{{.*}}, %{{.*}}, %{{.*}} : i32, i32, i32) step (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) ! CHECK-DAG: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFmulti_dimension_do_concurrentEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK-DAG: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFmulti_dimension_do_concurrentEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK-DAG: %[[PRIVATE_K_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_K]] {uniq_name = "_QFmulti_dimension_do_concurrentEk"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) -! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[PRIVATE_K]] : !fir.ref<i32>) control(%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32) = (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) to (%{{.*}}, %{{.*}}, %{{.*}} : i32, i32, i32) step (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) ! CHECK: fir.store %{{.*}} to %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32> ! CHECK: fir.store %{{.*}} to %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32> ! CHECK: fir.store %{{.*}} to %[[PRIVATE_K_DECLARE]]#0 : !fir.ref<i32> @@ -235,12 +235,12 @@ subroutine nested_do_loops() ! CHECK: acc.kernels { ! CHECK-DAG: %[[PRIVATE_I:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} -! CHECK-DAG: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFnested_do_loopsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! 
CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK-DAG: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFnested_do_loopsEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: fir.store %{{.*}} to %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32> ! CHECK-DAG: %[[PRIVATE_J:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "j"} -! CHECK-DAG: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFnested_do_loopsEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK-DAG: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFnested_do_loopsEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: fir.store %{{.*}} to %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32> @@ -272,8 +272,8 @@ subroutine variable_bounds_and_step(n, start_val, step_val) ! CHECK: acc.kernels { ! CHECK: %[[PRIVATE_IV:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} -! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFvariable_bounds_and_stepEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_IV]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_IV]] {uniq_name = "_QFvariable_bounds_and_stepEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! 
CHECK: fir.store %{{.*}} to %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_DECLARE]]#0 : !fir.ref<i32> @@ -315,22 +315,22 @@ subroutine different_iv_types() ! CHECK: acc.kernels { ! CHECK: %[[PRIVATE_I8:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i64>) -> !fir.ref<i64> {implicit = true, name = "i8"} -! CHECK: %[[PRIVATE_I8_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I8]] {uniq_name = "_QFdifferent_iv_typesEi8"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) ! CHECK: acc.loop private(@privatization_ref_i64 -> %[[PRIVATE_I8]] : !fir.ref<i64>) control(%{{.*}} : i64) = (%{{.*}} : i64) to (%{{.*}} : i64) step (%{{.*}} : i64) +! CHECK: %[[PRIVATE_I8_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I8]] {uniq_name = "_QFdifferent_iv_typesEi8"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) ! CHECK: fir.store %{{.*}} to %[[PRIVATE_I8_DECLARE]]#0 : !fir.ref<i64> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_I8_DECLARE]]#0 : !fir.ref<i64> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_I8_DECLARE]]#0 : !fir.ref<i64> ! CHECK: acc.kernels { ! CHECK: %[[PRIVATE_I4:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i4"} -! CHECK: %[[PRIVATE_I4_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I4]] {uniq_name = "_QFdifferent_iv_typesEi4"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I4]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_I4_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I4]] {uniq_name = "_QFdifferent_iv_typesEi4"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: fir.store %{{.*}} to %[[PRIVATE_I4_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_I4_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_I4_DECLARE]]#0 : !fir.ref<i32> ! CHECK: acc.kernels { ! 
CHECK: %[[PRIVATE_I2:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i16>) -> !fir.ref<i16> {implicit = true, name = "i2"} -! CHECK: %[[PRIVATE_I2_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I2]] {uniq_name = "_QFdifferent_iv_typesEi2"} : (!fir.ref<i16>) -> (!fir.ref<i16>, !fir.ref<i16>) ! CHECK: acc.loop private(@privatization_ref_i16 -> %[[PRIVATE_I2]] : !fir.ref<i16>) control(%{{.*}} : i16) = (%{{.*}} : i16) to (%{{.*}} : i16) step (%{{.*}} : i16) +! CHECK: %[[PRIVATE_I2_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I2]] {uniq_name = "_QFdifferent_iv_typesEi2"} : (!fir.ref<i16>) -> (!fir.ref<i16>, !fir.ref<i16>) ! CHECK: fir.store %{{.*}} to %[[PRIVATE_I2_DECLARE]]#0 : !fir.ref<i16> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_I2_DECLARE]]#0 : !fir.ref<i16> ! CHECK: %{{.*}} = fir.load %[[PRIVATE_I2_DECLARE]]#0 : !fir.ref<i16> @@ -362,12 +362,12 @@ subroutine nested_loop_with_reduction(x, y) ! CHECK: %[[REDUCTION_X:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "x"} ! CHECK: %[[REDUCTION_Y:.*]] = acc.reduction varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {name = "y"} ! CHECK: %[[PRIVATE_I:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} -! CHECK: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFnested_loop_with_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_I]] : !fir.ref<i32>) reduction(@reduction_add_ref_i32 -> %[[REDUCTION_X]] : !fir.ref<i32>, @reduction_add_ref_i32 -> %[[REDUCTION_Y]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_I_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_I]] {uniq_name = "_QFnested_loop_with_reductionEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: fir.store %{{.*}} to %[[PRIVATE_I_DECLARE]]#0 : !fir.ref<i32> ! 
CHECK: %[[PRIVATE_J:.*]] = acc.private varPtr(%{{.*}} : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "j"} -! CHECK: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFnested_loop_with_reductionEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: acc.loop private(@privatization_ref_i32 -> %[[PRIVATE_J]] : !fir.ref<i32>) control(%{{.*}} : i32) = (%{{.*}} : i32) to (%{{.*}} : i32) step (%{{.*}} : i32) +! CHECK: %[[PRIVATE_J_DECLARE:.*]]:2 = hlfir.declare %[[PRIVATE_J]] {uniq_name = "_QFnested_loop_with_reductionEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) ! CHECK: fir.store %{{.*}} to %[[PRIVATE_J_DECLARE]]#0 : !fir.ref<i32> ! CHECK: %{{.*}} = fir.load %{{.*}} : !fir.ref<i32> ! CHECK: %{{.*}} = arith.addi %{{.*}}, %{{.*}} : i32 diff --git a/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90 b/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90 index 142bc02a..c769152 100644 --- a/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90 +++ b/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90 @@ -112,7 +112,7 @@ integer :: i,j ! CHECK: omp.distribute { ! CHECK: omp.wsloop { ! CHECK: omp.simd private({{.*}}) { -! CHECK: omp.loop_nest (%[[I_IV:.*]], %[[J_IV:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) { +! CHECK: omp.loop_nest (%[[I_IV:.*]], %[[J_IV:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) collapse(2) { ! CHECK: %[[Y_MAX_PRIV:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "{{.*}}y_max"} ! CHECK: %[[I_UB:.*]] = fir.load %[[X_MAX_MAPPED]]#0 : !fir.ref<i32> diff --git a/flang/test/Lower/OpenMP/map-descriptor-deferral.f90 b/flang/test/Lower/OpenMP/map-descriptor-deferral.f90 new file mode 100644 index 0000000..daea2f3 --- /dev/null +++ b/flang/test/Lower/OpenMP/map-descriptor-deferral.f90 @@ -0,0 +1,96 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s + +! This test checks that the descriptor deferral behaviour of the +! 
MapInfoFinalization pass is preserved. Descriptor deferral is the +! act of removing the mapping of the descriptor in certain cases when +! a descriptor carrying type is mapped. This only applies in certain +! cases and to assumed shape and size dummy arguments that are not +! allocatable or pointers. + +subroutine assume_map_target_enter_exit(assumed_arr) + integer :: assumed_arr(:) + !$omp target enter data map(to: assumed_arr) + !$omp target + assumed_arr(1) = 10 + !$omp end target + !$omp target exit data map(from: assumed_arr) +end subroutine + +!CHECK-LABEL: func.func @_QPassume_map_target_enter_exit( +!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> +!CHECK: %[[LOAD_BOX:.*]] = fir.load %[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> +!CHECK: %[[MAP_ADDR:.*]] = omp.map.info var_ptr(%[[LOAD_BOX]] : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(to) capture(ByRef) bounds(%{{.*}}) -> !fir.ref<!fir.array<?xi32>> {name = "assumed_arr"} +!CHECK: omp.target_enter_data map_entries(%[[MAP_ADDR]] : !fir.ref<!fir.array<?xi32>>) +!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> +!CHECK: %[[MAP_ADDR:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""} +!CHECK: %[[MAP_BOX:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.box<!fir.array<?xi32>>) map_clauses(implicit, to) capture(ByRef) members(%{{.*}} : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>> {name = "assumed_arr"} +!CHECK: omp.target map_entries(%[[MAP_BOX]] -> %{{.*}}, %[[MAP_ADDR]] -> %{{.*}} : !fir.ref<!fir.array<?xi32>>, 
!fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) { +!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> +!CHECK: %[[LOAD_BOX:.*]] = fir.load %[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> +!CHECK: %[[MAP_ADDR:.*]] = omp.map.info var_ptr(%[[LOAD_BOX]] : !fir.ref<!fir.array<?xi32>>, i32) map_clauses(from) capture(ByRef) bounds(%{{.*}}) -> !fir.ref<!fir.array<?xi32>> {name = "assumed_arr"} +!CHECK: omp.target_exit_data map_entries(%[[MAP_ADDR]] : !fir.ref<!fir.array<?xi32>>) + +subroutine assume_alloca_map_target_enter_exit(assumed_arr) + integer, allocatable :: assumed_arr(:) + !$omp target enter data map(to: assumed_arr) + !$omp target + assumed_arr(1) = 10 + !$omp end target + !$omp target exit data map(from: assumed_arr) +end subroutine + +!CHECK-LABEL: func.func @_QPassume_alloca_map_target_enter_exit( +!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> +!CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i32) map_clauses(to) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""} +!CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(to) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "assumed_arr"} +!CHECK: omp.target_enter_data map_entries(%[[DESC_MAP]], %[[BOX_ADDR_MAP]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) +!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> 
!fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> +!CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""} +!CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(implicit, to) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "assumed_arr"} +!CHECK: omp.target map_entries(%[[DESC_MAP]] -> %[[VAL_28:.*]], %[[BOX_ADDR_MAP]] -> %[[VAL_29:.*]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) { +!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> +!CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, i32) map_clauses(from) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""} +!CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.box<!fir.heap<!fir.array<?xi32>>>) map_clauses(from) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> {name = "assumed_arr"} +!CHECK: omp.target_exit_data map_entries(%[[DESC_MAP]], %[[BOX_ADDR_MAP]] : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) + +subroutine assume_pointer_map_target_enter_exit(assumed_arr) + integer, pointer :: assumed_arr(:) + !$omp target enter data map(to: 
assumed_arr) + !$omp target + assumed_arr(1) = 10 + !$omp end target + !$omp target exit data map(from: assumed_arr) +end subroutine + +!CHECK-LABEL: func.func @_QPassume_pointer_map_target_enter_exit( +!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> +!CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, i32) map_clauses(to) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""} +!CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.box<!fir.ptr<!fir.array<?xi32>>>) map_clauses(to) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {name = "assumed_arr"} +!CHECK: omp.target_enter_data map_entries(%[[DESC_MAP]], %[[BOX_ADDR_MAP]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) +!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> +!CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""} +!CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.box<!fir.ptr<!fir.array<?xi32>>>) map_clauses(implicit, to) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {name = "assumed_arr"} +!CHECK: omp.target 
map_entries(%[[DESC_MAP]] -> %[[VAL_28:.*]], %[[BOX_ADDR_MAP]] -> %[[VAL_29:.*]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) { +!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> +!CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, i32) map_clauses(from) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""} +!CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.box<!fir.ptr<!fir.array<?xi32>>>) map_clauses(from) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> {name = "assumed_arr"} +!CHECK: omp.target_exit_data map_entries(%[[DESC_MAP]], %[[BOX_ADDR_MAP]] : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) + +subroutine assume_map_target_data(assumed_arr) + integer :: assumed_arr(:) + !$omp target data map(to: assumed_arr) + !$omp target + assumed_arr(1) = 10 + !$omp end target + !$omp end target data +end subroutine + +!CHECK-LABEL: func.func @_QPassume_map_target_data( +!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> +!CHECK: %[[MAP_ADDR:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>, i32) map_clauses(to) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""} +!CHECK: %[[MAP_BOX:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.box<!fir.array<?xi32>>) map_clauses(to) capture(ByRef) 
members(%[[MAP_ADDR]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>> {name = "assumed_arr"} +!CHECK: omp.target_data map_entries(%[[MAP_BOX]], %[[MAP_ADDR]] : !fir.ref<!fir.array<?xi32>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) { +!CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref<!fir.box<!fir.array<?xi32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> +!CHECK: %[[MAP_ADDR:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>> {name = ""} +!CHECK: %[[MAP_BOX:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.box<!fir.array<?xi32>>>, !fir.box<!fir.array<?xi32>>) map_clauses(implicit, to) capture(ByRef) members(%[[MAP_ADDR]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) -> !fir.ref<!fir.array<?xi32>> {name = "assumed_arr"} +!CHECK: omp.target map_entries(%[[MAP_BOX]] -> %{{.*}}, %[[MAP_ADDR]] -> %{{.*}} : !fir.ref<!fir.array<?xi32>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xi32>>>) { diff --git a/flang/test/Lower/generic-shadows-specific.F90 b/flang/test/Lower/generic-shadows-specific.F90 new file mode 100644 index 0000000..e721908 --- /dev/null +++ b/flang/test/Lower/generic-shadows-specific.F90 @@ -0,0 +1,40 @@ + +#if STEP == 1 +! these modules must be read from module files +module generic_shadows_specific_m1 + interface f ! reference must be to generic + module procedure f ! must have same name as generic interface + end interface + contains + character function f() ! must be character + f = 'q' + end +end +module generic_shadows_specific_m2 + use generic_shadows_specific_m1 +end +module generic_shadows_specific_m3 + use generic_shadows_specific_m2 ! must be generic_shadows_specific_m2, not generic_shadows_specific_m1 + contains + subroutine mustExist() ! 
not called, but must exist + character x + x = f() + end +end + +#else +! Check that expected code produced with no crash. +subroutine reproducer() + use generic_shadows_specific_m2 + use generic_shadows_specific_m3 + character x + x = f() +end +#endif + +!RUN: rm -rf %t && mkdir -p %t +!RUN: %flang_fc1 -fsyntax-only -DSTEP=1 -J%t %s +!RUN: %flang_fc1 -emit-fir -J%t -o - %s | FileCheck %s + +!CHECK-LABEL: func.func @_QPreproducer +!CHECK: fir.call @_QMgeneric_shadows_specific_m1Pf diff --git a/flang/test/Transforms/omp-map-info-finalization.fir b/flang/test/Transforms/omp-map-info-finalization.fir index ed814cd..7bc0ae4 100644 --- a/flang/test/Transforms/omp-map-info-finalization.fir +++ b/flang/test/Transforms/omp-map-info-finalization.fir @@ -326,15 +326,15 @@ func.func @_QPreuse_alloca(%arg0: !fir.box<!fir.array<?xf64>> {fir.bindc_name = // CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]] // CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]] // CHECK: omp.target_data map_entries -// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]] -// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[ALLOCA]] +// CHECK: %[[BOX_OFFSET:.*]] = fir.box_offset %[[ALLOCA]] +// CHECK: %[[LOAD_OFFSET:.*]] = fir.load %[[BOX_OFFSET]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf64>>> +// CHECK: %{{[0-9]+}} = omp.map.info var_ptr(%[[LOAD_OFFSET]] // CHECK: omp.target_update map_entries // CHECK: omp.terminator // CHECK: } // CHECK: return - omp.private {type = firstprivate} @boxchar.privatizer : !fir.boxchar<1> copy { ^bb0(%arg0: !fir.boxchar<1>, %arg1: !fir.boxchar<1>): omp.yield(%arg0 : !fir.boxchar<1>) |