diff options
Diffstat (limited to 'flang')
59 files changed, 1667 insertions, 216 deletions
diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h index d8d0956..20a0919 100644 --- a/flang/include/flang/Evaluate/tools.h +++ b/flang/include/flang/Evaluate/tools.h @@ -1289,16 +1289,7 @@ bool CheckForCoindexedObject(parser::ContextualMessages &, const std::optional<ActualArgument> &, const std::string &procName, const std::string &argName); -inline bool IsCUDADeviceSymbol(const Symbol &sym) { - if (const auto *details = - sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) { - if (details->cudaDataAttr() && - *details->cudaDataAttr() != common::CUDADataAttr::Pinned) { - return true; - } - } - return false; -} +bool IsCUDADeviceSymbol(const Symbol &sym); inline bool IsCUDAManagedOrUnifiedSymbol(const Symbol &sym) { if (const auto *details = diff --git a/flang/include/flang/Optimizer/CMakeLists.txt b/flang/include/flang/Optimizer/CMakeLists.txt index 3336ac9..68af52f 100644 --- a/flang/include/flang/Optimizer/CMakeLists.txt +++ b/flang/include/flang/Optimizer/CMakeLists.txt @@ -2,4 +2,5 @@ add_subdirectory(CodeGen) add_subdirectory(Dialect) add_subdirectory(HLFIR) add_subdirectory(Transforms) +add_subdirectory(OpenACC) add_subdirectory(OpenMP) diff --git a/flang/include/flang/Optimizer/OpenACC/CMakeLists.txt b/flang/include/flang/Optimizer/OpenACC/CMakeLists.txt new file mode 100644 index 0000000..a032488 --- /dev/null +++ b/flang/include/flang/Optimizer/OpenACC/CMakeLists.txt @@ -0,0 +1,4 @@ +set(LLVM_TARGET_DEFINITIONS Passes.td) +mlir_tablegen(Passes.h.inc -gen-pass-decls -name FIROpenACC) + +add_public_tablegen_target(FIROpenACCPassesIncGen) diff --git a/flang/include/flang/Optimizer/OpenACC/Passes.h b/flang/include/flang/Optimizer/OpenACC/Passes.h new file mode 100644 index 0000000..0627cc8 --- /dev/null +++ b/flang/include/flang/Optimizer/OpenACC/Passes.h @@ -0,0 +1,33 @@ +//===- Passes.h - OpenACC pass entry points -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header declares the OpenACC passes specific to Fortran and FIR. +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_OPTIMIZER_OPENACC_PASSES_H +#define FORTRAN_OPTIMIZER_OPENACC_PASSES_H + +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassRegistry.h" + +#include <memory> + +namespace fir { +namespace acc { +#define GEN_PASS_DECL +#define GEN_PASS_REGISTRATION +#include "flang/Optimizer/OpenACC/Passes.h.inc" + +std::unique_ptr<mlir::Pass> createACCRecipeBufferizationPass(); + +} // namespace acc +} // namespace fir + +#endif // FORTRAN_OPTIMIZER_OPENACC_PASSES_H diff --git a/flang/include/flang/Optimizer/OpenACC/Passes.td b/flang/include/flang/Optimizer/OpenACC/Passes.td new file mode 100644 index 0000000..3c127b3 --- /dev/null +++ b/flang/include/flang/Optimizer/OpenACC/Passes.td @@ -0,0 +1,36 @@ +//===-- Passes.td - flang OpenACC pass definitions -----------*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_OPTIMIZER_OPENACC_PASSES +#define FORTRAN_OPTIMIZER_OPENACC_PASSES + +include "mlir/Pass/PassBase.td" + +def ACCRecipeBufferization + : Pass<"fir-acc-recipe-bufferization", "mlir::ModuleOp"> { + let summary = "Rewrite acc.*.recipe box values to ref<box> and update uses"; + let description = [{ + Bufferizes OpenACC recipes that operate on fir.box<T> so their type and + region block arguments become fir.ref<fir.box<T>> instead. This applies to + acc.private.recipe, acc.firstprivate.recipe (including copy region), and + acc.reduction.recipe (including combiner region). + + For affected regions, the pass inserts required loads at the beginning of + the region to preserve original uses after argument type changes. For yields + of box values, the pass allocates a local fir.ref<fir.box<T>> and stores the + yielded fir.box<T> into it so the region yields a reference to a box. + + For acc.private, acc.firstprivate, and acc.reduction operations that use a + bufferized recipe, the pass allocates a host-side fir.ref<fir.box<T>> before + the data op and rewires the data op to use the new memory. Other users of + the original data operation result (outside the paired compute op) are + updated to load through the reference. + }]; +} + +#endif // FORTRAN_OPTIMIZER_OPENACC_PASSES diff --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h index 1085393..408f039 100644 --- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h +++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h @@ -57,8 +57,11 @@ struct OpenACCMappableModel mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, - mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, + bool &needsDestroy) const; + + bool generatePrivateDestroy(mlir::Type type, mlir::OpBuilder &builder, + mlir::Location loc, mlir::Value privatized) const; }; } // namespace fir::acc diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h index 0b31cfe..bbb7e6e 100644 --- a/flang/include/flang/Optimizer/Support/Utils.h +++ b/flang/include/flang/Optimizer/Support/Utils.h @@ -200,6 +200,12 @@ std::optional<llvm::ArrayRef<int64_t>> getComponentLowerBoundsIfNonDefault( fir::RecordType recordType, llvm::StringRef component, mlir::ModuleOp module, const mlir::SymbolTable *symbolTable = nullptr); +/// Indicate if a derived type has final routine. Returns std::nullopt if that +/// information is not in the IR; +std::optional<bool> +isRecordWithFinalRoutine(fir::RecordType recordType, mlir::ModuleOp module, + const mlir::SymbolTable *symbolTable = nullptr); + /// Generate a LLVM constant value of type `ity`, using the provided offset. mlir::LLVM::ConstantOp genConstantIndex(mlir::Location loc, mlir::Type ity, diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 1488529..91af92c0 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -520,6 +520,8 @@ public: NODE(parser, OmpAtClause) NODE_ENUM(OmpAtClause, ActionTime) NODE(parser, OmpAtomicDefaultMemOrderClause) + NODE(parser, OmpAttachModifier) + NODE_ENUM(OmpAttachModifier, Value) NODE(parser, OmpAutomapModifier) NODE_ENUM(OmpAutomapModifier, Value) NODE(parser, OmpBaseVariantNames) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index d919b77..f52323c 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3813,6 +3813,18 @@ struct OmpAlwaysModifier { WRAPPER_CLASS_BOILERPLATE(OmpAlwaysModifier, Value); }; +// Ref: [coming in 6.1] +// +// attach-modifier -> +// ATTACH(attachment-mode) // since 6.1 +// +// attachment-mode -> +// ALWAYS | AUTO | NEVER +struct OmpAttachModifier { + ENUM_CLASS(Value, Always, Never, Auto) + WRAPPER_CLASS_BOILERPLATE(OmpAttachModifier, Value); +}; + // Ref: [6.0:289-290] // // automap-modifier -> @@ -4575,6 +4587,7 @@ struct OmpLoopRangeClause { // modifier -> // map-type-modifier [replaced] | // since 4.5, until 5.2 // always-modifier | // since 6.0 +// attach-modifier | // since 6.1 // close-modifier | // since 6.0 // delete-modifier | // since 6.0 // present-modifier | // since 6.0 @@ -4589,9 +4602,9 @@ struct OmpLoopRangeClause { // and delete-modifier has been split from map-type. struct OmpMapClause { TUPLE_CLASS_BOILERPLATE(OmpMapClause); - MODIFIER_BOILERPLATE(OmpAlwaysModifier, OmpCloseModifier, OmpDeleteModifier, - OmpMapTypeModifier, OmpPresentModifier, OmpRefModifier, OmpSelfModifier, - OmpMapper, OmpIterator, OmpMapType, OmpxHoldModifier); + MODIFIER_BOILERPLATE(OmpAlwaysModifier, OmpAttachModifier, OmpCloseModifier, + OmpDeleteModifier, OmpMapTypeModifier, OmpPresentModifier, OmpRefModifier, + OmpSelfModifier, OmpMapper, OmpIterator, OmpMapType, OmpxHoldModifier); std::tuple<MODIFIERS(), OmpObjectList, /*CommaSeparated=*/bool> t; }; diff --git a/flang/include/flang/Semantics/openmp-modifiers.h b/flang/include/flang/Semantics/openmp-modifiers.h index e0eae98..bfa3aa4 100644 --- a/flang/include/flang/Semantics/openmp-modifiers.h +++ b/flang/include/flang/Semantics/openmp-modifiers.h @@ -72,6 +72,7 @@ DECLARE_DESCRIPTOR(parser::OmpAlignModifier); DECLARE_DESCRIPTOR(parser::OmpAllocatorComplexModifier); DECLARE_DESCRIPTOR(parser::OmpAllocatorSimpleModifier); DECLARE_DESCRIPTOR(parser::OmpAlwaysModifier); +DECLARE_DESCRIPTOR(parser::OmpAttachModifier); DECLARE_DESCRIPTOR(parser::OmpAutomapModifier); DECLARE_DESCRIPTOR(parser::OmpChunkModifier); DECLARE_DESCRIPTOR(parser::OmpCloseModifier); diff --git a/flang/include/flang/Semantics/openmp-utils.h b/flang/include/flang/Semantics/openmp-utils.h index 2954a1c..0f85183 100644 --- a/flang/include/flang/Semantics/openmp-utils.h +++ b/flang/include/flang/Semantics/openmp-utils.h @@ -38,6 +38,7 @@ template <typename T, typename U = std::remove_const_t<T>> U AsRvalue(T &t) { template <typename T> T &&AsRvalue(T &&t) { return std::move(t); } const Scope &GetScopingUnit(const Scope &scope); +const Scope &GetProgramUnit(const Scope &scope); // There is no consistent way to get the source of an ActionStmt, but there // is "source" in Statement<T>. This structure keeps the ActionStmt with the diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index 77f567e..04a0639 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -16,6 +16,7 @@ #include "flang/Semantics/module-dependences.h" #include "flang/Support/Fortran.h" #include "llvm/ADT/DenseMapInfo.h" +#include "llvm/Frontend/OpenMP/OMP.h" #include <array> #include <functional> @@ -50,32 +51,34 @@ using MutableSymbolVector = std::vector<MutableSymbolRef>; // Mixin for details with OpenMP declarative constructs. class WithOmpDeclarative { - using OmpAtomicOrderType = common::OmpMemoryOrderType; - public: - ENUM_CLASS(RequiresFlag, ReverseOffload, UnifiedAddress, UnifiedSharedMemory, - DynamicAllocators); - using RequiresFlags = common::EnumSet<RequiresFlag, RequiresFlag_enumSize>; + // The set of requirements for any program unit include requirements + // from any module used in the program unit. + using RequiresClauses = + common::EnumSet<llvm::omp::Clause, llvm::omp::Clause_enumSize>; bool has_ompRequires() const { return ompRequires_.has_value(); } - const RequiresFlags *ompRequires() const { + const RequiresClauses *ompRequires() const { return ompRequires_ ? &*ompRequires_ : nullptr; } - void set_ompRequires(RequiresFlags flags) { ompRequires_ = flags; } + void set_ompRequires(RequiresClauses clauses) { ompRequires_ = clauses; } bool has_ompAtomicDefaultMemOrder() const { return ompAtomicDefaultMemOrder_.has_value(); } - const OmpAtomicOrderType *ompAtomicDefaultMemOrder() const { + const common::OmpMemoryOrderType *ompAtomicDefaultMemOrder() const { return ompAtomicDefaultMemOrder_ ? &*ompAtomicDefaultMemOrder_ : nullptr; } - void set_ompAtomicDefaultMemOrder(OmpAtomicOrderType flags) { + void set_ompAtomicDefaultMemOrder(common::OmpMemoryOrderType flags) { ompAtomicDefaultMemOrder_ = flags; } + friend llvm::raw_ostream &operator<<( + llvm::raw_ostream &, const WithOmpDeclarative &); + private: - std::optional<RequiresFlags> ompRequires_; - std::optional<OmpAtomicOrderType> ompAtomicDefaultMemOrder_; + std::optional<RequiresClauses> ompRequires_; + std::optional<common::OmpMemoryOrderType> ompAtomicDefaultMemOrder_; }; // A module or submodule. diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index f204eef..1de5e6b 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -111,6 +111,7 @@ ENUM_CLASS(KindCode, none, defaultIntegerKind, atomicIntKind, // atomic_int_kind from iso_fortran_env atomicIntOrLogicalKind, // atomic_int_kind or atomic_logical_kind sameAtom, // same type and kind as atom + extensibleOrUnlimitedType, // extensible or unlimited polymorphic type ) struct TypePattern { @@ -160,7 +161,8 @@ static constexpr TypePattern AnyChar{CharType, KindCode::any}; static constexpr TypePattern AnyLogical{LogicalType, KindCode::any}; static constexpr TypePattern AnyRelatable{RelatableType, KindCode::any}; static constexpr TypePattern AnyIntrinsic{IntrinsicType, KindCode::any}; -static constexpr TypePattern ExtensibleDerived{DerivedType, KindCode::any}; +static constexpr TypePattern ExtensibleDerived{ + DerivedType, KindCode::extensibleOrUnlimitedType}; static constexpr TypePattern AnyData{AnyType, KindCode::any}; // Type is irrelevant, but not BOZ (for PRESENT(), OPTIONAL(), &c.) @@ -2103,9 +2105,13 @@ std::optional<SpecificCall> IntrinsicInterface::Match( } return std::nullopt; } else if (!d.typePattern.categorySet.test(type->category())) { + const char *expected{ + d.typePattern.kindCode == KindCode::extensibleOrUnlimitedType + ? ", expected extensible or unlimited polymorphic type" + : ""}; messages.Say(arg->sourceLocation(), - "Actual argument for '%s=' has bad type '%s'"_err_en_US, d.keyword, - type->AsFortran()); + "Actual argument for '%s=' has bad type '%s'%s"_err_en_US, d.keyword, + type->AsFortran(), expected); return std::nullopt; // argument has invalid type category } bool argOk{false}; @@ -2244,6 +2250,17 @@ std::optional<SpecificCall> IntrinsicInterface::Match( return std::nullopt; } break; + case KindCode::extensibleOrUnlimitedType: + argOk = type->IsUnlimitedPolymorphic() || + (type->category() == TypeCategory::Derived && + IsExtensibleType(GetDerivedTypeSpec(type))); + if (!argOk) { + messages.Say(arg->sourceLocation(), + "Actual argument for '%s=' has type '%s', but was expected to be an extensible or unlimited polymorphic type"_err_en_US, + d.keyword, type->AsFortran()); + return std::nullopt; + } + break; default: CRASH_NO_CASE; } diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index b927fa3..bd06acc 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -1153,6 +1153,18 @@ bool HasCUDAImplicitTransfer(const Expr<SomeType> &expr) { return (hasConstant || (hostSymbols.size() > 0)) && deviceSymbols.size() > 0; } +bool IsCUDADeviceSymbol(const Symbol &sym) { + if (const auto *details = + sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) { + return details->cudaDataAttr() && + *details->cudaDataAttr() != common::CUDADataAttr::Pinned; + } else if (const auto *details = + sym.GetUltimate().detailsIf<semantics::AssocEntityDetails>()) { + return GetNbOfCUDADeviceSymbols(details->expr()) > 0; + } + return false; +} + // HasVectorSubscript() struct HasVectorSubscriptHelper : public AnyTraverse<HasVectorSubscriptHelper, bool, diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index 53239cb..e7a6c4d 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -629,6 +629,10 @@ private: unsigned allocatorIdx = Fortran::lower::getAllocatorIdx(alloc.getSymbol()); fir::ExtendedValue exv = isSource ? sourceExv : moldExv; + if (const Fortran::semantics::Symbol *sym{GetLastSymbol(sourceExpr)}) + if (Fortran::semantics::IsCUDADevice(*sym)) + TODO(loc, "CUDA Fortran: allocate with device source"); + // Generate a sequence of runtime calls. errorManager.genStatCheck(builder, loc); genAllocateObjectInit(box, allocatorIdx); @@ -767,6 +771,15 @@ private: const fir::MutableBoxValue &box, ErrorManager &errorManager, const Fortran::semantics::Symbol &sym) { + + if (const Fortran::semantics::DeclTypeSpec *declTypeSpec = sym.GetType()) + if (const Fortran::semantics::DerivedTypeSpec *derivedTypeSpec = + declTypeSpec->AsDerived()) + if (derivedTypeSpec->HasDefaultInitialization( + /*ignoreAllocatable=*/true, /*ignorePointer=*/true)) + TODO(loc, + "CUDA Fortran: allocate on device with default initialization"); + Fortran::lower::StatementContext stmtCtx; cuf::DataAttributeAttr cudaAttr = Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(), diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 62e5c0c..cfb1891 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -978,15 +978,40 @@ static RecipeOp genRecipeOp( auto mappableTy = mlir::dyn_cast<mlir::acc::MappableType>(ty); assert(mappableTy && "Expected that all variable types are considered mappable"); + bool needsDestroy = false; auto retVal = mappableTy.generatePrivateInit( builder, loc, mlir::cast<mlir::TypedValue<mlir::acc::MappableType>>( initBlock->getArgument(0)), initName, initBlock->getArguments().take_back(initBlock->getArguments().size() - 1), - initValue); + initValue, needsDestroy); mlir::acc::YieldOp::create(builder, loc, retVal ? retVal : initBlock->getArgument(0)); + // Create destroy region and generate destruction if requested. + if (needsDestroy) { + llvm::SmallVector<mlir::Type> destroyArgsTy; + llvm::SmallVector<mlir::Location> destroyArgsLoc; + // original and privatized/reduction value + destroyArgsTy.push_back(ty); + destroyArgsTy.push_back(ty); + destroyArgsLoc.push_back(loc); + destroyArgsLoc.push_back(loc); + // Append bounds arguments (if any) in the same order as init region + if (argsTy.size() > 1) { + destroyArgsTy.append(argsTy.begin() + 1, argsTy.end()); + destroyArgsLoc.insert(destroyArgsLoc.end(), argsTy.size() - 1, loc); + } + + builder.createBlock(&recipe.getDestroyRegion(), + recipe.getDestroyRegion().end(), destroyArgsTy, + destroyArgsLoc); + builder.setInsertionPointToEnd(&recipe.getDestroyRegion().back()); + // Call interface on the privatized/reduction value (2nd argument). + (void)mappableTy.generatePrivateDestroy( + builder, loc, recipe.getDestroyRegion().front().getArgument(1)); + mlir::acc::TerminatorOp::create(builder, loc); + } return recipe; } diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 55eda7e..85398be 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -1343,8 +1343,10 @@ bool ClauseProcessor::processMap( const parser::CharBlock &source) { using Map = omp::clause::Map; mlir::Location clauseLocation = converter.genLocation(source); - const auto &[mapType, typeMods, refMod, mappers, iterator, objects] = - clause.t; + const auto &[mapType, typeMods, attachMod, refMod, mappers, iterator, + objects] = clause.t; + if (attachMod) + TODO(currentLocation, "ATTACH modifier is not implemented yet"); llvm::omp::OpenMPOffloadMappingFlags mapTypeBits = llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE; std::string mapperIdName = "__implicit_mapper"; diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index fac37a3..0842c62 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -1069,6 +1069,15 @@ Map make(const parser::OmpClause::Map &inp, ); CLAUSET_ENUM_CONVERT( // + convertAttachMod, parser::OmpAttachModifier::Value, Map::AttachModifier, + // clang-format off + MS(Always, Always) + MS(Auto, Auto) + MS(Never, Never) + // clang-format on + ); + + CLAUSET_ENUM_CONVERT( // convertRefMod, parser::OmpRefModifier::Value, Map::RefModifier, // clang-format off MS(Ref_Ptee, RefPtee) @@ -1115,6 +1124,13 @@ Map make(const parser::OmpClause::Map &inp, if (!modSet.empty()) maybeTypeMods = Map::MapTypeModifiers(modSet.begin(), modSet.end()); + auto attachMod = [&]() -> std::optional<Map::AttachModifier> { + if (auto *t = + semantics::OmpGetUniqueModifier<parser::OmpAttachModifier>(mods)) + return convertAttachMod(t->v); + return std::nullopt; + }(); + auto refMod = [&]() -> std::optional<Map::RefModifier> { if (auto *t = semantics::OmpGetUniqueModifier<parser::OmpRefModifier>(mods)) return convertRefMod(t->v); @@ -1135,6 +1151,7 @@ Map make(const parser::OmpClause::Map &inp, return Map{{/*MapType=*/std::move(type), /*MapTypeModifiers=*/std::move(maybeTypeMods), + /*AttachModifier=*/std::move(attachMod), /*RefModifier=*/std::move(refMod), /*Mapper=*/std::move(mappers), /*Iterator=*/std::move(iterator), /*LocatorList=*/makeObjects(t2, semaCtx)}}; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 444f274..f86ee01 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -4208,18 +4208,17 @@ bool Fortran::lower::markOpenMPDeferredDeclareTargetFunctions( void Fortran::lower::genOpenMPRequires(mlir::Operation *mod, const semantics::Symbol *symbol) { using MlirRequires = mlir::omp::ClauseRequires; - using SemaRequires = semantics::WithOmpDeclarative::RequiresFlag; if (auto offloadMod = llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(mod)) { - semantics::WithOmpDeclarative::RequiresFlags semaFlags; + semantics::WithOmpDeclarative::RequiresClauses reqs; if (symbol) { common::visit( [&](const auto &details) { if constexpr (std::is_base_of_v<semantics::WithOmpDeclarative, std::decay_t<decltype(details)>>) { if (details.has_ompRequires()) - semaFlags = *details.ompRequires(); + reqs = *details.ompRequires(); } }, symbol->details()); @@ -4228,14 +4227,14 @@ void Fortran::lower::genOpenMPRequires(mlir::Operation *mod, // Use pre-populated omp.requires module attribute if it was set, so that // the "-fopenmp-force-usm" compiler option is honored. MlirRequires mlirFlags = offloadMod.getRequires(); - if (semaFlags.test(SemaRequires::ReverseOffload)) + if (reqs.test(llvm::omp::Clause::OMPC_dynamic_allocators)) + mlirFlags = mlirFlags | MlirRequires::dynamic_allocators; + if (reqs.test(llvm::omp::Clause::OMPC_reverse_offload)) mlirFlags = mlirFlags | MlirRequires::reverse_offload; - if (semaFlags.test(SemaRequires::UnifiedAddress)) + if (reqs.test(llvm::omp::Clause::OMPC_unified_address)) mlirFlags = mlirFlags | MlirRequires::unified_address; - if (semaFlags.test(SemaRequires::UnifiedSharedMemory)) + if (reqs.test(llvm::omp::Clause::OMPC_unified_shared_memory)) mlirFlags = mlirFlags | MlirRequires::unified_shared_memory; - if (semaFlags.test(SemaRequires::DynamicAllocators)) - mlirFlags = mlirFlags | MlirRequires::dynamic_allocators; offloadMod.setRequires(mlirFlags); } diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 0afb295..70bb43a2 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -176,6 +176,19 @@ struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> { llvm::LogicalResult matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) const override { + + if (auto gpuMod = addr->getParentOfType<mlir::gpu::GPUModuleOp>()) { + auto global = gpuMod.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol()); + replaceWithAddrOfOrASCast( + rewriter, addr->getLoc(), + global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter), + getProgramAddressSpace(rewriter), + global ? global.getSymName() + : addr.getSymbol().getRootReference().getValue(), + convertType(addr.getType()), addr); + return mlir::success(); + } + auto global = addr->getParentOfType<mlir::ModuleOp>() .lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol()); replaceWithAddrOfOrASCast( @@ -3231,7 +3244,8 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> { if (global.getDataAttr() && *global.getDataAttr() == cuf::DataAttribute::Constant) - TODO(global.getLoc(), "CUDA Fortran CONSTANT variable code generation"); + g.setAddrSpace( + static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Constant)); rewriter.eraseOp(global); return mlir::success(); diff --git a/flang/lib/Optimizer/OpenACC/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/CMakeLists.txt index fc23e64..790b9fd 100644 --- a/flang/lib/Optimizer/OpenACC/CMakeLists.txt +++ b/flang/lib/Optimizer/OpenACC/CMakeLists.txt @@ -1 +1,2 @@ add_subdirectory(Support) +add_subdirectory(Transforms) diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp index 89aa010..9bf10b5 100644 --- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp @@ -21,6 +21,7 @@ #include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" #include "flang/Optimizer/Dialect/Support/KindMapping.h" +#include "flang/Optimizer/Support/Utils.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/OpenACC/OpenACC.h" #include "mlir/IR/BuiltinOps.h" @@ -352,6 +353,14 @@ getBaseRef(mlir::TypedValue<mlir::acc::PointerLikeType> varPtr) { // calculation op. mlir::Value baseRef = llvm::TypeSwitch<mlir::Operation *, mlir::Value>(op) + .Case<fir::DeclareOp>([&](auto op) { + // If this declare binds a view with an underlying storage operand, + // treat that storage as the base reference. Otherwise, fall back + // to the declared memref. + if (auto storage = op.getStorage()) + return storage; + return mlir::Value(varPtr); + }) .Case<hlfir::DesignateOp>([&](auto op) { // Get the base object. return op.getMemref(); @@ -548,14 +557,27 @@ template <typename Ty> mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const { + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const { + needsDestroy = false; mlir::Value retVal; mlir::Type unwrappedTy = fir::unwrapRefType(type); mlir::ModuleOp mod = builder.getInsertionBlock() ->getParent() ->getParentOfType<mlir::ModuleOp>(); - fir::FirOpBuilder firBuilder(builder, mod); + if (auto recType = llvm::dyn_cast<fir::RecordType>( + fir::getFortranElementType(unwrappedTy))) { + // Need to make deep copies of allocatable components. + if (fir::isRecordWithAllocatableMember(recType)) + TODO(loc, + "OpenACC: privatizing derived type with allocatable components"); + // Need to decide if user assignment/final routine should be called. + if (fir::isRecordWithFinalRoutine(recType, mod).value_or(false)) + TODO(loc, "OpenACC: privatizing derived type with user assignment or " + "final routine "); + } + + fir::FirOpBuilder firBuilder(builder, mod); auto getDeclareOpForType = [&](mlir::Type ty) -> hlfir::DeclareOp { auto alloca = fir::AllocaOp::create(firBuilder, loc, ty); return hlfir::DeclareOp::create(firBuilder, loc, alloca, varName); @@ -615,9 +637,11 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( mlir::Value firClass = fir::EmboxOp::create(builder, loc, boxTy, allocatedScalar); fir::StoreOp::create(builder, loc, firClass, retVal); + needsDestroy = true; } else if (mlir::isa<fir::SequenceType>(innerTy)) { hlfir::Entity source = hlfir::Entity{var}; - auto [temp, cleanup] = hlfir::createTempFromMold(loc, firBuilder, source); + auto [temp, cleanupFlag] = + hlfir::createTempFromMold(loc, firBuilder, source); if (fir::isa_ref_type(type)) { // When the temp is created - it is not a reference - thus we can // end up with a type inconsistency. Therefore ensure storage is created @@ -636,6 +660,9 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit( } else { retVal = temp; } + // If heap was allocated, a destroy is required later. + if (cleanupFlag) + needsDestroy = true; } else { TODO(loc, "Unsupported boxed type for OpenACC private-like recipe"); } @@ -667,23 +694,61 @@ template mlir::Value OpenACCMappableModel<fir::BaseBoxType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; template mlir::Value OpenACCMappableModel<fir::ReferenceType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; template mlir::Value OpenACCMappableModel<fir::HeapType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; template mlir::Value OpenACCMappableModel<fir::PointerType>::generatePrivateInit( mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName, - mlir::ValueRange extents, mlir::Value initVal) const; + mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const; + +template <typename Ty> +bool OpenACCMappableModel<Ty>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const { + mlir::Type unwrappedTy = fir::unwrapRefType(type); + // For boxed scalars allocated with AllocMem during init, free the heap. + if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(unwrappedTy)) { + mlir::Value boxVal = privatized; + if (fir::isa_ref_type(boxVal.getType())) + boxVal = fir::LoadOp::create(builder, loc, boxVal); + mlir::Value addr = fir::BoxAddrOp::create(builder, loc, boxVal); + // FreeMem only accepts fir.heap and this may not be represented in the box + // type if the privatized entity is not an allocatable. + mlir::Type heapType = + fir::HeapType::get(fir::unwrapRefType(addr.getType())); + if (heapType != addr.getType()) + addr = fir::ConvertOp::create(builder, loc, heapType, addr); + fir::FreeMemOp::create(builder, loc, addr); + return true; + } + + // Nothing to do for other categories by default, they are stack allocated. + return true; +} + +template bool OpenACCMappableModel<fir::BaseBoxType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; +template bool OpenACCMappableModel<fir::ReferenceType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; +template bool OpenACCMappableModel<fir::HeapType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; +template bool OpenACCMappableModel<fir::PointerType>::generatePrivateDestroy( + mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value privatized) const; } // namespace fir::acc diff --git a/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp b/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp new file mode 100644 index 0000000..4840a99 --- /dev/null +++ b/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp @@ -0,0 +1,191 @@ +//===- ACCRecipeBufferization.cpp -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Bufferize OpenACC recipes that yield fir.box<T> to operate on +// fir.ref<fir.box<T>> and update uses accordingly. +// +//===----------------------------------------------------------------------===// + +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/OpenACC/Passes.h" +#include "mlir/Dialect/OpenACC/OpenACC.h" +#include "mlir/IR/Block.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/SymbolTable.h" +#include "mlir/IR/Value.h" +#include "mlir/IR/Visitors.h" +#include "llvm/ADT/TypeSwitch.h" + +namespace fir::acc { +#define GEN_PASS_DEF_ACCRECIPEBUFFERIZATION +#include "flang/Optimizer/OpenACC/Passes.h.inc" +} // namespace fir::acc + +namespace { + +class BufferizeInterface { +public: + static std::optional<mlir::Type> mustBufferize(mlir::Type recipeType) { + if (auto boxTy = llvm::dyn_cast<fir::BaseBoxType>(recipeType)) + return fir::ReferenceType::get(boxTy); + return std::nullopt; + } + + static mlir::Operation *load(mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value value) { + return builder.create<fir::LoadOp>(loc, value); + } + + static mlir::Value placeInMemory(mlir::OpBuilder &builder, mlir::Location loc, + mlir::Value value) { + auto alloca = builder.create<fir::AllocaOp>(loc, value.getType()); + builder.create<fir::StoreOp>(loc, value, alloca); + return alloca; + } +}; + +static void bufferizeRegionArgsAndYields(mlir::Region ®ion, + mlir::Location loc, mlir::Type oldType, + mlir::Type newType) { + if (region.empty()) + return; + + mlir::OpBuilder builder(®ion); + for (mlir::BlockArgument arg : region.getArguments()) { + if (arg.getType() == oldType) { + arg.setType(newType); + if (!arg.use_empty()) { + mlir::Operation *loadOp = BufferizeInterface::load(builder, loc, arg); + arg.replaceAllUsesExcept(loadOp->getResult(0), loadOp); + } + } + } + if (auto yield = + llvm::dyn_cast<mlir::acc::YieldOp>(region.back().getTerminator())) { + llvm::SmallVector<mlir::Value> newOperands; + newOperands.reserve(yield.getNumOperands()); + bool changed = false; + for (mlir::Value oldYieldArg : yield.getOperands()) { + if (oldYieldArg.getType() == oldType) { + builder.setInsertionPoint(yield); + mlir::Value alloca = + BufferizeInterface::placeInMemory(builder, loc, oldYieldArg); + newOperands.push_back(alloca); + changed = true; + } else { + newOperands.push_back(oldYieldArg); + } + } + if (changed) + yield->setOperands(newOperands); + } +} + +static void updateRecipeUse(mlir::ArrayAttr recipes, mlir::ValueRange operands, + llvm::StringRef recipeSymName, + mlir::Operation *computeOp) { + if (!recipes) + return; + for (auto [recipeSym, oldRes] : llvm::zip(recipes, operands)) { + if (llvm::cast<mlir::SymbolRefAttr>(recipeSym).getLeafReference() != + recipeSymName) + continue; + + mlir::Operation *dataOp = oldRes.getDefiningOp(); + assert(dataOp && "dataOp must be paired with computeOp"); + mlir::Location loc = dataOp->getLoc(); + mlir::OpBuilder builder(dataOp); + llvm::TypeSwitch<mlir::Operation *, void>(dataOp) + .Case<mlir::acc::PrivateOp, mlir::acc::FirstprivateOp, + mlir::acc::ReductionOp>([&](auto privateOp) { + builder.setInsertionPointAfterValue(privateOp.getVar()); + mlir::Value alloca = BufferizeInterface::placeInMemory( + builder, loc, privateOp.getVar()); + privateOp.getVarMutable().assign(alloca); + privateOp.getAccVar().setType(alloca.getType()); + }); + + llvm::SmallVector<mlir::Operation *> users(oldRes.getUsers().begin(), + oldRes.getUsers().end()); + for (mlir::Operation *useOp : users) { + if (useOp == computeOp) + continue; + builder.setInsertionPoint(useOp); + mlir::Operation *load = BufferizeInterface::load(builder, loc, oldRes); + useOp->replaceUsesOfWith(oldRes, load->getResult(0)); + } + } +} + +class ACCRecipeBufferization + : public fir::acc::impl::ACCRecipeBufferizationBase< + ACCRecipeBufferization> { +public: + void runOnOperation() override { + mlir::ModuleOp module = getOperation(); + + llvm::SmallVector<llvm::StringRef> recipeNames; + module.walk([&](mlir::Operation *recipe) { + llvm::TypeSwitch<mlir::Operation *, void>(recipe) + .Case<mlir::acc::PrivateRecipeOp, mlir::acc::FirstprivateRecipeOp, + mlir::acc::ReductionRecipeOp>([&](auto recipe) { + mlir::Type oldType = recipe.getType(); + auto bufferizedType = + BufferizeInterface::mustBufferize(recipe.getType()); + if (!bufferizedType) + return; + recipe.setTypeAttr(mlir::TypeAttr::get(*bufferizedType)); + mlir::Location loc = recipe.getLoc(); + using RecipeOp = decltype(recipe); + bufferizeRegionArgsAndYields(recipe.getInitRegion(), loc, oldType, + *bufferizedType); + if constexpr (std::is_same_v<RecipeOp, + mlir::acc::FirstprivateRecipeOp>) + bufferizeRegionArgsAndYields(recipe.getCopyRegion(), loc, oldType, + *bufferizedType); + if constexpr (std::is_same_v<RecipeOp, + mlir::acc::ReductionRecipeOp>) + bufferizeRegionArgsAndYields(recipe.getCombinerRegion(), loc, + oldType, *bufferizedType); + bufferizeRegionArgsAndYields(recipe.getDestroyRegion(), loc, + oldType, *bufferizedType); + recipeNames.push_back(recipe.getSymName()); + }); + }); + if (recipeNames.empty()) + return; + + module.walk([&](mlir::Operation *op) { + llvm::TypeSwitch<mlir::Operation *, void>(op) + .Case<mlir::acc::LoopOp, mlir::acc::ParallelOp, mlir::acc::SerialOp>( + [&](auto computeOp) { + for (llvm::StringRef recipeName : recipeNames) { + if (computeOp.getPrivatizationRecipes()) + updateRecipeUse(computeOp.getPrivatizationRecipesAttr(), + computeOp.getPrivateOperands(), recipeName, + op); + if (computeOp.getFirstprivatizationRecipes()) + updateRecipeUse( + computeOp.getFirstprivatizationRecipesAttr(), + computeOp.getFirstprivateOperands(), recipeName, op); + if (computeOp.getReductionRecipes()) + updateRecipeUse(computeOp.getReductionRecipesAttr(), + computeOp.getReductionOperands(), + recipeName, op); + } + }); + }); + } +}; + +} // namespace + +std::unique_ptr<mlir::Pass> fir::acc::createACCRecipeBufferizationPass() { + return std::make_unique<ACCRecipeBufferization>(); +} diff --git a/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt new file mode 100644 index 0000000..2427da0 --- /dev/null +++ b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt @@ -0,0 +1,12 @@ +add_flang_library(FIROpenACCTransforms + ACCRecipeBufferization.cpp + + DEPENDS + FIROpenACCPassesIncGen + + LINK_LIBS + MLIRIR + MLIRPass + FIRDialect + MLIROpenACCDialect +) diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp index 260e525..2bbd803 100644 --- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp +++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp @@ -40,6 +40,7 @@ #include "mlir/IR/SymbolTable.h" #include "mlir/Pass/Pass.h" #include "mlir/Support/LLVM.h" +#include "llvm/ADT/BitmaskEnum.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringSet.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" @@ -128,6 +129,17 @@ class MapInfoFinalizationPass } } + /// Return true if the module has an OpenMP requires clause that includes + /// unified_shared_memory. + static bool moduleRequiresUSM(mlir::ModuleOp module) { + assert(module && "invalid module"); + if (auto req = module->getAttrOfType<mlir::omp::ClauseRequiresAttr>( + "omp.requires")) + return mlir::omp::bitEnumContainsAll( + req.getValue(), mlir::omp::ClauseRequires::unified_shared_memory); + return false; + } + /// Create the member map for coordRef and append it (and its index /// path) to the provided new* vectors, if it is not already present. void appendMemberMapIfNew( @@ -425,8 +437,12 @@ class MapInfoFinalizationPass mapFlags flags = mapFlags::OMP_MAP_TO | (mapFlags(mapTypeFlag) & - (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_CLOSE | - mapFlags::OMP_MAP_ALWAYS)); + (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS)); + // For unified_shared_memory, we additionally add `CLOSE` on the descriptor + // to ensure device-local placement where required by tests relying on USM + + // close semantics. + if (moduleRequiresUSM(target->getParentOfType<mlir::ModuleOp>())) + flags |= mapFlags::OMP_MAP_CLOSE; return llvm::to_underlying(flags); } @@ -518,6 +534,75 @@ class MapInfoFinalizationPass return newMapInfoOp; } + // Expand mappings of type(C_PTR) to map their `__address` field explicitly + // as a single pointer-sized member (USM-gated at callsite). This helps in + // USM scenarios to ensure the pointer-sized mapping is used. + mlir::omp::MapInfoOp genCptrMemberMap(mlir::omp::MapInfoOp op, + fir::FirOpBuilder &builder) { + if (!op.getMembers().empty()) + return op; + + mlir::Type varTy = fir::unwrapRefType(op.getVarPtr().getType()); + if (!mlir::isa<fir::RecordType>(varTy)) + return op; + auto recTy = mlir::cast<fir::RecordType>(varTy); + // If not a builtin C_PTR record, skip. + if (!recTy.getName().ends_with("__builtin_c_ptr")) + return op; + + // Find the index of the c_ptr address component named "__address". + int32_t fieldIdx = recTy.getFieldIndex("__address"); + if (fieldIdx < 0) + return op; + + mlir::Location loc = op.getVarPtr().getLoc(); + mlir::Type memTy = recTy.getType(fieldIdx); + fir::IntOrValue idxConst = + mlir::IntegerAttr::get(builder.getI32Type(), fieldIdx); + mlir::Value coord = fir::CoordinateOp::create( + builder, loc, builder.getRefType(memTy), op.getVarPtr(), + llvm::SmallVector<fir::IntOrValue, 1>{idxConst}); + + // Child for the `__address` member. + llvm::SmallVector<llvm::SmallVector<int64_t>> memberIdx = {{0}}; + mlir::ArrayAttr newMembersAttr = builder.create2DI64ArrayAttr(memberIdx); + // Force CLOSE in USM paths so the pointer gets device-local placement + // when required by tests relying on USM + close semantics. + uint64_t mapTypeVal = + op.getMapType() | + llvm::to_underlying( + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE); + mlir::IntegerAttr mapTypeAttr = builder.getIntegerAttr( + builder.getIntegerType(64, /*isSigned=*/false), mapTypeVal); + + mlir::omp::MapInfoOp memberMap = mlir::omp::MapInfoOp::create( + builder, loc, coord.getType(), coord, + mlir::TypeAttr::get(fir::unwrapRefType(coord.getType())), mapTypeAttr, + builder.getAttr<mlir::omp::VariableCaptureKindAttr>( + mlir::omp::VariableCaptureKind::ByRef), + /*varPtrPtr=*/mlir::Value{}, + /*members=*/llvm::SmallVector<mlir::Value>{}, + /*member_index=*/mlir::ArrayAttr{}, + /*bounds=*/op.getBounds(), + /*mapperId=*/mlir::FlatSymbolRefAttr(), + /*name=*/op.getNameAttr(), + /*partial_map=*/builder.getBoolAttr(false)); + + // Rebuild the parent as a container with the `__address` member. + mlir::omp::MapInfoOp newParent = mlir::omp::MapInfoOp::create( + builder, op.getLoc(), op.getResult().getType(), op.getVarPtr(), + op.getVarTypeAttr(), mapTypeAttr, op.getMapCaptureTypeAttr(), + /*varPtrPtr=*/mlir::Value{}, + /*members=*/llvm::SmallVector<mlir::Value>{memberMap}, + /*member_index=*/newMembersAttr, + /*bounds=*/llvm::SmallVector<mlir::Value>{}, + /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(), + /*partial_map=*/builder.getBoolAttr(false)); + op.replaceAllUsesWith(newParent.getResult()); + op->erase(); + return newParent; + } + mlir::omp::MapInfoOp genDescriptorMemberMaps(mlir::omp::MapInfoOp op, fir::FirOpBuilder &builder, mlir::Operation *target) { @@ -1169,6 +1254,17 @@ class MapInfoFinalizationPass genBoxcharMemberMap(op, builder); }); + // Expand type(C_PTR) only when unified_shared_memory is required, + // to ensure device-visible pointer size/behavior in USM scenarios + // without changing default expectations elsewhere. + func->walk([&](mlir::omp::MapInfoOp op) { + // Only expand C_PTR members when unified_shared_memory is required. + if (!moduleRequiresUSM(func->getParentOfType<mlir::ModuleOp>())) + return; + builder.setInsertionPoint(op); + genCptrMemberMap(op, builder); + }); + func->walk([&](mlir::omp::MapInfoOp op) { // TODO: Currently only supports a single user for the MapInfoOp. This // is fine for the moment, as the Fortran frontend will generate a diff --git a/flang/lib/Optimizer/Support/Utils.cpp b/flang/lib/Optimizer/Support/Utils.cpp index c71642c..92390e4a 100644 --- a/flang/lib/Optimizer/Support/Utils.cpp +++ b/flang/lib/Optimizer/Support/Utils.cpp @@ -51,6 +51,16 @@ std::optional<llvm::ArrayRef<int64_t>> fir::getComponentLowerBoundsIfNonDefault( return std::nullopt; } +std::optional<bool> +fir::isRecordWithFinalRoutine(fir::RecordType recordType, mlir::ModuleOp module, + const mlir::SymbolTable *symbolTable) { + fir::TypeInfoOp typeInfo = + fir::lookupTypeInfoOp(recordType, module, symbolTable); + if (!typeInfo) + return std::nullopt; + return !typeInfo.getNoFinal(); +} + mlir::LLVM::ConstantOp fir::genConstantIndex(mlir::Location loc, mlir::Type ity, mlir::ConversionPatternRewriter &rewriter, diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 9507021..b5771eb 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -548,6 +548,14 @@ TYPE_PARSER(construct<OmpAllocatorSimpleModifier>(scalarIntExpr)) TYPE_PARSER(construct<OmpAlwaysModifier>( // "ALWAYS" >> pure(OmpAlwaysModifier::Value::Always))) +TYPE_PARSER(construct<OmpAttachModifier::Value>( + "ALWAYS" >> pure(OmpAttachModifier::Value::Always) || + "AUTO" >> pure(OmpAttachModifier::Value::Auto) || + "NEVER" >> pure(OmpAttachModifier::Value::Never))) + +TYPE_PARSER(construct<OmpAttachModifier>( // + "ATTACH" >> parenthesized(Parser<OmpAttachModifier::Value>{}))) + TYPE_PARSER(construct<OmpAutomapModifier>( "AUTOMAP" >> pure(OmpAutomapModifier::Value::Automap))) @@ -744,6 +752,7 @@ TYPE_PARSER(sourced( TYPE_PARSER(sourced(construct<OmpMapClause::Modifier>( sourced(construct<OmpMapClause::Modifier>(Parser<OmpAlwaysModifier>{}) || + construct<OmpMapClause::Modifier>(Parser<OmpAttachModifier>{}) || construct<OmpMapClause::Modifier>(Parser<OmpCloseModifier>{}) || construct<OmpMapClause::Modifier>(Parser<OmpDeleteModifier>{}) || construct<OmpMapClause::Modifier>(Parser<OmpPresentModifier>{}) || diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 0511f5b..b172e429 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2384,6 +2384,11 @@ public: Walk(x.v); Put(")"); } + void Unparse(const OmpAttachModifier &x) { + Word("ATTACH("); + Walk(x.v); + Put(")"); + } void Unparse(const OmpOrderClause &x) { using Modifier = OmpOrderClause::Modifier; Walk(std::get<std::optional<std::list<Modifier>>>(x.t), ":"); @@ -2820,6 +2825,7 @@ public: WALK_NESTED_ENUM(OmpMapType, Value) // OMP map-type WALK_NESTED_ENUM(OmpMapTypeModifier, Value) // OMP map-type-modifier WALK_NESTED_ENUM(OmpAlwaysModifier, Value) + WALK_NESTED_ENUM(OmpAttachModifier, Value) WALK_NESTED_ENUM(OmpCloseModifier, Value) WALK_NESTED_ENUM(OmpDeleteModifier, Value) WALK_NESTED_ENUM(OmpPresentModifier, Value) diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index ea5e2c0..31e246c 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -3622,6 +3622,7 @@ void CheckHelper::CheckDioDtvArg(const Symbol &proc, const Symbol &subp, ioKind == common::DefinedIo::ReadUnformatted ? Attr::INTENT_INOUT : Attr::INTENT_IN); + CheckDioDummyIsScalar(subp, *arg); } } @@ -3687,6 +3688,7 @@ void CheckHelper::CheckDioAssumedLenCharacterArg(const Symbol &subp, "Dummy argument '%s' of a defined input/output procedure must be assumed-length CHARACTER of default kind"_err_en_US, arg->name()); } + CheckDioDummyIsScalar(subp, *arg); } } diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp index 351af5c..515121a 100644 --- a/flang/lib/Semantics/check-omp-atomic.cpp +++ b/flang/lib/Semantics/check-omp-atomic.cpp @@ -519,8 +519,8 @@ private: /// function references with scalar data pointer result of non-character /// intrinsic type or variables that are non-polymorphic scalar pointers /// and any length type parameter must be constant. -void OmpStructureChecker::CheckAtomicType( - SymbolRef sym, parser::CharBlock source, std::string_view name) { +void OmpStructureChecker::CheckAtomicType(SymbolRef sym, + parser::CharBlock source, std::string_view name, bool checkTypeOnPointer) { const DeclTypeSpec *typeSpec{sym->GetType()}; if (!typeSpec) { return; @@ -547,6 +547,22 @@ void OmpStructureChecker::CheckAtomicType( return; } + // Apply pointer-to-non-intrinsic rule only for intrinsic-assignment paths. + if (checkTypeOnPointer) { + using Category = DeclTypeSpec::Category; + Category cat{typeSpec->category()}; + if (cat != Category::Numeric && cat != Category::Logical) { + std::string details = " has the POINTER attribute"; + if (const auto *derived{typeSpec->AsDerived()}) { + details += " and derived type '"s + derived->name().ToString() + "'"; + } + context_.Say(source, + "ATOMIC operation requires an intrinsic scalar variable; '%s'%s"_err_en_US, + sym->name(), details); + return; + } + } + // Go over all length parameters, if any, and check if they are // explicit. if (const DerivedTypeSpec *derived{typeSpec->AsDerived()}) { @@ -562,7 +578,7 @@ void OmpStructureChecker::CheckAtomicType( } void OmpStructureChecker::CheckAtomicVariable( - const SomeExpr &atom, parser::CharBlock source) { + const SomeExpr &atom, parser::CharBlock source, bool checkTypeOnPointer) { if (atom.Rank() != 0) { context_.Say(source, "Atomic variable %s should be a scalar"_err_en_US, atom.AsFortran()); @@ -572,7 +588,7 @@ void OmpStructureChecker::CheckAtomicVariable( assert(dsgs.size() == 1 && "Should have a single top-level designator"); evaluate::SymbolVector syms{evaluate::GetSymbolVector(dsgs.front())}; - CheckAtomicType(syms.back(), source, atom.AsFortran()); + CheckAtomicType(syms.back(), source, atom.AsFortran(), checkTypeOnPointer); if (IsAllocatable(syms.back()) && !IsArrayElement(atom)) { context_.Say(source, "Atomic variable %s cannot be ALLOCATABLE"_err_en_US, @@ -789,7 +805,8 @@ void OmpStructureChecker::CheckAtomicCaptureAssignment( if (!IsVarOrFunctionRef(atom)) { ErrorShouldBeVariable(atom, rsrc); } else { - CheckAtomicVariable(atom, rsrc); + CheckAtomicVariable( + atom, rsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(capture)); // This part should have been checked prior to calling this function. assert(*GetConvertInput(capture.rhs) == atom && "This cannot be a capture assignment"); @@ -808,7 +825,8 @@ void OmpStructureChecker::CheckAtomicReadAssignment( if (!IsVarOrFunctionRef(atom)) { ErrorShouldBeVariable(atom, rsrc); } else { - CheckAtomicVariable(atom, rsrc); + CheckAtomicVariable( + atom, rsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(read)); CheckStorageOverlap(atom, {read.lhs}, source); } } else { @@ -829,7 +847,8 @@ void OmpStructureChecker::CheckAtomicWriteAssignment( if (!IsVarOrFunctionRef(atom)) { ErrorShouldBeVariable(atom, rsrc); } else { - CheckAtomicVariable(atom, lsrc); + CheckAtomicVariable( + atom, lsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(write)); CheckStorageOverlap(atom, {write.rhs}, source); } } @@ -854,7 +873,8 @@ OmpStructureChecker::CheckAtomicUpdateAssignment( return std::nullopt; } - CheckAtomicVariable(atom, lsrc); + CheckAtomicVariable( + atom, lsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(update)); auto [hasErrors, tryReassoc]{CheckAtomicUpdateAssignmentRhs( atom, update.rhs, source, /*suppressDiagnostics=*/true)}; @@ -1017,7 +1037,8 @@ void OmpStructureChecker::CheckAtomicConditionalUpdateAssignment( return; } - CheckAtomicVariable(atom, alsrc); + CheckAtomicVariable( + atom, alsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(assign)); auto top{GetTopLevelOperationIgnoreResizing(cond)}; // Missing arguments to operations would have been diagnosed by now. diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index d65a89e..4b5610a 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -3017,8 +3017,8 @@ void OmpStructureChecker::Leave(const parser::OmpClauseList &) { &objs, std::string clause) { for (const auto &obj : objs.v) { - if (const parser::Name * - objName{parser::Unwrap<parser::Name>(obj)}) { + if (const parser::Name *objName{ + parser::Unwrap<parser::Name>(obj)}) { if (&objName->symbol->GetUltimate() == eventHandleSym) { context_.Say(GetContext().clauseSource, "A variable: `%s` that appears in a DETACH clause cannot appear on %s clause on the same construct"_err_en_US, @@ -3637,7 +3637,8 @@ void OmpStructureChecker::CheckReductionModifier( if (modifier.v == ReductionModifier::Value::Task) { // "Task" is only allowed on worksharing or "parallel" directive. static llvm::omp::Directive worksharing[]{ - llvm::omp::Directive::OMPD_do, llvm::omp::Directive::OMPD_scope, + llvm::omp::Directive::OMPD_do, // + llvm::omp::Directive::OMPD_scope, // llvm::omp::Directive::OMPD_sections, // There are more worksharing directives, but they do not apply: // "for" is C++ only, @@ -4081,9 +4082,15 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Map &x) { if (auto *iter{OmpGetUniqueModifier<parser::OmpIterator>(modifiers)}) { CheckIteratorModifier(*iter); } + + using Directive = llvm::omp::Directive; + Directive dir{GetContext().directive}; + llvm::ArrayRef<Directive> leafs{llvm::omp::getLeafConstructsOrSelf(dir)}; + parser::OmpMapType::Value mapType{parser::OmpMapType::Value::Storage}; + if (auto *type{OmpGetUniqueModifier<parser::OmpMapType>(modifiers)}) { - using Directive = llvm::omp::Directive; using Value = parser::OmpMapType::Value; + mapType = type->v; static auto isValidForVersion{ [](parser::OmpMapType::Value t, unsigned version) { @@ -4120,10 +4127,6 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Map &x) { return result; }()}; - llvm::omp::Directive dir{GetContext().directive}; - llvm::ArrayRef<llvm::omp::Directive> leafs{ - llvm::omp::getLeafConstructsOrSelf(dir)}; - if (llvm::is_contained(leafs, Directive::OMPD_target) || llvm::is_contained(leafs, Directive::OMPD_target_data)) { if (version >= 60) { @@ -4141,6 +4144,43 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Map &x) { } } + if (auto *attach{ + OmpGetUniqueModifier<parser::OmpAttachModifier>(modifiers)}) { + bool mapEnteringConstructOrMapper{ + llvm::is_contained(leafs, Directive::OMPD_target) || + llvm::is_contained(leafs, Directive::OMPD_target_data) || + llvm::is_contained(leafs, Directive::OMPD_target_enter_data) || + llvm::is_contained(leafs, Directive::OMPD_declare_mapper)}; + + if (!mapEnteringConstructOrMapper || !IsMapEnteringType(mapType)) { + const auto &desc{OmpGetDescriptor<parser::OmpAttachModifier>()}; + context_.Say(OmpGetModifierSource(modifiers, attach), + "The '%s' modifier can only appear on a map-entering construct or on a DECLARE_MAPPER directive"_err_en_US, + desc.name.str()); + } + + auto hasBasePointer{[&](const SomeExpr &item) { + evaluate::SymbolVector symbols{evaluate::GetSymbolVector(item)}; + return llvm::any_of( + symbols, [](SymbolRef s) { return IsPointer(s.get()); }); + }}; + + evaluate::ExpressionAnalyzer ea{context_}; + const auto &objects{std::get<parser::OmpObjectList>(x.v.t)}; + for (auto &object : objects.v) { + if (const parser::Designator *d{GetDesignatorFromObj(object)}) { + if (auto &&expr{ea.Analyze(*d)}) { + if (hasBasePointer(*expr)) { + continue; + } + } + } + auto source{GetObjectSource(object)}; + context_.Say(source ? *source : GetContext().clauseSource, + "A list-item that appears in a map clause with the ATTACH modifier must have a base-pointer"_err_en_US); + } + } + auto &&typeMods{ OmpGetRepeatableModifier<parser::OmpMapTypeModifier>(modifiers)}; struct Less { diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index f507278..543642ff 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -262,10 +262,10 @@ private: void CheckStorageOverlap(const evaluate::Expr<evaluate::SomeType> &, llvm::ArrayRef<evaluate::Expr<evaluate::SomeType>>, parser::CharBlock); void ErrorShouldBeVariable(const MaybeExpr &expr, parser::CharBlock source); - void CheckAtomicType( - SymbolRef sym, parser::CharBlock source, std::string_view name); - void CheckAtomicVariable( - const evaluate::Expr<evaluate::SomeType> &, parser::CharBlock); + void CheckAtomicType(SymbolRef sym, parser::CharBlock source, + std::string_view name, bool checkTypeOnPointer = true); + void CheckAtomicVariable(const evaluate::Expr<evaluate::SomeType> &, + parser::CharBlock, bool checkTypeOnPointer = true); std::pair<const parser::ExecutionPartConstruct *, const parser::ExecutionPartConstruct *> CheckUpdateCapture(const parser::ExecutionPartConstruct *ec1, diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index 8074c94..556259d 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -17,6 +17,7 @@ #include "flang/Semantics/semantics.h" #include "flang/Semantics/symbol.h" #include "flang/Semantics/tools.h" +#include "llvm/Frontend/OpenMP/OMP.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" @@ -24,6 +25,7 @@ #include <fstream> #include <set> #include <string_view> +#include <type_traits> #include <variant> #include <vector> @@ -359,6 +361,40 @@ void ModFileWriter::PrepareRenamings(const Scope &scope) { } } +static void PutOpenMPRequirements(llvm::raw_ostream &os, const Symbol &symbol) { + using RequiresClauses = WithOmpDeclarative::RequiresClauses; + using OmpMemoryOrderType = common::OmpMemoryOrderType; + + const auto [reqs, order]{common::visit( + [&](auto &&details) + -> std::pair<const RequiresClauses *, const OmpMemoryOrderType *> { + if constexpr (std::is_convertible_v<decltype(details), + const WithOmpDeclarative &>) { + return {details.ompRequires(), details.ompAtomicDefaultMemOrder()}; + } else { + return {nullptr, nullptr}; + } + }, + symbol.details())}; + + if (order) { + llvm::omp::Clause admo{llvm::omp::Clause::OMPC_atomic_default_mem_order}; + os << "!$omp requires " + << parser::ToLowerCaseLetters(llvm::omp::getOpenMPClauseName(admo)) + << '(' << parser::ToLowerCaseLetters(EnumToString(*order)) << ")\n"; + } + if (reqs) { + os << "!$omp requires"; + reqs->IterateOverMembers([&](llvm::omp::Clause f) { + if (f != llvm::omp::Clause::OMPC_atomic_default_mem_order) { + os << ' ' + << parser::ToLowerCaseLetters(llvm::omp::getOpenMPClauseName(f)); + } + }); + os << "\n"; + } +} + // Put out the visible symbols from scope. void ModFileWriter::PutSymbols( const Scope &scope, UnorderedSymbolSet *hermeticModules) { @@ -396,6 +432,7 @@ void ModFileWriter::PutSymbols( for (const Symbol &symbol : uses) { PutUse(symbol); } + PutOpenMPRequirements(decls_, DEREF(scope.symbol())); for (const auto &set : scope.equivalenceSets()) { if (!set.empty() && !set.front().symbol.test(Symbol::Flag::CompilerCreated)) { diff --git a/flang/lib/Semantics/openmp-modifiers.cpp b/flang/lib/Semantics/openmp-modifiers.cpp index af4000c..717fb03 100644 --- a/flang/lib/Semantics/openmp-modifiers.cpp +++ b/flang/lib/Semantics/openmp-modifiers.cpp @@ -157,6 +157,22 @@ const OmpModifierDescriptor &OmpGetDescriptor<parser::OmpAlwaysModifier>() { } template <> +const OmpModifierDescriptor &OmpGetDescriptor<parser::OmpAttachModifier>() { + static const OmpModifierDescriptor desc{ + /*name=*/"attach-modifier", + /*props=*/ + { + {61, {OmpProperty::Unique}}, + }, + /*clauses=*/ + { + {61, {Clause::OMPC_map}}, + }, + }; + return desc; +} + +template <> const OmpModifierDescriptor &OmpGetDescriptor<parser::OmpAutomapModifier>() { static const OmpModifierDescriptor desc{ /*name=*/"automap-modifier", diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp index a8ec4d6..292e73b 100644 --- a/flang/lib/Semantics/openmp-utils.cpp +++ b/flang/lib/Semantics/openmp-utils.cpp @@ -13,6 +13,7 @@ #include "flang/Semantics/openmp-utils.h" #include "flang/Common/Fortran-consts.h" +#include "flang/Common/idioms.h" #include "flang/Common/indirection.h" #include "flang/Common/reference.h" #include "flang/Common/visit.h" @@ -59,6 +60,26 @@ const Scope &GetScopingUnit(const Scope &scope) { return *iter; } +const Scope &GetProgramUnit(const Scope &scope) { + const Scope *unit{nullptr}; + for (const Scope *iter{&scope}; !iter->IsTopLevel(); iter = &iter->parent()) { + switch (iter->kind()) { + case Scope::Kind::BlockData: + case Scope::Kind::MainProgram: + case Scope::Kind::Module: + return *iter; + case Scope::Kind::Subprogram: + // Ignore subprograms that are nested. + unit = iter; + break; + default: + break; + } + } + assert(unit && "Scope not in a program unit"); + return *unit; +} + SourcedActionStmt GetActionStmt(const parser::ExecutionPartConstruct *x) { if (x == nullptr) { return SourcedActionStmt{}; @@ -202,7 +223,7 @@ std::optional<SomeExpr> GetEvaluateExpr(const parser::Expr &parserExpr) { // ForwardOwningPointer typedExpr // `- GenericExprWrapper ^.get() // `- std::optional<Expr> ^->v - return typedExpr.get()->v; + return DEREF(typedExpr.get()).v; } std::optional<evaluate::DynamicType> GetDynamicType( diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 18fc638..1228493 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -435,6 +435,22 @@ public: return true; } + bool Pre(const parser::UseStmt &x) { + if (x.moduleName.symbol) { + Scope &thisScope{context_.FindScope(x.moduleName.source)}; + common::visit( + [&](auto &&details) { + if constexpr (std::is_convertible_v<decltype(details), + const WithOmpDeclarative &>) { + AddOmpRequiresToScope(thisScope, details.ompRequires(), + details.ompAtomicDefaultMemOrder()); + } + }, + x.moduleName.symbol->details()); + } + return true; + } + bool Pre(const parser::OmpMetadirectiveDirective &x) { PushContext(x.v.source, llvm::omp::Directive::OMPD_metadirective); return true; @@ -538,38 +554,37 @@ public: void Post(const parser::OpenMPFlushConstruct &) { PopContext(); } bool Pre(const parser::OpenMPRequiresConstruct &x) { - using Flags = WithOmpDeclarative::RequiresFlags; - using Requires = WithOmpDeclarative::RequiresFlag; + using RequiresClauses = WithOmpDeclarative::RequiresClauses; PushContext(x.source, llvm::omp::Directive::OMPD_requires); // Gather information from the clauses. - Flags flags; - std::optional<common::OmpMemoryOrderType> memOrder; + RequiresClauses reqs; + const common::OmpMemoryOrderType *memOrder{nullptr}; for (const parser::OmpClause &clause : x.v.Clauses().v) { - flags |= common::visit( + using OmpClause = parser::OmpClause; + reqs |= common::visit( common::visitors{ - [&memOrder]( - const parser::OmpClause::AtomicDefaultMemOrder &atomic) { - memOrder = atomic.v.v; - return Flags{}; - }, - [](const parser::OmpClause::ReverseOffload &) { - return Flags{Requires::ReverseOffload}; - }, - [](const parser::OmpClause::UnifiedAddress &) { - return Flags{Requires::UnifiedAddress}; + [&](const OmpClause::AtomicDefaultMemOrder &atomic) { + memOrder = &atomic.v.v; + return RequiresClauses{}; }, - [](const parser::OmpClause::UnifiedSharedMemory &) { - return Flags{Requires::UnifiedSharedMemory}; - }, - [](const parser::OmpClause::DynamicAllocators &) { - return Flags{Requires::DynamicAllocators}; + [&](auto &&s) { + using TypeS = llvm::remove_cvref_t<decltype(s)>; + if constexpr ( // + std::is_same_v<TypeS, OmpClause::DynamicAllocators> || + std::is_same_v<TypeS, OmpClause::ReverseOffload> || + std::is_same_v<TypeS, OmpClause::UnifiedAddress> || + std::is_same_v<TypeS, OmpClause::UnifiedSharedMemory>) { + return RequiresClauses{clause.Id()}; + } else { + return RequiresClauses{}; + } }, - [](const auto &) { return Flags{}; }}, + }, clause.u); } // Merge clauses into parents' symbols details. - AddOmpRequiresToScope(currScope(), flags, memOrder); + AddOmpRequiresToScope(currScope(), &reqs, memOrder); return true; } void Post(const parser::OpenMPRequiresConstruct &) { PopContext(); } @@ -1001,8 +1016,9 @@ private: std::int64_t ordCollapseLevel{0}; - void AddOmpRequiresToScope(Scope &, WithOmpDeclarative::RequiresFlags, - std::optional<common::OmpMemoryOrderType>); + void AddOmpRequiresToScope(Scope &, + const WithOmpDeclarative::RequiresClauses *, + const common::OmpMemoryOrderType *); void IssueNonConformanceWarning(llvm::omp::Directive D, parser::CharBlock source, unsigned EmitFromVersion); @@ -3309,86 +3325,6 @@ void ResolveOmpParts( } } -void ResolveOmpTopLevelParts( - SemanticsContext &context, const parser::Program &program) { - if (!context.IsEnabled(common::LanguageFeature::OpenMP)) { - return; - } - - // Gather REQUIRES clauses from all non-module top-level program unit symbols, - // combine them together ensuring compatibility and apply them to all these - // program units. Modules are skipped because their REQUIRES clauses should be - // propagated via USE statements instead. - WithOmpDeclarative::RequiresFlags combinedFlags; - std::optional<common::OmpMemoryOrderType> combinedMemOrder; - - // Function to go through non-module top level program units and extract - // REQUIRES information to be processed by a function-like argument. - auto processProgramUnits{[&](auto processFn) { - for (const parser::ProgramUnit &unit : program.v) { - if (!std::holds_alternative<common::Indirection<parser::Module>>( - unit.u) && - !std::holds_alternative<common::Indirection<parser::Submodule>>( - unit.u) && - !std::holds_alternative< - common::Indirection<parser::CompilerDirective>>(unit.u)) { - Symbol *symbol{common::visit( - [&context](auto &x) { - Scope *scope = GetScope(context, x.value()); - return scope ? scope->symbol() : nullptr; - }, - unit.u)}; - // FIXME There is no symbol defined for MainProgram units in certain - // circumstances, so REQUIRES information has no place to be stored in - // these cases. - if (!symbol) { - continue; - } - common::visit( - [&](auto &details) { - if constexpr (std::is_convertible_v<decltype(&details), - WithOmpDeclarative *>) { - processFn(*symbol, details); - } - }, - symbol->details()); - } - } - }}; - - // Combine global REQUIRES information from all program units except modules - // and submodules. - processProgramUnits([&](Symbol &symbol, WithOmpDeclarative &details) { - if (const WithOmpDeclarative::RequiresFlags * - flags{details.ompRequires()}) { - combinedFlags |= *flags; - } - if (const common::OmpMemoryOrderType * - memOrder{details.ompAtomicDefaultMemOrder()}) { - if (combinedMemOrder && *combinedMemOrder != *memOrder) { - context.Say(symbol.scope()->sourceRange(), - "Conflicting '%s' REQUIRES clauses found in compilation " - "unit"_err_en_US, - parser::ToUpperCaseLetters(llvm::omp::getOpenMPClauseName( - llvm::omp::Clause::OMPC_atomic_default_mem_order) - .str())); - } - combinedMemOrder = *memOrder; - } - }); - - // Update all program units except modules and submodules with the combined - // global REQUIRES information. - processProgramUnits([&](Symbol &, WithOmpDeclarative &details) { - if (combinedFlags.any()) { - details.set_ompRequires(combinedFlags); - } - if (combinedMemOrder) { - details.set_ompAtomicDefaultMemOrder(*combinedMemOrder); - } - }); -} - static bool IsSymbolThreadprivate(const Symbol &symbol) { if (const auto *details{symbol.detailsIf<HostAssocDetails>()}) { return details->symbol().test(Symbol::Flag::OmpThreadprivate); @@ -3547,42 +3483,39 @@ void OmpAttributeVisitor::CheckLabelContext(const parser::CharBlock source, } void OmpAttributeVisitor::AddOmpRequiresToScope(Scope &scope, - WithOmpDeclarative::RequiresFlags flags, - std::optional<common::OmpMemoryOrderType> memOrder) { - Scope *scopeIter = &scope; - do { - if (Symbol * symbol{scopeIter->symbol()}) { - common::visit( - [&](auto &details) { - // Store clauses information into the symbol for the parent and - // enclosing modules, programs, functions and subroutines. - if constexpr (std::is_convertible_v<decltype(&details), - WithOmpDeclarative *>) { - if (flags.any()) { - if (const WithOmpDeclarative::RequiresFlags * - otherFlags{details.ompRequires()}) { - flags |= *otherFlags; - } - details.set_ompRequires(flags); + const WithOmpDeclarative::RequiresClauses *reqs, + const common::OmpMemoryOrderType *memOrder) { + const Scope &programUnit{omp::GetProgramUnit(scope)}; + using RequiresClauses = WithOmpDeclarative::RequiresClauses; + RequiresClauses combinedReqs{reqs ? *reqs : RequiresClauses{}}; + + if (auto *symbol{const_cast<Symbol *>(programUnit.symbol())}) { + common::visit( + [&](auto &details) { + if constexpr (std::is_convertible_v<decltype(&details), + WithOmpDeclarative *>) { + if (combinedReqs.any()) { + if (const RequiresClauses *otherReqs{details.ompRequires()}) { + combinedReqs |= *otherReqs; } - if (memOrder) { - if (details.has_ompAtomicDefaultMemOrder() && - *details.ompAtomicDefaultMemOrder() != *memOrder) { - context_.Say(scopeIter->sourceRange(), - "Conflicting '%s' REQUIRES clauses found in compilation " - "unit"_err_en_US, - parser::ToUpperCaseLetters(llvm::omp::getOpenMPClauseName( - llvm::omp::Clause::OMPC_atomic_default_mem_order) - .str())); - } - details.set_ompAtomicDefaultMemOrder(*memOrder); + details.set_ompRequires(combinedReqs); + } + if (memOrder) { + if (details.has_ompAtomicDefaultMemOrder() && + *details.ompAtomicDefaultMemOrder() != *memOrder) { + context_.Say(programUnit.sourceRange(), + "Conflicting '%s' REQUIRES clauses found in compilation " + "unit"_err_en_US, + parser::ToUpperCaseLetters(llvm::omp::getOpenMPClauseName( + llvm::omp::Clause::OMPC_atomic_default_mem_order) + .str())); } + details.set_ompAtomicDefaultMemOrder(*memOrder); } - }, - symbol->details()); - } - scopeIter = &scopeIter->parent(); - } while (!scopeIter->IsGlobal()); + } + }, + symbol->details()); + } } void OmpAttributeVisitor::IssueNonConformanceWarning(llvm::omp::Directive D, diff --git a/flang/lib/Semantics/resolve-directives.h b/flang/lib/Semantics/resolve-directives.h index 5a890c2..36d3ce9 100644 --- a/flang/lib/Semantics/resolve-directives.h +++ b/flang/lib/Semantics/resolve-directives.h @@ -23,7 +23,5 @@ class SemanticsContext; void ResolveAccParts( SemanticsContext &, const parser::ProgramUnit &, Scope *topScope); void ResolveOmpParts(SemanticsContext &, const parser::ProgramUnit &); -void ResolveOmpTopLevelParts(SemanticsContext &, const parser::Program &); - } // namespace Fortran::semantics #endif diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 86121880..ae0ff9ca 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -10687,9 +10687,6 @@ void ResolveNamesVisitor::Post(const parser::Program &x) { CHECK(!attrs_); CHECK(!cudaDataAttr_); CHECK(!GetDeclTypeSpec()); - // Top-level resolution to propagate information across program units after - // each of them has been resolved separately. - ResolveOmpTopLevelParts(context(), x); } // A singleton instance of the scope -> IMPLICIT rules mapping is diff --git a/flang/lib/Semantics/symbol.cpp b/flang/lib/Semantics/symbol.cpp index 69169469..0ec44b7 100644 --- a/flang/lib/Semantics/symbol.cpp +++ b/flang/lib/Semantics/symbol.cpp @@ -70,6 +70,32 @@ static void DumpList(llvm::raw_ostream &os, const char *label, const T &list) { } } +llvm::raw_ostream &operator<<( + llvm::raw_ostream &os, const WithOmpDeclarative &x) { + if (x.has_ompRequires() || x.has_ompAtomicDefaultMemOrder()) { + os << " OmpRequirements:("; + if (const common::OmpMemoryOrderType *admo{x.ompAtomicDefaultMemOrder()}) { + os << parser::ToLowerCaseLetters(llvm::omp::getOpenMPClauseName( + llvm::omp::Clause::OMPC_atomic_default_mem_order)) + << '(' << parser::ToLowerCaseLetters(EnumToString(*admo)) << ')'; + if (x.has_ompRequires()) { + os << ','; + } + } + if (const WithOmpDeclarative::RequiresClauses *reqs{x.ompRequires()}) { + size_t num{0}, size{reqs->count()}; + reqs->IterateOverMembers([&](llvm::omp::Clause f) { + os << parser::ToLowerCaseLetters(llvm::omp::getOpenMPClauseName(f)); + if (++num < size) { + os << ','; + } + }); + } + os << ')'; + } + return os; +} + void SubprogramDetails::set_moduleInterface(Symbol &symbol) { CHECK(!moduleInterface_); moduleInterface_ = &symbol; @@ -150,6 +176,7 @@ llvm::raw_ostream &operator<<( os << x; } } + os << static_cast<const WithOmpDeclarative &>(x); return os; } @@ -580,7 +607,9 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const Details &details) { common::visit( // common::visitors{ [&](const UnknownDetails &) {}, - [&](const MainProgramDetails &) {}, + [&](const MainProgramDetails &x) { + os << static_cast<const WithOmpDeclarative &>(x); + }, [&](const ModuleDetails &x) { if (x.isSubmodule()) { os << " ("; @@ -599,6 +628,7 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const Details &details) { if (x.isDefaultPrivate()) { os << " isDefaultPrivate"; } + os << static_cast<const WithOmpDeclarative &>(x); }, [&](const SubprogramNameDetails &x) { os << ' ' << EnumToString(x.kind()); diff --git a/flang/test/Fir/CUDA/cuda-code-gen.mlir b/flang/test/Fir/CUDA/cuda-code-gen.mlir index bbd3f9f..60cda9e 100644 --- a/flang/test/Fir/CUDA/cuda-code-gen.mlir +++ b/flang/test/Fir/CUDA/cuda-code-gen.mlir @@ -284,3 +284,31 @@ module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_e // CHECK-LABEL: llvm.func @_QQxxx() // CHECK: llvm.alloca %{{.*}} x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr // CHECK-NOT: llvm.call @_FortranACUFAllocDescriptor + +// ----- + +module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} { + gpu.module @cuda_device_mod { + fir.global @_QMkernelsEinitial_val {data_attr = #cuf.cuda<constant>} : i32 { + %0 = fir.zero_bits i32 + fir.has_value %0 : i32 + } + gpu.func @_QMkernelsPassign(%arg0: !fir.ref<!fir.array<?xi32>>) kernel { + %c-1 = arith.constant -1 : index + %c1_i32 = arith.constant 1 : i32 + %0 = arith.constant 1 : i32 + %1 = arith.addi %0, %c1_i32 : i32 + %2 = fir.address_of(@_QMkernelsEinitial_val) : !fir.ref<i32> + %4 = fir.load %2 : !fir.ref<i32> + %5 = fir.convert %1 : (i32) -> i64 + %6 = fircg.ext_array_coor %arg0(%c-1)<%5> : (!fir.ref<!fir.array<?xi32>>, index, i64) -> !fir.ref<i32> + fir.store %4 to %6 : !fir.ref<i32> + gpu.return + } + } +} + +// CHECK: llvm.mlir.global external @_QMkernelsEinitial_val() {addr_space = 4 : i32} : i32 +// CHECK-LABEL: gpu.func @_QMkernelsPassign +// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMkernelsEinitial_val : !llvm.ptr<4> +// CHECK: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<4> to !llvm.ptr diff --git a/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir new file mode 100644 index 0000000..fabfe4c --- /dev/null +++ b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir @@ -0,0 +1,24 @@ +// Use --mlir-disable-threading so that the diagnostic printing is serialized. +// RUN: fir-opt %s -pass-pipeline='builtin.module(test-fir-openacc-interfaces)' -split-input-file --mlir-disable-threading 2>&1 | FileCheck %s + +module { + // Build a scalar view via fir.declare with a storage operand into an array of i8 + func.func @_QPdeclare_with_storage_is_nonscalar() { + %c0 = arith.constant 0 : index + %arr = fir.alloca !fir.array<4xi8> + %elem_i8 = fir.coordinate_of %arr, %c0 : (!fir.ref<!fir.array<4xi8>>, index) -> !fir.ref<i8> + %elem_f32 = fir.convert %elem_i8 : (!fir.ref<i8>) -> !fir.ref<f32> + %view = fir.declare %elem_f32 storage(%arr[0]) {uniq_name = "_QFpi"} + : (!fir.ref<f32>, !fir.ref<!fir.array<4xi8>>) -> !fir.ref<f32> + // Force interface query through an acc op that prints type category + %cp = acc.copyin varPtr(%view : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false} + acc.enter_data dataOperands(%cp : !fir.ref<f32>) + return + } + + // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false} + // CHECK: Pointer-like and Mappable: !fir.ref<f32> + // CHECK: Type category: array +} + + diff --git a/flang/test/Fir/OpenACC/recipe-bufferization.mlir b/flang/test/Fir/OpenACC/recipe-bufferization.mlir new file mode 100644 index 0000000..c4f96f6 --- /dev/null +++ b/flang/test/Fir/OpenACC/recipe-bufferization.mlir @@ -0,0 +1,316 @@ +// RUN: fir-opt %s --fir-acc-recipe-bufferization -split-input-file | FileCheck %s + +// ----- + +acc.private.recipe @priv_ref_box : !fir.box<i32> init { +^bb0(%arg0: !fir.box<i32>): + %1 = fir.allocmem i32 + %2 = fir.embox %1 : (!fir.heap<i32>) -> !fir.box<i32> + acc.yield %2 : !fir.box<i32> +} destroy { +^bb0(%arg0: !fir.box<i32>, %arg1: !fir.box<i32>): + %0 = fir.box_addr %arg1 : (!fir.box<i32>) -> !fir.ref<i32> + %1 = fir.convert %0 : (!fir.ref<i32>) -> !fir.heap<i32> + fir.freemem %1 : !fir.heap<i32> + acc.yield +} + +// CHECK-LABEL: acc.private.recipe @priv_ref_box : !fir.ref<!fir.box<i32>> init +// CHECK: ^bb0(%[[ARG:.*]]: !fir.ref<!fir.box<i32>>) +// CHECK: %[[EMBOX:.*]] = fir.embox +// CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.box<i32> +// CHECK: fir.store %[[EMBOX]] to %[[ALLOCA]] : !fir.ref<!fir.box<i32>> +// CHECK: acc.yield %[[ALLOCA]] : !fir.ref<!fir.box<i32>> +// CHECK: } destroy { +// CHECK: ^bb0(%[[DARG0:.*]]: !fir.ref<!fir.box<i32>>, %[[DARG1:.*]]: !fir.ref<!fir.box<i32>>) +// CHECK: %[[LD1:.*]] = fir.load %[[DARG1]] : !fir.ref<!fir.box<i32>> +// CHECK: %[[ADDR:.*]] = fir.box_addr %[[LD1]] : (!fir.box<i32>) -> !fir.ref<i32> +// CHECK: %[[CVT:.*]] = fir.convert %[[ADDR]] : (!fir.ref<i32>) -> !fir.heap<i32> + +// ----- + +// Test private recipe without destroy region. + +acc.private.recipe @priv_ref_box_no_destroy : !fir.box<i32> init { +^bb0(%arg0: !fir.box<i32>): + %1 = fir.alloca i32 + %2 = fir.embox %1 : (!fir.ref<i32>) -> !fir.box<i32> + acc.yield %2 : !fir.box<i32> +} + +// CHECK-LABEL: acc.private.recipe @priv_ref_box_no_destroy : !fir.ref<!fir.box<i32>> init +// CHECK: ^bb0(%[[ARG:.*]]: !fir.ref<!fir.box<i32>>) +// CHECK: %[[EMBOX:.*]] = fir.embox +// CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.box<i32> +// CHECK: fir.store %[[EMBOX]] to %[[ALLOCA]] : !fir.ref<!fir.box<i32>> +// CHECK: acc.yield %[[ALLOCA]] : !fir.ref<!fir.box<i32>> +// CHECK: } + +// ----- + +// Firstprivate recipe with destroy region. +acc.firstprivate.recipe @fp_ref_box : !fir.box<i32> init { +^bb0(%arg0: !fir.box<i32>): + %0 = fir.allocmem i32 + %1 = fir.embox %0 : (!fir.heap<i32>) -> !fir.box<i32> + acc.yield %1 : !fir.box<i32> +} copy { +^bb0(%src: !fir.box<i32>, %dst: !fir.box<i32>): + %s_addr = fir.box_addr %src : (!fir.box<i32>) -> !fir.ref<i32> + %val = fir.load %s_addr : !fir.ref<i32> + %d_addr = fir.box_addr %dst : (!fir.box<i32>) -> !fir.ref<i32> + fir.store %val to %d_addr : !fir.ref<i32> + acc.yield +} destroy { +^bb0(%arg0: !fir.box<i32>, %arg1: !fir.box<i32>): + acc.yield +} + +// CHECK-LABEL: acc.firstprivate.recipe @fp_ref_box : !fir.ref<!fir.box<i32>> init +// CHECK: ^bb0(%[[IARG:.*]]: !fir.ref<!fir.box<i32>>) +// CHECK: %[[EMBOX_FP:.*]] = fir.embox +// CHECK: %[[ALLOCA_FP:.*]] = fir.alloca !fir.box<i32> +// CHECK: fir.store %[[EMBOX_FP]] to %[[ALLOCA_FP]] : !fir.ref<!fir.box<i32>> +// CHECK: acc.yield %[[ALLOCA_FP]] : !fir.ref<!fir.box<i32>> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<!fir.box<i32>>, %[[DST:.*]]: !fir.ref<!fir.box<i32>>) +// CHECK: %[[LSRC:.*]] = fir.load %[[SRC]] : !fir.ref<!fir.box<i32>> +// CHECK: %[[LDST:.*]] = fir.load %[[DST]] : !fir.ref<!fir.box<i32>> +// CHECK: %[[SADDR:.*]] = fir.box_addr %[[LSRC]] : (!fir.box<i32>) -> !fir.ref<i32> +// CHECK: %[[VAL:.*]] = fir.load %[[SADDR]] : !fir.ref<i32> +// CHECK: %[[DADDR:.*]] = fir.box_addr %[[LDST]] : (!fir.box<i32>) -> !fir.ref<i32> +// CHECK: fir.store %[[VAL]] to %[[DADDR]] : !fir.ref<i32> +// CHECK: } destroy { +// CHECK: ^bb0(%[[FDARG0:.*]]: !fir.ref<!fir.box<i32>>, %[[FDARG1:.*]]: !fir.ref<!fir.box<i32>>) + +// ----- + +// Firstprivate recipe without destroy region. +acc.firstprivate.recipe @fp_ref_box_no_destroy : !fir.box<i32> init { +^bb0(%arg0: !fir.box<i32>): + %0 = fir.alloca i32 + %1 = fir.embox %0 : (!fir.ref<i32>) -> !fir.box<i32> + acc.yield %1 : !fir.box<i32> +} copy { +^bb0(%src: !fir.box<i32>, %dst: !fir.box<i32>): + %s_addr = fir.box_addr %src : (!fir.box<i32>) -> !fir.ref<i32> + %val = fir.load %s_addr : !fir.ref<i32> + %d_addr = fir.box_addr %dst : (!fir.box<i32>) -> !fir.ref<i32> + fir.store %val to %d_addr : !fir.ref<i32> + acc.yield +} + +// CHECK-LABEL: acc.firstprivate.recipe @fp_ref_box_no_destroy : !fir.ref<!fir.box<i32>> init +// CHECK: ^bb0(%[[IARG2:.*]]: !fir.ref<!fir.box<i32>>) +// CHECK: %[[EMBOX_FP2:.*]] = fir.embox +// CHECK: %[[ALLOCA_FP2:.*]] = fir.alloca !fir.box<i32> +// CHECK: fir.store %[[EMBOX_FP2]] to %[[ALLOCA_FP2]] : !fir.ref<!fir.box<i32>> +// CHECK: acc.yield %[[ALLOCA_FP2]] : !fir.ref<!fir.box<i32>> +// CHECK: } copy { +// CHECK: ^bb0(%[[SRC2:.*]]: !fir.ref<!fir.box<i32>>, %[[DST2:.*]]: !fir.ref<!fir.box<i32>>) +// CHECK: %[[LSRC2:.*]] = fir.load %[[SRC2]] : !fir.ref<!fir.box<i32>> +// CHECK: %[[LDST2:.*]] = fir.load %[[DST2]] : !fir.ref<!fir.box<i32>> +// CHECK: %[[SADDR2:.*]] = fir.box_addr %[[LSRC2]] : (!fir.box<i32>) -> !fir.ref<i32> +// CHECK: %[[VAL2:.*]] = fir.load %[[SADDR2]] : !fir.ref<i32> +// CHECK: %[[DADDR2:.*]] = fir.box_addr %[[LDST2]] : (!fir.box<i32>) -> !fir.ref<i32> +// CHECK: fir.store %[[VAL2]] to %[[DADDR2]] : !fir.ref<i32> + +// ----- + +// Reduction recipe with destroy region. +acc.reduction.recipe @red_ref_box : !fir.box<i32> reduction_operator <add> init { +^bb0(%arg0: !fir.box<i32>): + %0 = fir.allocmem i32 + %1 = fir.embox %0 : (!fir.heap<i32>) -> !fir.box<i32> + acc.yield %1 : !fir.box<i32> +} combiner { +^bb0(%lhs: !fir.box<i32>, %rhs: !fir.box<i32>): + %l_addr = fir.box_addr %lhs : (!fir.box<i32>) -> !fir.ref<i32> + %l_val = fir.load %l_addr : !fir.ref<i32> + %r_addr = fir.box_addr %rhs : (!fir.box<i32>) -> !fir.ref<i32> + %r_val = fir.load %r_addr : !fir.ref<i32> + %sum = arith.addi %l_val, %r_val : i32 + %tmp = fir.alloca i32 + fir.store %sum to %tmp : !fir.ref<i32> + %new = fir.embox %tmp : (!fir.ref<i32>) -> !fir.box<i32> + acc.yield %new : !fir.box<i32> +} destroy { +^bb0(%arg0: !fir.box<i32>, %arg1: !fir.box<i32>): + acc.yield +} + +// CHECK-LABEL: acc.reduction.recipe @red_ref_box : !fir.ref<!fir.box<i32>> reduction_operator <add> init +// CHECK: ^bb0(%[[IARGR:.*]]: !fir.ref<!fir.box<i32>>) +// CHECK: %[[EMBOXR:.*]] = fir.embox +// CHECK: %[[ALLOCAR:.*]] = fir.alloca !fir.box<i32> +// CHECK: fir.store %[[EMBOXR]] to %[[ALLOCAR]] : !fir.ref<!fir.box<i32>> +// CHECK: acc.yield %[[ALLOCAR]] : !fir.ref<!fir.box<i32>> +// CHECK: } combiner { +// CHECK: ^bb0(%[[LHS:.*]]: !fir.ref<!fir.box<i32>>, %[[RHS:.*]]: !fir.ref<!fir.box<i32>>) +// CHECK: %[[LLHS:.*]] = fir.load %[[LHS]] : !fir.ref<!fir.box<i32>> +// CHECK: %[[LRHS:.*]] = fir.load %[[RHS]] : !fir.ref<!fir.box<i32>> +// CHECK: %[[LADDR:.*]] = fir.box_addr %[[LLHS]] : (!fir.box<i32>) -> !fir.ref<i32> +// CHECK: %[[LVAL:.*]] = fir.load %[[LADDR]] : !fir.ref<i32> +// CHECK: %[[RADDR:.*]] = fir.box_addr %[[LRHS]] : (!fir.box<i32>) -> !fir.ref<i32> +// CHECK: %[[RVAL:.*]] = fir.load %[[RADDR]] : !fir.ref<i32> +// CHECK: %[[SUM:.*]] = arith.addi %[[LVAL]], %[[RVAL]] : i32 +// CHECK: %[[I32ALLOCA:.*]] = fir.alloca i32 +// CHECK: fir.store %[[SUM]] to %[[I32ALLOCA]] : !fir.ref<i32> +// CHECK: %[[NEWBOX:.*]] = fir.embox %[[I32ALLOCA]] : (!fir.ref<i32>) -> !fir.box<i32> +// CHECK: %[[BOXALLOCA:.*]] = fir.alloca !fir.box<i32> +// CHECK: fir.store %[[NEWBOX]] to %[[BOXALLOCA]] : !fir.ref<!fir.box<i32>> +// CHECK: acc.yield %[[BOXALLOCA]] : !fir.ref<!fir.box<i32>> +// CHECK: } destroy { +// CHECK: ^bb0(%[[RD0:.*]]: !fir.ref<!fir.box<i32>>, %[[RD1:.*]]: !fir.ref<!fir.box<i32>>) + +// ----- + +// Reduction recipe without destroy region. +acc.reduction.recipe @red_ref_box_no_destroy : !fir.box<i32> reduction_operator <add> init { +^bb0(%arg0: !fir.box<i32>): + %0 = fir.alloca i32 + %1 = fir.embox %0 : (!fir.ref<i32>) -> !fir.box<i32> + acc.yield %1 : !fir.box<i32> +} combiner { +^bb0(%lhs: !fir.box<i32>, %rhs: !fir.box<i32>): + %l_addr = fir.box_addr %lhs : (!fir.box<i32>) -> !fir.ref<i32> + %l_val = fir.load %l_addr : !fir.ref<i32> + %r_addr = fir.box_addr %rhs : (!fir.box<i32>) -> !fir.ref<i32> + %r_val = fir.load %r_addr : !fir.ref<i32> + %sum = arith.addi %l_val, %r_val : i32 + %tmp = fir.alloca i32 + fir.store %sum to %tmp : !fir.ref<i32> + %new = fir.embox %tmp : (!fir.ref<i32>) -> !fir.box<i32> + acc.yield %new : !fir.box<i32> +} + +// CHECK-LABEL: acc.reduction.recipe @red_ref_box_no_destroy : !fir.ref<!fir.box<i32>> reduction_operator <add> init +// CHECK: ^bb0(%[[IARGR2:.*]]: !fir.ref<!fir.box<i32>>) +// CHECK: %[[EMBOXR2:.*]] = fir.embox +// CHECK: %[[ALLOCAR2:.*]] = fir.alloca !fir.box<i32> +// CHECK: fir.store %[[EMBOXR2]] to %[[ALLOCAR2]] : !fir.ref<!fir.box<i32>> +// CHECK: acc.yield %[[ALLOCAR2]] : !fir.ref<!fir.box<i32>> +// CHECK: } combiner { +// CHECK: ^bb0(%[[LHS2:.*]]: !fir.ref<!fir.box<i32>>, %[[RHS2:.*]]: !fir.ref<!fir.box<i32>>) +// CHECK: %[[LLHS2:.*]] = fir.load %[[LHS2]] : !fir.ref<!fir.box<i32>> +// CHECK: %[[LRHS2:.*]] = fir.load %[[RHS2]] : !fir.ref<!fir.box<i32>> +// CHECK: %[[LADDR2:.*]] = fir.box_addr %[[LLHS2]] : (!fir.box<i32>) -> !fir.ref<i32> +// CHECK: %[[LVAL2:.*]] = fir.load %[[LADDR2]] : !fir.ref<i32> +// CHECK: %[[RADDR2:.*]] = fir.box_addr %[[LRHS2]] : (!fir.box<i32>) -> !fir.ref<i32> +// CHECK: %[[RVAL2:.*]] = fir.load %[[RADDR2]] : !fir.ref<i32> +// CHECK: %[[SUM2:.*]] = arith.addi %[[LVAL2]], %[[RVAL2]] : i32 +// CHECK: %[[I32ALLOCA2:.*]] = fir.alloca i32 +// CHECK: fir.store %[[SUM2]] to %[[I32ALLOCA2]] : !fir.ref<i32> +// CHECK: %[[NEWBOX2:.*]] = fir.embox %[[I32ALLOCA2]] : (!fir.ref<i32>) -> !fir.box<i32> +// CHECK: %[[BOXALLOCA2:.*]] = fir.alloca !fir.box<i32> +// CHECK: fir.store %[[NEWBOX2]] to %[[BOXALLOCA2]] : !fir.ref<!fir.box<i32>> +// CHECK: acc.yield %[[BOXALLOCA2]] : !fir.ref<!fir.box<i32>> + +// ----- + +// Comprehensive tests that also test recipe usages updates. + +acc.private.recipe @privatization_ref_i32 : !fir.ref<i32> init { +^bb0(%arg0: !fir.ref<i32>): + %0 = fir.alloca i32 + %1 = fir.declare %0 {uniq_name = "acc.private.init"} : (!fir.ref<i32>) -> !fir.ref<i32> + acc.yield %1 : !fir.ref<i32> +} +acc.private.recipe @privatization_box_Uxf32 : !fir.box<!fir.array<?xf32>> init { +^bb0(%arg0: !fir.box<!fir.array<?xf32>>): + %c0 = arith.constant 0 : index + %0:3 = fir.box_dims %arg0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index) + %1 = fir.shape %0#1 : (index) -> !fir.shape<1> + %2 = fir.allocmem !fir.array<?xf32>, %0#1 {bindc_name = ".tmp", uniq_name = ""} + %3 = fir.declare %2(%1) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.heap<!fir.array<?xf32>> + %4 = fir.embox %3(%1) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>> + acc.yield %4 : !fir.box<!fir.array<?xf32>> +} destroy { +^bb0(%arg0: !fir.box<!fir.array<?xf32>>, %arg1: !fir.box<!fir.array<?xf32>>): + %0 = fir.box_addr %arg1 : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>> + %1 = fir.convert %0 : (!fir.ref<!fir.array<?xf32>>) -> !fir.heap<!fir.array<?xf32>> + fir.freemem %1 : !fir.heap<!fir.array<?xf32>> + acc.terminator +} +func.func @_QPfoo(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}) { + %c200_i32 = arith.constant 200 : i32 + %c1_i32 = arith.constant 1 : i32 + %0 = fir.dummy_scope : !fir.dscope + %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFfooEi"} + %2 = fir.declare %1 {uniq_name = "_QFfooEi"} : (!fir.ref<i32>) -> !fir.ref<i32> + %3 = fir.declare %arg0 dummy_scope %0 {uniq_name = "_QFfooEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>> + acc.parallel combined(loop) { + %4 = acc.private var(%3 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {name = "x"} + %5 = acc.private varPtr(%2 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} + acc.loop combined(parallel) private(@privatization_box_Uxf32 -> %4 : !fir.box<!fir.array<?xf32>>, @privatization_ref_i32 -> %5 : !fir.ref<i32>) control(%arg1 : i32) = (%c1_i32 : i32) to (%c200_i32 : i32) step (%c1_i32 : i32) { + %6 = fir.dummy_scope : !fir.dscope + %7 = fir.declare %4 dummy_scope %6 {uniq_name = "_QFfooEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>> + %8 = fir.declare %5 {uniq_name = "_QFfooEi"} : (!fir.ref<i32>) -> !fir.ref<i32> + %9 = fir.convert %arg1 : (i32) -> f32 + %10 = fir.convert %arg1 : (i32) -> i64 + %11 = fir.array_coor %7 %10 : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32> + fir.store %9 to %11 : !fir.ref<f32> + acc.yield + } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]} + acc.yield + } + return +} + +// CHECK-LABEL: acc.private.recipe @privatization_ref_i32 : !fir.ref<i32> init { +// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>): +// CHECK: %[[VAL_1:.*]] = fir.alloca i32 +// CHECK: %[[VAL_2:.*]] = fir.declare %[[VAL_1]] {uniq_name = "acc.private.init"} : (!fir.ref<i32>) -> !fir.ref<i32> +// CHECK: acc.yield %[[VAL_2]] : !fir.ref<i32> +// CHECK: } + +// CHECK-LABEL: acc.private.recipe @privatization_box_Uxf32 : !fir.ref<!fir.box<!fir.array<?xf32>>> init { +// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xf32>>>): +// CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>> +// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index +// CHECK: %[[VAL_3:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_2]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index) +// CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]]#1 : (index) -> !fir.shape<1> +// CHECK: %[[VAL_5:.*]] = fir.allocmem !fir.array<?xf32>, %[[VAL_3]]#1 {bindc_name = ".tmp", uniq_name = ""} +// CHECK: %[[VAL_6:.*]] = fir.declare %[[VAL_5]](%[[VAL_4]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.heap<!fir.array<?xf32>> +// CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]](%[[VAL_4]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>> +// CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<?xf32>> +// CHECK: fir.store %[[VAL_7]] to %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<?xf32>>> +// CHECK: acc.yield %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<?xf32>>> + +// CHECK-LABEL: } destroy { +// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xf32>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.array<?xf32>>>): +// CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<?xf32>>> +// CHECK: %[[VAL_3:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>> +// CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (!fir.ref<!fir.array<?xf32>>) -> !fir.heap<!fir.array<?xf32>> +// CHECK: fir.freemem %[[VAL_4]] : !fir.heap<!fir.array<?xf32>> +// CHECK: acc.terminator +// CHECK: } + +// CHECK-LABEL: func.func @_QPfoo( +// CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}) { +// CHECK: %[[VAL_0:.*]] = arith.constant 200 : i32 +// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i32 +// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFfooEi"} +// CHECK: %[[VAL_4:.*]] = fir.declare %[[VAL_3]] {uniq_name = "_QFfooEi"} : (!fir.ref<i32>) -> !fir.ref<i32> +// CHECK: %[[VAL_5:.*]] = fir.declare %[[ARG0]] dummy_scope %[[VAL_2]] {uniq_name = "_QFfooEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>> +// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.array<?xf32>> +// CHECK: fir.store %[[VAL_5]] to %[[VAL_6]] : !fir.ref<!fir.box<!fir.array<?xf32>>> +// CHECK: acc.parallel combined(loop) { +// CHECK: %[[VAL_7:.*]] = acc.private varPtr(%[[VAL_6]] : !fir.ref<!fir.box<!fir.array<?xf32>>>) -> !fir.ref<!fir.box<!fir.array<?xf32>>> {name = "x"} +// CHECK: %[[VAL_8:.*]] = acc.private varPtr(%[[VAL_4]] : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"} +// CHECK: acc.loop combined(parallel) private(@privatization_box_Uxf32 -> %[[VAL_7]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, @privatization_ref_i32 -> %[[VAL_8]] : !fir.ref<i32>) control(%[[VAL_9:.*]] : i32) = (%[[VAL_1]] : i32) to (%[[VAL_0]] : i32) step (%[[VAL_1]] : i32) { +// CHECK: %[[VAL_10:.*]] = fir.dummy_scope : !fir.dscope +// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_7]] : !fir.ref<!fir.box<!fir.array<?xf32>>> +// CHECK: %[[VAL_12:.*]] = fir.declare %[[VAL_11]] dummy_scope %[[VAL_10]] {uniq_name = "_QFfooEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>> +// CHECK: %[[VAL_13:.*]] = fir.declare %[[VAL_8]] {uniq_name = "_QFfooEi"} : (!fir.ref<i32>) -> !fir.ref<i32> +// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_9]] : (i32) -> f32 +// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_9]] : (i32) -> i64 +// CHECK: %[[VAL_16:.*]] = fir.array_coor %[[VAL_12]] %[[VAL_15]] : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32> +// CHECK: fir.store %[[VAL_14]] to %[[VAL_16]] : !fir.ref<f32> +// CHECK: acc.yield +// CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]} +// CHECK: acc.yield +// CHECK: } +// CHECK: return +// CHECK: } diff --git a/flang/test/Lower/CUDA/TODO/cuda-allocate-default-init.cuf b/flang/test/Lower/CUDA/TODO/cuda-allocate-default-init.cuf new file mode 100644 index 0000000..f68a9aa --- /dev/null +++ b/flang/test/Lower/CUDA/TODO/cuda-allocate-default-init.cuf @@ -0,0 +1,15 @@ +! RUN: %not_todo_cmd bbc -emit-fir -fcuda -o - %s 2>&1 | FileCheck %s + +program test +implicit none + +type :: t1 + real(4) :: x_fin(1:10) = acos(-1.0_4) +end type t1 + +type(t1), allocatable, device :: t(:) + +! CHECK: not yet implemented: CUDA Fortran: allocate on device with default initialization +allocate(t(1:2)) + +end program diff --git a/flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf b/flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf new file mode 100644 index 0000000..3e59e2f --- /dev/null +++ b/flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf @@ -0,0 +1,9 @@ +! RUN: %not_todo_cmd bbc -emit-fir -fcuda -o - %s 2>&1 | FileCheck %s + +program main + implicit none + integer, device, allocatable :: a_d(:) + integer, allocatable :: a(:) +! CHECK: not yet implemented: CUDA Fortran: allocate with device source + allocate(a, source=a_d) +end program diff --git a/flang/test/Lower/CUDA/cuda-associate-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-associate-data-transfer.cuf new file mode 100644 index 0000000..af850d5 --- /dev/null +++ b/flang/test/Lower/CUDA/cuda-associate-data-transfer.cuf @@ -0,0 +1,21 @@ +! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s + +! Test detection of CUDA Fortran data transfer in presence of associuate +! statement. + +module m + real(8), device, dimension(10,10,10) :: d +end module m + +subroutine foo + use m + !@CUF associate(d1 => d) + d1 = 0.0 + !@CUF end associate +end subroutine + +! CHECK-LABEL: func.func @_QPfoo() +! CHECK: %[[D:.*]] = fir.address_of(@_QMmEd) : !fir.ref<!fir.array<10x10x10xf64>> +! CHECK: %[[D_DECL:.*]]:2 = hlfir.declare %[[D]](%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QMmEd"} : (!fir.ref<!fir.array<10x10x10xf64>>, !fir.shape<3>) -> (!fir.ref<!fir.array<10x10x10xf64>>, !fir.ref<!fir.array<10x10x10xf64>>) +! CHECK: %[[D1_DECL:.*]]:2 = hlfir.declare %[[D_DECL]]#0(%4) {uniq_name = "_QFfooEd1"} : (!fir.ref<!fir.array<10x10x10xf64>>, !fir.shape<3>) -> (!fir.ref<!fir.array<10x10x10xf64>>, !fir.ref<!fir.array<10x10x10xf64>>) +! CHECK: cuf.data_transfer %{{.*}} to %[[D1_DECL]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : f64, !fir.ref<!fir.array<10x10x10xf64>> diff --git a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 index 429f207..3987f9f 100644 --- a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 +++ b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 @@ -4,6 +4,11 @@ ! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s ! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s --check-prefix=FIR-CHECK +! TODO: This test hits a fatal TODO. Deal with allocatable component +! destructions. For arrays, allocatable component allocation may also be +! missing. +! XFAIL: * + module m_firstprivate_derived_alloc_comp type point real, allocatable :: x(:) diff --git a/flang/test/Lower/OpenACC/acc-private.f90 b/flang/test/Lower/OpenACC/acc-private.f90 index d37eb8d..485825d 100644 --- a/flang/test/Lower/OpenACC/acc-private.f90 +++ b/flang/test/Lower/OpenACC/acc-private.f90 @@ -26,6 +26,12 @@ ! CHECK: %[[DES_DST:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?x?x2xi32>>, !fir.shape<3>) -> !fir.box<!fir.array<?x?x2xi32>> ! CHECK: hlfir.assign %[[DES_SRC]] to %[[DES_DST]] : !fir.box<!fir.array<?x?x2xi32>>, !fir.box<!fir.array<?x?x2xi32>> ! CHECK: acc.terminator +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?x?x2xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?x?x2xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?x?x2xi32>>) -> !fir.ref<!fir.array<?x?x2xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?x?x2xi32>>) -> !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_section_lb4.ub9_box_Uxi32 : !fir.box<!fir.array<?xi32>> init { @@ -47,6 +53,12 @@ ! CHECK: %[[RIGHT:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>> ! CHECK: hlfir.assign %[[LEFT]] to %[[RIGHT]] : !fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>> ! CHECK: acc.terminator +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_box_Uxi32 : !fir.box<!fir.array<?xi32>> init { @@ -64,6 +76,12 @@ ! CHECK: %[[DES_V2:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>> ! CHECK: hlfir.assign %[[DES_V1]] to %[[DES_V2]] : !fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>> ! CHECK: acc.terminator +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_box_UxUx2xi32 : !fir.box<!fir.array<?x?x2xi32>> init { @@ -74,6 +92,12 @@ ! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<?x?x2xi32>, %[[DIM0]]#1, %[[DIM1]]#1 {bindc_name = ".tmp", uniq_name = ""} ! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?x?x2xi32>>, !fir.shape<3>) -> (!fir.box<!fir.array<?x?x2xi32>>, !fir.heap<!fir.array<?x?x2xi32>>) ! CHECK: acc.yield %[[DECL]]#0 : !fir.box<!fir.array<?x?x2xi32>> +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?x?x2xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?x?x2xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?x?x2xi32>>) -> !fir.ref<!fir.array<?x?x2xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?x?x2xi32>>) -> !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?x?x2xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_ref_box_ptr_Uxi32 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> init { @@ -89,6 +113,13 @@ ! CHECK: %[[CONV:.*]] = fir.convert %[[DECLAREBOX]]#0 : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>> ! CHECK: fir.store %[[DECLARE]]#0 to %[[CONV]] : !fir.ref<!fir.box<!fir.array<?xi32>>> ! CHECK: acc.yield %[[DECLAREBOX]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> +! CHECK: } destroy { +! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>): +! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>) -> !fir.ptr<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ptr<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: @privatization_ref_box_heap_i32 : !fir.ref<!fir.box<!fir.heap<i32>>> init { @@ -99,6 +130,12 @@ ! CHECK: %[[BOX:.*]] = fir.embox %[[ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>> ! CHECK: fir.store %[[BOX]] to %[[DECLARE]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>> ! CHECK: acc.yield %[[DECLARE]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>> +! CHECK: } destroy { +! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<i32>>>, %arg1: !fir.ref<!fir.box<!fir.heap<i32>>>): +! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.heap<i32>>> +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32> +! CHECK: fir.freemem %[[ADDR]] : !fir.heap<i32> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_ref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> init { @@ -114,6 +151,12 @@ ! CHECK: %[[CONV:.*]] = fir.convert %[[DECLAREBOX]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>> ! CHECK: fir.store %[[DECLARE]]#0 to %[[CONV]] : !fir.ref<!fir.box<!fir.array<?xi32>>> ! CHECK: acc.yield %[[DECLAREBOX]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> +! CHECK: } destroy { +! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>): +! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[ADDR]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.private.recipe @privatization_box_Uxi32 : !fir.box<!fir.array<?xi32>> init { @@ -124,6 +167,12 @@ ! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<?xi32>, %0#1 {bindc_name = ".tmp", uniq_name = ""} ! CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>) ! CHECK: acc.yield %[[DECLARE:.*]]#0 : !fir.box<!fir.array<?xi32>> +! CHECK: } destroy { +! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>): +! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>> +! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>> +! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_section_lb50.ub99_ref_50xf32 : !fir.ref<!fir.array<50xf32>> init { @@ -140,6 +189,7 @@ ! CHECK: %[[DES_SRC:.*]] = hlfir.designate %[[DECL_SRC]]#0 shape %[[SHAPE:.*]] : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<50xf32>> ! CHECK: %[[DES_DST:.*]] = hlfir.designate %[[DECL_DST]]#0 shape %[[SHAPE:.*]] : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<50xf32>> ! CHECK: hlfir.assign %[[DES_SRC]] to %[[DES_DST]] : !fir.ref<!fir.array<50xf32>>, !fir.ref<!fir.array<50xf32>> +! CHECK: acc.terminator ! CHECK: } ! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_100xf32 : !fir.ref<!fir.array<100xf32>> init { diff --git a/flang/test/Lower/OpenMP/Todo/attach-modifier.f90 b/flang/test/Lower/OpenMP/Todo/attach-modifier.f90 new file mode 100644 index 0000000..099f4a4 --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/attach-modifier.f90 @@ -0,0 +1,9 @@ +!RUN: %not_todo_cmd bbc -emit-hlfir -fopenmp -fopenmp-version=61 -o - %s 2>&1 | FileCheck %s +!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=61 -o - %s 2>&1 | FileCheck %s + +!CHECK: not yet implemented: ATTACH modifier is not implemented yet +subroutine f00(x) + integer, pointer :: x + !$omp target map(attach(always), tofrom: x) + !$omp end target +end diff --git a/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90 b/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90 new file mode 100644 index 0000000..7fc30b4 --- /dev/null +++ b/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90 @@ -0,0 +1,21 @@ +! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s +! +! Checks: +! - C_PTR mappings expand to `__address` member with CLOSE under USM paths. +! - use_device_ptr does not implicitly expand member operands in the clause. + +subroutine only_cptr_use_device_ptr + use iso_c_binding + type(c_ptr) :: cptr + integer :: i + + !$omp target data use_device_ptr(cptr) map(tofrom: i) + !$omp end target data +end subroutine + +! CHECK-LABEL: func.func @_QPonly_cptr_use_device_ptr() +! CHECK: %[[I_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<i32>, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "i"} +! CHECK: %[[CP_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>, !fir.type<{{.*}}__builtin_c_ptr{{.*}}>) map_clauses(return_param) capture(ByRef) -> !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>> +! CHECK: omp.target_data map_entries(%[[I_MAP]] : !fir.ref<i32>) use_device_ptr(%[[CP_MAP]] -> %{{.*}} : !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>) { +! CHECK: omp.terminator +! CHECK: } diff --git a/flang/test/Parser/OpenMP/map-modifiers-v61.f90 b/flang/test/Parser/OpenMP/map-modifiers-v61.f90 new file mode 100644 index 0000000..79bf73a --- /dev/null +++ b/flang/test/Parser/OpenMP/map-modifiers-v61.f90 @@ -0,0 +1,64 @@ +!RUN: %flang_fc1 -fdebug-unparse-no-sema -fopenmp -fopenmp-version=61 %s | FileCheck --ignore-case --check-prefix="UNPARSE" %s +!RUN: %flang_fc1 -fdebug-dump-parse-tree-no-sema -fopenmp -fopenmp-version=61 %s | FileCheck --check-prefix="PARSE-TREE" %s + +subroutine f00(x) + integer, pointer :: x + !$omp target map(attach(always): x) + !$omp end target +end + +!UNPARSE: SUBROUTINE f00 (x) +!UNPARSE: INTEGER, POINTER :: x +!UNPARSE: !$OMP TARGET MAP(ATTACH(ALWAYS): x) +!UNPARSE: !$OMP END TARGET +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target +!PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause +!PARSE-TREE: | | Modifier -> OmpAttachModifier -> Value = Always +!PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | | bool = 'true' +!PARSE-TREE: | Flags = None + + +subroutine f01(x) + integer, pointer :: x + !$omp target map(attach(auto): x) + !$omp end target +end + +!UNPARSE: SUBROUTINE f01 (x) +!UNPARSE: INTEGER, POINTER :: x +!UNPARSE: !$OMP TARGET MAP(ATTACH(AUTO): x) +!UNPARSE: !$OMP END TARGET +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target +!PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause +!PARSE-TREE: | | Modifier -> OmpAttachModifier -> Value = Auto +!PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | | bool = 'true' +!PARSE-TREE: | Flags = None + + +subroutine f02(x) + integer, pointer :: x + !$omp target map(attach(never): x) + !$omp end target +end + +!UNPARSE: SUBROUTINE f02 (x) +!UNPARSE: INTEGER, POINTER :: x +!UNPARSE: !$OMP TARGET MAP(ATTACH(NEVER): x) +!UNPARSE: !$OMP END TARGET +!UNPARSE: END SUBROUTINE + +!PARSE-TREE: OmpBeginDirective +!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target +!PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause +!PARSE-TREE: | | Modifier -> OmpAttachModifier -> Value = Never +!PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | | bool = 'true' +!PARSE-TREE: | Flags = None diff --git a/flang/test/Semantics/OpenMP/dump-requires-details.f90 b/flang/test/Semantics/OpenMP/dump-requires-details.f90 new file mode 100644 index 0000000..9c844c0 --- /dev/null +++ b/flang/test/Semantics/OpenMP/dump-requires-details.f90 @@ -0,0 +1,14 @@ +!RUN: %flang_fc1 -fopenmp -fopenmp-version=60 -fdebug-dump-symbols %s | FileCheck %s + +module fred +!$omp requires atomic_default_mem_order(relaxed) +contains +subroutine f00 + !$omp requires unified_address +end +subroutine f01 + !$omp requires unified_shared_memory +end +end module + +!CHECK: fred: Module OmpRequirements:(atomic_default_mem_order(relaxed),unified_address,unified_shared_memory) diff --git a/flang/test/Semantics/OpenMP/map-modifiers-v61.f90 b/flang/test/Semantics/OpenMP/map-modifiers-v61.f90 new file mode 100644 index 0000000..2daa57892 --- /dev/null +++ b/flang/test/Semantics/OpenMP/map-modifiers-v61.f90 @@ -0,0 +1,49 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=61 -Werror + +subroutine f00(x) + integer, pointer :: x + !ERROR: 'attach-modifier' modifier cannot occur multiple times + !$omp target map(attach(always), attach(never): x) + !$omp end target +end + +subroutine f01(x) + integer, pointer :: x + !ERROR: The 'attach-modifier' modifier can only appear on a map-entering construct or on a DECLARE_MAPPER directive + !$omp target_exit_data map(attach(always): x) +end + +subroutine f02(x) + integer, pointer :: x + !ERROR: The 'attach-modifier' modifier can only appear on a map-entering construct or on a DECLARE_MAPPER directive + !$omp target map(attach(never), from: x) + !$omp end target +end + +subroutine f03(x) + integer :: x + !ERROR: A list-item that appears in a map clause with the ATTACH modifier must have a base-pointer + !$omp target map(attach(always), tofrom: x) + !$omp end target +end + +module m +type t + integer :: z +end type + +type u + type(t), pointer :: y +end type + +contains + +subroutine f04(n) + integer :: n + type(u) :: x(10) + + !Expect no diagonstics + !$omp target map(attach(always), to: x(n)%y%z) + !$omp end target +end +end module diff --git a/flang/test/Semantics/OpenMP/omp-atomic-write-pointer-derived.f90 b/flang/test/Semantics/OpenMP/omp-atomic-write-pointer-derived.f90 new file mode 100644 index 0000000..6268b0b --- /dev/null +++ b/flang/test/Semantics/OpenMP/omp-atomic-write-pointer-derived.f90 @@ -0,0 +1,8 @@ +! RUN: not %flang_fc1 -fopenmp -fsyntax-only %s 2>&1 | FileCheck %s +type t +end type +type(t), pointer :: a1, a2 +!$omp atomic write +a1 = a2 +! CHECK: error: ATOMIC operation requires an intrinsic scalar variable; 'a1' has the POINTER attribute and derived type 't' +end diff --git a/flang/test/Semantics/OpenMP/requires-modfile.f90 b/flang/test/Semantics/OpenMP/requires-modfile.f90 new file mode 100644 index 0000000..2f06104 --- /dev/null +++ b/flang/test/Semantics/OpenMP/requires-modfile.f90 @@ -0,0 +1,39 @@ +!RUN: %python %S/../test_modfile.py %s %flang_fc1 -fopenmp -fopenmp-version=52 + +module req +contains + +! The requirements from the subprograms should be added to the module. +subroutine f00 + !$omp requires reverse_offload +end + +subroutine f01 + !$omp requires atomic_default_mem_order(seq_cst) +end +end module + +module user +! The requirements from module req should be propagated to this module. +use req +end module + + +!Expect: req.mod +!module req +!!$omp requires atomic_default_mem_order(seq_cst) +!!$omp requires reverse_offload +!contains +!subroutine f00() +!end +!subroutine f01() +!end +!end + +!Expect: user.mod +!module user +!use req,only:f00 +!use req,only:f01 +!!$omp requires atomic_default_mem_order(seq_cst) +!!$omp requires reverse_offload +!end diff --git a/flang/test/Semantics/OpenMP/requires09.f90 b/flang/test/Semantics/OpenMP/requires09.f90 index 2fa5d950..ca6ad5e 100644 --- a/flang/test/Semantics/OpenMP/requires09.f90 +++ b/flang/test/Semantics/OpenMP/requires09.f90 @@ -3,12 +3,16 @@ ! 2.4 Requires directive ! All atomic_default_mem_order clauses in 'requires' directives found within a ! compilation unit must specify the same ordering. +!ERROR: Conflicting 'ATOMIC_DEFAULT_MEM_ORDER' REQUIRES clauses found in compilation unit +module m +contains subroutine f !$omp requires atomic_default_mem_order(seq_cst) end subroutine f -!ERROR: Conflicting 'ATOMIC_DEFAULT_MEM_ORDER' REQUIRES clauses found in compilation unit subroutine g !$omp requires atomic_default_mem_order(relaxed) end subroutine g + +end module diff --git a/flang/test/Semantics/dynamic-type-intrinsics.f90 b/flang/test/Semantics/dynamic-type-intrinsics.f90 new file mode 100644 index 0000000..a4ce3db --- /dev/null +++ b/flang/test/Semantics/dynamic-type-intrinsics.f90 @@ -0,0 +1,73 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 + +module m + type :: t1 + real :: x + end type + type :: t2(k) + integer, kind :: k + real(kind=k) :: x + end type + type :: t3 + real :: x + end type + type, extends(t1) :: t4 + integer :: y + end type + type :: t5 + sequence + integer :: x + integer :: y + end type + + integer :: i + real :: r + type(t1) :: x1, y1 + type(t2(4)) :: x24, y24 + type(t2(8)) :: x28 + type(t3) :: x3 + type(t4) :: x4 + type(t5) :: x5 + class(t1), allocatable :: a1 + class(t3), allocatable :: a3 + + integer(kind=merge(kind(1),-1,same_type_as(x1, x1))) same_type_as_x1_x1_true + integer(kind=merge(kind(1),-1,same_type_as(x1, y1))) same_type_as_x1_y1_true + integer(kind=merge(kind(1),-1,same_type_as(x24, x24))) same_type_as_x24_x24_true + integer(kind=merge(kind(1),-1,same_type_as(x24, y24))) same_type_as_x24_y24_true + integer(kind=merge(kind(1),-1,same_type_as(x24, x28))) same_type_as_x24_x28_true + !ERROR: INTEGER(KIND=-1) is not a supported type + integer(kind=merge(kind(1),-1,same_type_as(x1, x3))) same_type_as_x1_x3_false + !ERROR: INTEGER(KIND=-1) is not a supported type + integer(kind=merge(kind(1),-1,same_type_as(a1, a3))) same_type_as_a1_a3_false + !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type + logical :: t1_8 = same_type_as(x5, x5) + !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type + logical :: t1_9 = same_type_as(x5, x1) + !ERROR: Actual argument for 'b=' has type 't5', but was expected to be an extensible or unlimited polymorphic type + logical :: t1_10 = same_type_as(x1, x5) + !ERROR: Actual argument for 'a=' has bad type 'INTEGER(4)', expected extensible or unlimited polymorphic type + logical :: t1_11 = same_type_as(i, i) + !ERROR: Actual argument for 'a=' has bad type 'REAL(4)', expected extensible or unlimited polymorphic type + logical :: t1_12 = same_type_as(r, r) + !ERROR: Actual argument for 'a=' has bad type 'INTEGER(4)', expected extensible or unlimited polymorphic type + logical :: t1_13 = same_type_as(i, t) + + integer(kind=merge(kind(1),-1,extends_type_of(x1, y1))) extends_type_of_x1_y1_true + integer(kind=merge(kind(1),-1,extends_type_of(x24, x24))) extends_type_of_x24_x24_true + integer(kind=merge(kind(1),-1,extends_type_of(x24, y24))) extends_type_of_x24_y24_true + integer(kind=merge(kind(1),-1,extends_type_of(x24, x28))) extends_type_of_x24_x28_true + !ERROR: INTEGER(KIND=-1) is not a supported type + integer(kind=merge(kind(1),-1,extends_type_of(x1, x3))) extends_type_of_x1_x3_false + !ERROR: INTEGER(KIND=-1) is not a supported type + integer(kind=merge(kind(1),-1,extends_type_of(a1, a3))) extends_type_of_a1_a3_false + !ERROR: INTEGER(KIND=-1) is not a supported type + integer(kind=merge(kind(1),-1,extends_type_of(x1, x4))) extends_type_of_x1_x4_false + integer(kind=merge(kind(1),-1,extends_type_of(x4, x1))) extends_type_of_x4_x1_true + !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type + logical :: t2_9 = extends_type_of(x5, x5) + !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type + logical :: t2_10 = extends_type_of(x5, x1) + !ERROR: Actual argument for 'mold=' has type 't5', but was expected to be an extensible or unlimited polymorphic type + logical :: t2_11 = extends_type_of(x1, x5) +end module diff --git a/flang/test/Semantics/io11.f90 b/flang/test/Semantics/io11.f90 index c00deed..6bb7a71 100644 --- a/flang/test/Semantics/io11.f90 +++ b/flang/test/Semantics/io11.f90 @@ -809,3 +809,24 @@ module m29 end end interface end + +module m30 + type base + character(5), allocatable :: data + end type + interface write(formatted) + subroutine formattedRead (dtv, unit, iotype, v_list, iostat, iomsg) + import base + !ERROR: Dummy argument 'dtv' of a defined input/output procedure must be a scalar + class (base), intent(in) :: dtv(10) + integer, intent(in) :: unit + !ERROR: Dummy argument 'iotype' of a defined input/output procedure must be a scalar + character(*), intent(in) :: iotype(2) + integer, intent(in) :: v_list(:) + !ERROR: Dummy argument 'iostat' of a defined input/output procedure must be a scalar + integer, intent(out) :: iostat(*) + !ERROR: Dummy argument 'iomsg' of a defined input/output procedure must be a scalar + character(*), intent(inout) :: iomsg(:) + end subroutine + end interface +end module diff --git a/flang/tools/fir-opt/CMakeLists.txt b/flang/tools/fir-opt/CMakeLists.txt index 4ee9752..c5bd439 100644 --- a/flang/tools/fir-opt/CMakeLists.txt +++ b/flang/tools/fir-opt/CMakeLists.txt @@ -22,6 +22,7 @@ target_link_libraries(fir-opt PRIVATE HLFIRDialect HLFIRTransforms FIROpenACCSupport + FIROpenACCTransforms FIROpenMPSupport FlangOpenMPTransforms FIRAnalysis diff --git a/flang/tools/fir-opt/fir-opt.cpp b/flang/tools/fir-opt/fir-opt.cpp index d66fc3f..b0b277b 100644 --- a/flang/tools/fir-opt/fir-opt.cpp +++ b/flang/tools/fir-opt/fir-opt.cpp @@ -14,6 +14,7 @@ #include "mlir/Tools/mlir-opt/MlirOptMain.h" #include "flang/Optimizer/CodeGen/CodeGen.h" #include "flang/Optimizer/HLFIR/Passes.h" +#include "flang/Optimizer/OpenACC/Passes.h" #include "flang/Optimizer/OpenMP/Passes.h" #include "flang/Optimizer/Support/InitFIR.h" #include "flang/Optimizer/Transforms/Passes.h" @@ -37,6 +38,7 @@ int main(int argc, char **argv) { fir::registerOptTransformPasses(); hlfir::registerHLFIRPasses(); flangomp::registerFlangOpenMPPasses(); + fir::acc::registerFIROpenACCPasses(); #ifdef FLANG_INCLUDE_TESTS fir::test::registerTestFIRAliasAnalysisPass(); fir::test::registerTestFIROpenACCInterfacesPass(); |