aboutsummaryrefslogtreecommitdiff
path: root/flang
diff options
context:
space:
mode:
Diffstat (limited to 'flang')
-rw-r--r--flang/include/flang/Evaluate/tools.h11
-rw-r--r--flang/include/flang/Optimizer/CMakeLists.txt1
-rw-r--r--flang/include/flang/Optimizer/OpenACC/CMakeLists.txt4
-rw-r--r--flang/include/flang/Optimizer/OpenACC/Passes.h33
-rw-r--r--flang/include/flang/Optimizer/OpenACC/Passes.td36
-rw-r--r--flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h7
-rw-r--r--flang/include/flang/Optimizer/Support/Utils.h6
-rw-r--r--flang/include/flang/Parser/dump-parse-tree.h2
-rw-r--r--flang/include/flang/Parser/parse-tree.h19
-rw-r--r--flang/include/flang/Semantics/openmp-modifiers.h1
-rw-r--r--flang/include/flang/Semantics/openmp-utils.h1
-rw-r--r--flang/include/flang/Semantics/symbol.h25
-rw-r--r--flang/lib/Evaluate/intrinsics.cpp23
-rw-r--r--flang/lib/Evaluate/tools.cpp12
-rw-r--r--flang/lib/Lower/Allocatable.cpp13
-rw-r--r--flang/lib/Lower/OpenACC.cpp27
-rw-r--r--flang/lib/Lower/OpenMP/ClauseProcessor.cpp6
-rw-r--r--flang/lib/Lower/OpenMP/Clauses.cpp17
-rw-r--r--flang/lib/Lower/OpenMP/OpenMP.cpp15
-rw-r--r--flang/lib/Optimizer/CodeGen/CodeGen.cpp16
-rw-r--r--flang/lib/Optimizer/OpenACC/CMakeLists.txt1
-rw-r--r--flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp79
-rw-r--r--flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp191
-rw-r--r--flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt12
-rw-r--r--flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp100
-rw-r--r--flang/lib/Optimizer/Support/Utils.cpp10
-rw-r--r--flang/lib/Parser/openmp-parsers.cpp9
-rw-r--r--flang/lib/Parser/unparse.cpp6
-rw-r--r--flang/lib/Semantics/check-declarations.cpp2
-rw-r--r--flang/lib/Semantics/check-omp-atomic.cpp39
-rw-r--r--flang/lib/Semantics/check-omp-structure.cpp56
-rw-r--r--flang/lib/Semantics/check-omp-structure.h8
-rw-r--r--flang/lib/Semantics/mod-file.cpp37
-rw-r--r--flang/lib/Semantics/openmp-modifiers.cpp16
-rw-r--r--flang/lib/Semantics/openmp-utils.cpp23
-rw-r--r--flang/lib/Semantics/resolve-directives.cpp207
-rw-r--r--flang/lib/Semantics/resolve-directives.h2
-rw-r--r--flang/lib/Semantics/resolve-names.cpp3
-rw-r--r--flang/lib/Semantics/symbol.cpp32
-rw-r--r--flang/test/Fir/CUDA/cuda-code-gen.mlir28
-rw-r--r--flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir24
-rw-r--r--flang/test/Fir/OpenACC/recipe-bufferization.mlir316
-rw-r--r--flang/test/Lower/CUDA/TODO/cuda-allocate-default-init.cuf15
-rw-r--r--flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf9
-rw-r--r--flang/test/Lower/CUDA/cuda-associate-data-transfer.cuf21
-rw-r--r--flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f905
-rw-r--r--flang/test/Lower/OpenACC/acc-private.f9050
-rw-r--r--flang/test/Lower/OpenMP/Todo/attach-modifier.f909
-rw-r--r--flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f9021
-rw-r--r--flang/test/Parser/OpenMP/map-modifiers-v61.f9064
-rw-r--r--flang/test/Semantics/OpenMP/dump-requires-details.f9014
-rw-r--r--flang/test/Semantics/OpenMP/map-modifiers-v61.f9049
-rw-r--r--flang/test/Semantics/OpenMP/omp-atomic-write-pointer-derived.f908
-rw-r--r--flang/test/Semantics/OpenMP/requires-modfile.f9039
-rw-r--r--flang/test/Semantics/OpenMP/requires09.f906
-rw-r--r--flang/test/Semantics/dynamic-type-intrinsics.f9073
-rw-r--r--flang/test/Semantics/io11.f9021
-rw-r--r--flang/tools/fir-opt/CMakeLists.txt1
-rw-r--r--flang/tools/fir-opt/fir-opt.cpp2
59 files changed, 1667 insertions, 216 deletions
diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h
index d8d0956..20a0919 100644
--- a/flang/include/flang/Evaluate/tools.h
+++ b/flang/include/flang/Evaluate/tools.h
@@ -1289,16 +1289,7 @@ bool CheckForCoindexedObject(parser::ContextualMessages &,
const std::optional<ActualArgument> &, const std::string &procName,
const std::string &argName);
-inline bool IsCUDADeviceSymbol(const Symbol &sym) {
- if (const auto *details =
- sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
- if (details->cudaDataAttr() &&
- *details->cudaDataAttr() != common::CUDADataAttr::Pinned) {
- return true;
- }
- }
- return false;
-}
+bool IsCUDADeviceSymbol(const Symbol &sym);
inline bool IsCUDAManagedOrUnifiedSymbol(const Symbol &sym) {
if (const auto *details =
diff --git a/flang/include/flang/Optimizer/CMakeLists.txt b/flang/include/flang/Optimizer/CMakeLists.txt
index 3336ac9..68af52f 100644
--- a/flang/include/flang/Optimizer/CMakeLists.txt
+++ b/flang/include/flang/Optimizer/CMakeLists.txt
@@ -2,4 +2,5 @@ add_subdirectory(CodeGen)
add_subdirectory(Dialect)
add_subdirectory(HLFIR)
add_subdirectory(Transforms)
+add_subdirectory(OpenACC)
add_subdirectory(OpenMP)
diff --git a/flang/include/flang/Optimizer/OpenACC/CMakeLists.txt b/flang/include/flang/Optimizer/OpenACC/CMakeLists.txt
new file mode 100644
index 0000000..a032488
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenACC/CMakeLists.txt
@@ -0,0 +1,4 @@
+set(LLVM_TARGET_DEFINITIONS Passes.td)
+mlir_tablegen(Passes.h.inc -gen-pass-decls -name FIROpenACC)
+
+add_public_tablegen_target(FIROpenACCPassesIncGen)
diff --git a/flang/include/flang/Optimizer/OpenACC/Passes.h b/flang/include/flang/Optimizer/OpenACC/Passes.h
new file mode 100644
index 0000000..0627cc8
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenACC/Passes.h
@@ -0,0 +1,33 @@
+//===- Passes.h - OpenACC pass entry points -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This header declares the OpenACC passes specific to Fortran and FIR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_OPENACC_PASSES_H
+#define FORTRAN_OPTIMIZER_OPENACC_PASSES_H
+
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Pass/PassRegistry.h"
+
+#include <memory>
+
+namespace fir {
+namespace acc {
+#define GEN_PASS_DECL
+#define GEN_PASS_REGISTRATION
+#include "flang/Optimizer/OpenACC/Passes.h.inc"
+
+std::unique_ptr<mlir::Pass> createACCRecipeBufferizationPass();
+
+} // namespace acc
+} // namespace fir
+
+#endif // FORTRAN_OPTIMIZER_OPENACC_PASSES_H
diff --git a/flang/include/flang/Optimizer/OpenACC/Passes.td b/flang/include/flang/Optimizer/OpenACC/Passes.td
new file mode 100644
index 0000000..3c127b3
--- /dev/null
+++ b/flang/include/flang/Optimizer/OpenACC/Passes.td
@@ -0,0 +1,36 @@
+//===-- Passes.td - flang OpenACC pass definitions -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_OPENACC_PASSES
+#define FORTRAN_OPTIMIZER_OPENACC_PASSES
+
+include "mlir/Pass/PassBase.td"
+
+def ACCRecipeBufferization
+ : Pass<"fir-acc-recipe-bufferization", "mlir::ModuleOp"> {
+ let summary = "Rewrite acc.*.recipe box values to ref<box> and update uses";
+ let description = [{
+ Bufferizes OpenACC recipes that operate on fir.box<T> so their type and
+ region block arguments become fir.ref<fir.box<T>> instead. This applies to
+ acc.private.recipe, acc.firstprivate.recipe (including copy region), and
+ acc.reduction.recipe (including combiner region).
+
+ For affected regions, the pass inserts required loads at the beginning of
+ the region to preserve original uses after argument type changes. For yields
+ of box values, the pass allocates a local fir.ref<fir.box<T>> and stores the
+ yielded fir.box<T> into it so the region yields a reference to a box.
+
+ For acc.private, acc.firstprivate, and acc.reduction operations that use a
+ bufferized recipe, the pass allocates a host-side fir.ref<fir.box<T>> before
+ the data op and rewires the data op to use the new memory. Other users of
+ the original data operation result (outside the paired compute op) are
+ updated to load through the reference.
+ }];
+}
+
+#endif // FORTRAN_OPTIMIZER_OPENACC_PASSES
diff --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h
index 1085393..408f039 100644
--- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h
+++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h
@@ -57,8 +57,11 @@ struct OpenACCMappableModel
mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var,
llvm::StringRef varName,
- mlir::ValueRange extents,
- mlir::Value initVal) const;
+ mlir::ValueRange extents, mlir::Value initVal,
+ bool &needsDestroy) const;
+
+ bool generatePrivateDestroy(mlir::Type type, mlir::OpBuilder &builder,
+ mlir::Location loc, mlir::Value privatized) const;
};
} // namespace fir::acc
diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h
index 0b31cfe..bbb7e6e 100644
--- a/flang/include/flang/Optimizer/Support/Utils.h
+++ b/flang/include/flang/Optimizer/Support/Utils.h
@@ -200,6 +200,12 @@ std::optional<llvm::ArrayRef<int64_t>> getComponentLowerBoundsIfNonDefault(
fir::RecordType recordType, llvm::StringRef component,
mlir::ModuleOp module, const mlir::SymbolTable *symbolTable = nullptr);
+/// Indicate if a derived type has final routine. Returns std::nullopt if that
+/// information is not in the IR;
+std::optional<bool>
+isRecordWithFinalRoutine(fir::RecordType recordType, mlir::ModuleOp module,
+ const mlir::SymbolTable *symbolTable = nullptr);
+
/// Generate a LLVM constant value of type `ity`, using the provided offset.
mlir::LLVM::ConstantOp
genConstantIndex(mlir::Location loc, mlir::Type ity,
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index 1488529..91af92c0 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -520,6 +520,8 @@ public:
NODE(parser, OmpAtClause)
NODE_ENUM(OmpAtClause, ActionTime)
NODE(parser, OmpAtomicDefaultMemOrderClause)
+ NODE(parser, OmpAttachModifier)
+ NODE_ENUM(OmpAttachModifier, Value)
NODE(parser, OmpAutomapModifier)
NODE_ENUM(OmpAutomapModifier, Value)
NODE(parser, OmpBaseVariantNames)
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index d919b77..f52323c 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -3813,6 +3813,18 @@ struct OmpAlwaysModifier {
WRAPPER_CLASS_BOILERPLATE(OmpAlwaysModifier, Value);
};
+// Ref: [coming in 6.1]
+//
+// attach-modifier ->
+// ATTACH(attachment-mode) // since 6.1
+//
+// attachment-mode ->
+// ALWAYS | AUTO | NEVER
+struct OmpAttachModifier {
+ ENUM_CLASS(Value, Always, Never, Auto)
+ WRAPPER_CLASS_BOILERPLATE(OmpAttachModifier, Value);
+};
+
// Ref: [6.0:289-290]
//
// automap-modifier ->
@@ -4575,6 +4587,7 @@ struct OmpLoopRangeClause {
// modifier ->
// map-type-modifier [replaced] | // since 4.5, until 5.2
// always-modifier | // since 6.0
+// attach-modifier | // since 6.1
// close-modifier | // since 6.0
// delete-modifier | // since 6.0
// present-modifier | // since 6.0
@@ -4589,9 +4602,9 @@ struct OmpLoopRangeClause {
// and delete-modifier has been split from map-type.
struct OmpMapClause {
TUPLE_CLASS_BOILERPLATE(OmpMapClause);
- MODIFIER_BOILERPLATE(OmpAlwaysModifier, OmpCloseModifier, OmpDeleteModifier,
- OmpMapTypeModifier, OmpPresentModifier, OmpRefModifier, OmpSelfModifier,
- OmpMapper, OmpIterator, OmpMapType, OmpxHoldModifier);
+ MODIFIER_BOILERPLATE(OmpAlwaysModifier, OmpAttachModifier, OmpCloseModifier,
+ OmpDeleteModifier, OmpMapTypeModifier, OmpPresentModifier, OmpRefModifier,
+ OmpSelfModifier, OmpMapper, OmpIterator, OmpMapType, OmpxHoldModifier);
std::tuple<MODIFIERS(), OmpObjectList, /*CommaSeparated=*/bool> t;
};
diff --git a/flang/include/flang/Semantics/openmp-modifiers.h b/flang/include/flang/Semantics/openmp-modifiers.h
index e0eae98..bfa3aa4 100644
--- a/flang/include/flang/Semantics/openmp-modifiers.h
+++ b/flang/include/flang/Semantics/openmp-modifiers.h
@@ -72,6 +72,7 @@ DECLARE_DESCRIPTOR(parser::OmpAlignModifier);
DECLARE_DESCRIPTOR(parser::OmpAllocatorComplexModifier);
DECLARE_DESCRIPTOR(parser::OmpAllocatorSimpleModifier);
DECLARE_DESCRIPTOR(parser::OmpAlwaysModifier);
+DECLARE_DESCRIPTOR(parser::OmpAttachModifier);
DECLARE_DESCRIPTOR(parser::OmpAutomapModifier);
DECLARE_DESCRIPTOR(parser::OmpChunkModifier);
DECLARE_DESCRIPTOR(parser::OmpCloseModifier);
diff --git a/flang/include/flang/Semantics/openmp-utils.h b/flang/include/flang/Semantics/openmp-utils.h
index 2954a1c..0f85183 100644
--- a/flang/include/flang/Semantics/openmp-utils.h
+++ b/flang/include/flang/Semantics/openmp-utils.h
@@ -38,6 +38,7 @@ template <typename T, typename U = std::remove_const_t<T>> U AsRvalue(T &t) {
template <typename T> T &&AsRvalue(T &&t) { return std::move(t); }
const Scope &GetScopingUnit(const Scope &scope);
+const Scope &GetProgramUnit(const Scope &scope);
// There is no consistent way to get the source of an ActionStmt, but there
// is "source" in Statement<T>. This structure keeps the ActionStmt with the
diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h
index 77f567e..04a0639 100644
--- a/flang/include/flang/Semantics/symbol.h
+++ b/flang/include/flang/Semantics/symbol.h
@@ -16,6 +16,7 @@
#include "flang/Semantics/module-dependences.h"
#include "flang/Support/Fortran.h"
#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
#include <array>
#include <functional>
@@ -50,32 +51,34 @@ using MutableSymbolVector = std::vector<MutableSymbolRef>;
// Mixin for details with OpenMP declarative constructs.
class WithOmpDeclarative {
- using OmpAtomicOrderType = common::OmpMemoryOrderType;
-
public:
- ENUM_CLASS(RequiresFlag, ReverseOffload, UnifiedAddress, UnifiedSharedMemory,
- DynamicAllocators);
- using RequiresFlags = common::EnumSet<RequiresFlag, RequiresFlag_enumSize>;
+ // The set of requirements for any program unit include requirements
+ // from any module used in the program unit.
+ using RequiresClauses =
+ common::EnumSet<llvm::omp::Clause, llvm::omp::Clause_enumSize>;
bool has_ompRequires() const { return ompRequires_.has_value(); }
- const RequiresFlags *ompRequires() const {
+ const RequiresClauses *ompRequires() const {
return ompRequires_ ? &*ompRequires_ : nullptr;
}
- void set_ompRequires(RequiresFlags flags) { ompRequires_ = flags; }
+ void set_ompRequires(RequiresClauses clauses) { ompRequires_ = clauses; }
bool has_ompAtomicDefaultMemOrder() const {
return ompAtomicDefaultMemOrder_.has_value();
}
- const OmpAtomicOrderType *ompAtomicDefaultMemOrder() const {
+ const common::OmpMemoryOrderType *ompAtomicDefaultMemOrder() const {
return ompAtomicDefaultMemOrder_ ? &*ompAtomicDefaultMemOrder_ : nullptr;
}
- void set_ompAtomicDefaultMemOrder(OmpAtomicOrderType flags) {
+ void set_ompAtomicDefaultMemOrder(common::OmpMemoryOrderType flags) {
ompAtomicDefaultMemOrder_ = flags;
}
+ friend llvm::raw_ostream &operator<<(
+ llvm::raw_ostream &, const WithOmpDeclarative &);
+
private:
- std::optional<RequiresFlags> ompRequires_;
- std::optional<OmpAtomicOrderType> ompAtomicDefaultMemOrder_;
+ std::optional<RequiresClauses> ompRequires_;
+ std::optional<common::OmpMemoryOrderType> ompAtomicDefaultMemOrder_;
};
// A module or submodule.
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index f204eef..1de5e6b 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -111,6 +111,7 @@ ENUM_CLASS(KindCode, none, defaultIntegerKind,
atomicIntKind, // atomic_int_kind from iso_fortran_env
atomicIntOrLogicalKind, // atomic_int_kind or atomic_logical_kind
sameAtom, // same type and kind as atom
+ extensibleOrUnlimitedType, // extensible or unlimited polymorphic type
)
struct TypePattern {
@@ -160,7 +161,8 @@ static constexpr TypePattern AnyChar{CharType, KindCode::any};
static constexpr TypePattern AnyLogical{LogicalType, KindCode::any};
static constexpr TypePattern AnyRelatable{RelatableType, KindCode::any};
static constexpr TypePattern AnyIntrinsic{IntrinsicType, KindCode::any};
-static constexpr TypePattern ExtensibleDerived{DerivedType, KindCode::any};
+static constexpr TypePattern ExtensibleDerived{
+ DerivedType, KindCode::extensibleOrUnlimitedType};
static constexpr TypePattern AnyData{AnyType, KindCode::any};
// Type is irrelevant, but not BOZ (for PRESENT(), OPTIONAL(), &c.)
@@ -2103,9 +2105,13 @@ std::optional<SpecificCall> IntrinsicInterface::Match(
}
return std::nullopt;
} else if (!d.typePattern.categorySet.test(type->category())) {
+ const char *expected{
+ d.typePattern.kindCode == KindCode::extensibleOrUnlimitedType
+ ? ", expected extensible or unlimited polymorphic type"
+ : ""};
messages.Say(arg->sourceLocation(),
- "Actual argument for '%s=' has bad type '%s'"_err_en_US, d.keyword,
- type->AsFortran());
+ "Actual argument for '%s=' has bad type '%s'%s"_err_en_US, d.keyword,
+ type->AsFortran(), expected);
return std::nullopt; // argument has invalid type category
}
bool argOk{false};
@@ -2244,6 +2250,17 @@ std::optional<SpecificCall> IntrinsicInterface::Match(
return std::nullopt;
}
break;
+ case KindCode::extensibleOrUnlimitedType:
+ argOk = type->IsUnlimitedPolymorphic() ||
+ (type->category() == TypeCategory::Derived &&
+ IsExtensibleType(GetDerivedTypeSpec(type)));
+ if (!argOk) {
+ messages.Say(arg->sourceLocation(),
+ "Actual argument for '%s=' has type '%s', but was expected to be an extensible or unlimited polymorphic type"_err_en_US,
+ d.keyword, type->AsFortran());
+ return std::nullopt;
+ }
+ break;
default:
CRASH_NO_CASE;
}
diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp
index b927fa3..bd06acc 100644
--- a/flang/lib/Evaluate/tools.cpp
+++ b/flang/lib/Evaluate/tools.cpp
@@ -1153,6 +1153,18 @@ bool HasCUDAImplicitTransfer(const Expr<SomeType> &expr) {
return (hasConstant || (hostSymbols.size() > 0)) && deviceSymbols.size() > 0;
}
+bool IsCUDADeviceSymbol(const Symbol &sym) {
+ if (const auto *details =
+ sym.GetUltimate().detailsIf<semantics::ObjectEntityDetails>()) {
+ return details->cudaDataAttr() &&
+ *details->cudaDataAttr() != common::CUDADataAttr::Pinned;
+ } else if (const auto *details =
+ sym.GetUltimate().detailsIf<semantics::AssocEntityDetails>()) {
+ return GetNbOfCUDADeviceSymbols(details->expr()) > 0;
+ }
+ return false;
+}
+
// HasVectorSubscript()
struct HasVectorSubscriptHelper
: public AnyTraverse<HasVectorSubscriptHelper, bool,
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 53239cb..e7a6c4d 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -629,6 +629,10 @@ private:
unsigned allocatorIdx = Fortran::lower::getAllocatorIdx(alloc.getSymbol());
fir::ExtendedValue exv = isSource ? sourceExv : moldExv;
+ if (const Fortran::semantics::Symbol *sym{GetLastSymbol(sourceExpr)})
+ if (Fortran::semantics::IsCUDADevice(*sym))
+ TODO(loc, "CUDA Fortran: allocate with device source");
+
// Generate a sequence of runtime calls.
errorManager.genStatCheck(builder, loc);
genAllocateObjectInit(box, allocatorIdx);
@@ -767,6 +771,15 @@ private:
const fir::MutableBoxValue &box,
ErrorManager &errorManager,
const Fortran::semantics::Symbol &sym) {
+
+ if (const Fortran::semantics::DeclTypeSpec *declTypeSpec = sym.GetType())
+ if (const Fortran::semantics::DerivedTypeSpec *derivedTypeSpec =
+ declTypeSpec->AsDerived())
+ if (derivedTypeSpec->HasDefaultInitialization(
+ /*ignoreAllocatable=*/true, /*ignorePointer=*/true))
+ TODO(loc,
+ "CUDA Fortran: allocate on device with default initialization");
+
Fortran::lower::StatementContext stmtCtx;
cuf::DataAttributeAttr cudaAttr =
Fortran::lower::translateSymbolCUFDataAttribute(builder.getContext(),
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index 62e5c0c..cfb1891 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -978,15 +978,40 @@ static RecipeOp genRecipeOp(
auto mappableTy = mlir::dyn_cast<mlir::acc::MappableType>(ty);
assert(mappableTy &&
"Expected that all variable types are considered mappable");
+ bool needsDestroy = false;
auto retVal = mappableTy.generatePrivateInit(
builder, loc,
mlir::cast<mlir::TypedValue<mlir::acc::MappableType>>(
initBlock->getArgument(0)),
initName,
initBlock->getArguments().take_back(initBlock->getArguments().size() - 1),
- initValue);
+ initValue, needsDestroy);
mlir::acc::YieldOp::create(builder, loc,
retVal ? retVal : initBlock->getArgument(0));
+ // Create destroy region and generate destruction if requested.
+ if (needsDestroy) {
+ llvm::SmallVector<mlir::Type> destroyArgsTy;
+ llvm::SmallVector<mlir::Location> destroyArgsLoc;
+ // original and privatized/reduction value
+ destroyArgsTy.push_back(ty);
+ destroyArgsTy.push_back(ty);
+ destroyArgsLoc.push_back(loc);
+ destroyArgsLoc.push_back(loc);
+ // Append bounds arguments (if any) in the same order as init region
+ if (argsTy.size() > 1) {
+ destroyArgsTy.append(argsTy.begin() + 1, argsTy.end());
+ destroyArgsLoc.insert(destroyArgsLoc.end(), argsTy.size() - 1, loc);
+ }
+
+ builder.createBlock(&recipe.getDestroyRegion(),
+ recipe.getDestroyRegion().end(), destroyArgsTy,
+ destroyArgsLoc);
+ builder.setInsertionPointToEnd(&recipe.getDestroyRegion().back());
+ // Call interface on the privatized/reduction value (2nd argument).
+ (void)mappableTy.generatePrivateDestroy(
+ builder, loc, recipe.getDestroyRegion().front().getArgument(1));
+ mlir::acc::TerminatorOp::create(builder, loc);
+ }
return recipe;
}
diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
index 55eda7e..85398be 100644
--- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
+++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp
@@ -1343,8 +1343,10 @@ bool ClauseProcessor::processMap(
const parser::CharBlock &source) {
using Map = omp::clause::Map;
mlir::Location clauseLocation = converter.genLocation(source);
- const auto &[mapType, typeMods, refMod, mappers, iterator, objects] =
- clause.t;
+ const auto &[mapType, typeMods, attachMod, refMod, mappers, iterator,
+ objects] = clause.t;
+ if (attachMod)
+ TODO(currentLocation, "ATTACH modifier is not implemented yet");
llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_NONE;
std::string mapperIdName = "__implicit_mapper";
diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp
index fac37a3..0842c62 100644
--- a/flang/lib/Lower/OpenMP/Clauses.cpp
+++ b/flang/lib/Lower/OpenMP/Clauses.cpp
@@ -1069,6 +1069,15 @@ Map make(const parser::OmpClause::Map &inp,
);
CLAUSET_ENUM_CONVERT( //
+ convertAttachMod, parser::OmpAttachModifier::Value, Map::AttachModifier,
+ // clang-format off
+ MS(Always, Always)
+ MS(Auto, Auto)
+ MS(Never, Never)
+ // clang-format on
+ );
+
+ CLAUSET_ENUM_CONVERT( //
convertRefMod, parser::OmpRefModifier::Value, Map::RefModifier,
// clang-format off
MS(Ref_Ptee, RefPtee)
@@ -1115,6 +1124,13 @@ Map make(const parser::OmpClause::Map &inp,
if (!modSet.empty())
maybeTypeMods = Map::MapTypeModifiers(modSet.begin(), modSet.end());
+ auto attachMod = [&]() -> std::optional<Map::AttachModifier> {
+ if (auto *t =
+ semantics::OmpGetUniqueModifier<parser::OmpAttachModifier>(mods))
+ return convertAttachMod(t->v);
+ return std::nullopt;
+ }();
+
auto refMod = [&]() -> std::optional<Map::RefModifier> {
if (auto *t = semantics::OmpGetUniqueModifier<parser::OmpRefModifier>(mods))
return convertRefMod(t->v);
@@ -1135,6 +1151,7 @@ Map make(const parser::OmpClause::Map &inp,
return Map{{/*MapType=*/std::move(type),
/*MapTypeModifiers=*/std::move(maybeTypeMods),
+ /*AttachModifier=*/std::move(attachMod),
/*RefModifier=*/std::move(refMod), /*Mapper=*/std::move(mappers),
/*Iterator=*/std::move(iterator),
/*LocatorList=*/makeObjects(t2, semaCtx)}};
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp
index 444f274..f86ee01 100644
--- a/flang/lib/Lower/OpenMP/OpenMP.cpp
+++ b/flang/lib/Lower/OpenMP/OpenMP.cpp
@@ -4208,18 +4208,17 @@ bool Fortran::lower::markOpenMPDeferredDeclareTargetFunctions(
void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
const semantics::Symbol *symbol) {
using MlirRequires = mlir::omp::ClauseRequires;
- using SemaRequires = semantics::WithOmpDeclarative::RequiresFlag;
if (auto offloadMod =
llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(mod)) {
- semantics::WithOmpDeclarative::RequiresFlags semaFlags;
+ semantics::WithOmpDeclarative::RequiresClauses reqs;
if (symbol) {
common::visit(
[&](const auto &details) {
if constexpr (std::is_base_of_v<semantics::WithOmpDeclarative,
std::decay_t<decltype(details)>>) {
if (details.has_ompRequires())
- semaFlags = *details.ompRequires();
+ reqs = *details.ompRequires();
}
},
symbol->details());
@@ -4228,14 +4227,14 @@ void Fortran::lower::genOpenMPRequires(mlir::Operation *mod,
// Use pre-populated omp.requires module attribute if it was set, so that
// the "-fopenmp-force-usm" compiler option is honored.
MlirRequires mlirFlags = offloadMod.getRequires();
- if (semaFlags.test(SemaRequires::ReverseOffload))
+ if (reqs.test(llvm::omp::Clause::OMPC_dynamic_allocators))
+ mlirFlags = mlirFlags | MlirRequires::dynamic_allocators;
+ if (reqs.test(llvm::omp::Clause::OMPC_reverse_offload))
mlirFlags = mlirFlags | MlirRequires::reverse_offload;
- if (semaFlags.test(SemaRequires::UnifiedAddress))
+ if (reqs.test(llvm::omp::Clause::OMPC_unified_address))
mlirFlags = mlirFlags | MlirRequires::unified_address;
- if (semaFlags.test(SemaRequires::UnifiedSharedMemory))
+ if (reqs.test(llvm::omp::Clause::OMPC_unified_shared_memory))
mlirFlags = mlirFlags | MlirRequires::unified_shared_memory;
- if (semaFlags.test(SemaRequires::DynamicAllocators))
- mlirFlags = mlirFlags | MlirRequires::dynamic_allocators;
offloadMod.setRequires(mlirFlags);
}
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 0afb295..70bb43a2 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -176,6 +176,19 @@ struct AddrOfOpConversion : public fir::FIROpConversion<fir::AddrOfOp> {
llvm::LogicalResult
matchAndRewrite(fir::AddrOfOp addr, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const override {
+
+ if (auto gpuMod = addr->getParentOfType<mlir::gpu::GPUModuleOp>()) {
+ auto global = gpuMod.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
+ replaceWithAddrOfOrASCast(
+ rewriter, addr->getLoc(),
+ global ? global.getAddrSpace() : getGlobalAddressSpace(rewriter),
+ getProgramAddressSpace(rewriter),
+ global ? global.getSymName()
+ : addr.getSymbol().getRootReference().getValue(),
+ convertType(addr.getType()), addr);
+ return mlir::success();
+ }
+
auto global = addr->getParentOfType<mlir::ModuleOp>()
.lookupSymbol<mlir::LLVM::GlobalOp>(addr.getSymbol());
replaceWithAddrOfOrASCast(
@@ -3231,7 +3244,8 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
if (global.getDataAttr() &&
*global.getDataAttr() == cuf::DataAttribute::Constant)
- TODO(global.getLoc(), "CUDA Fortran CONSTANT variable code generation");
+ g.setAddrSpace(
+ static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Constant));
rewriter.eraseOp(global);
return mlir::success();
diff --git a/flang/lib/Optimizer/OpenACC/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/CMakeLists.txt
index fc23e64..790b9fd 100644
--- a/flang/lib/Optimizer/OpenACC/CMakeLists.txt
+++ b/flang/lib/Optimizer/OpenACC/CMakeLists.txt
@@ -1 +1,2 @@
add_subdirectory(Support)
+add_subdirectory(Transforms)
diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
index 89aa010..9bf10b5 100644
--- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
+++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp
@@ -21,6 +21,7 @@
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
#include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Optimizer/Support/Utils.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/IR/BuiltinOps.h"
@@ -352,6 +353,14 @@ getBaseRef(mlir::TypedValue<mlir::acc::PointerLikeType> varPtr) {
// calculation op.
mlir::Value baseRef =
llvm::TypeSwitch<mlir::Operation *, mlir::Value>(op)
+ .Case<fir::DeclareOp>([&](auto op) {
+ // If this declare binds a view with an underlying storage operand,
+ // treat that storage as the base reference. Otherwise, fall back
+ // to the declared memref.
+ if (auto storage = op.getStorage())
+ return storage;
+ return mlir::Value(varPtr);
+ })
.Case<hlfir::DesignateOp>([&](auto op) {
// Get the base object.
return op.getMemref();
@@ -548,14 +557,27 @@ template <typename Ty>
mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
- mlir::ValueRange extents, mlir::Value initVal) const {
+ mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const {
+ needsDestroy = false;
mlir::Value retVal;
mlir::Type unwrappedTy = fir::unwrapRefType(type);
mlir::ModuleOp mod = builder.getInsertionBlock()
->getParent()
->getParentOfType<mlir::ModuleOp>();
- fir::FirOpBuilder firBuilder(builder, mod);
+ if (auto recType = llvm::dyn_cast<fir::RecordType>(
+ fir::getFortranElementType(unwrappedTy))) {
+ // Need to make deep copies of allocatable components.
+ if (fir::isRecordWithAllocatableMember(recType))
+ TODO(loc,
+ "OpenACC: privatizing derived type with allocatable components");
+ // Need to decide if user assignment/final routine should be called.
+ if (fir::isRecordWithFinalRoutine(recType, mod).value_or(false))
+ TODO(loc, "OpenACC: privatizing derived type with user assignment or "
+ "final routine ");
+ }
+
+ fir::FirOpBuilder firBuilder(builder, mod);
auto getDeclareOpForType = [&](mlir::Type ty) -> hlfir::DeclareOp {
auto alloca = fir::AllocaOp::create(firBuilder, loc, ty);
return hlfir::DeclareOp::create(firBuilder, loc, alloca, varName);
@@ -615,9 +637,11 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
mlir::Value firClass =
fir::EmboxOp::create(builder, loc, boxTy, allocatedScalar);
fir::StoreOp::create(builder, loc, firClass, retVal);
+ needsDestroy = true;
} else if (mlir::isa<fir::SequenceType>(innerTy)) {
hlfir::Entity source = hlfir::Entity{var};
- auto [temp, cleanup] = hlfir::createTempFromMold(loc, firBuilder, source);
+ auto [temp, cleanupFlag] =
+ hlfir::createTempFromMold(loc, firBuilder, source);
if (fir::isa_ref_type(type)) {
// When the temp is created - it is not a reference - thus we can
// end up with a type inconsistency. Therefore ensure storage is created
@@ -636,6 +660,9 @@ mlir::Value OpenACCMappableModel<Ty>::generatePrivateInit(
} else {
retVal = temp;
}
+ // If heap was allocated, a destroy is required later.
+ if (cleanupFlag)
+ needsDestroy = true;
} else {
TODO(loc, "Unsupported boxed type for OpenACC private-like recipe");
}
@@ -667,23 +694,61 @@ template mlir::Value
OpenACCMappableModel<fir::BaseBoxType>::generatePrivateInit(
mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
- mlir::ValueRange extents, mlir::Value initVal) const;
+ mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
template mlir::Value
OpenACCMappableModel<fir::ReferenceType>::generatePrivateInit(
mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
- mlir::ValueRange extents, mlir::Value initVal) const;
+ mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
template mlir::Value OpenACCMappableModel<fir::HeapType>::generatePrivateInit(
mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
- mlir::ValueRange extents, mlir::Value initVal) const;
+ mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
template mlir::Value
OpenACCMappableModel<fir::PointerType>::generatePrivateInit(
mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
mlir::TypedValue<mlir::acc::MappableType> var, llvm::StringRef varName,
- mlir::ValueRange extents, mlir::Value initVal) const;
+ mlir::ValueRange extents, mlir::Value initVal, bool &needsDestroy) const;
+
+template <typename Ty>
+bool OpenACCMappableModel<Ty>::generatePrivateDestroy(
+ mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value privatized) const {
+ mlir::Type unwrappedTy = fir::unwrapRefType(type);
+ // For boxed scalars allocated with AllocMem during init, free the heap.
+ if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(unwrappedTy)) {
+ mlir::Value boxVal = privatized;
+ if (fir::isa_ref_type(boxVal.getType()))
+ boxVal = fir::LoadOp::create(builder, loc, boxVal);
+ mlir::Value addr = fir::BoxAddrOp::create(builder, loc, boxVal);
+ // FreeMem only accepts fir.heap and this may not be represented in the box
+ // type if the privatized entity is not an allocatable.
+ mlir::Type heapType =
+ fir::HeapType::get(fir::unwrapRefType(addr.getType()));
+ if (heapType != addr.getType())
+ addr = fir::ConvertOp::create(builder, loc, heapType, addr);
+ fir::FreeMemOp::create(builder, loc, addr);
+ return true;
+ }
+
+ // Nothing to do for other categories by default, they are stack allocated.
+ return true;
+}
+
+template bool OpenACCMappableModel<fir::BaseBoxType>::generatePrivateDestroy(
+ mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value privatized) const;
+template bool OpenACCMappableModel<fir::ReferenceType>::generatePrivateDestroy(
+ mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value privatized) const;
+template bool OpenACCMappableModel<fir::HeapType>::generatePrivateDestroy(
+ mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value privatized) const;
+template bool OpenACCMappableModel<fir::PointerType>::generatePrivateDestroy(
+ mlir::Type type, mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value privatized) const;
} // namespace fir::acc
diff --git a/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp b/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp
new file mode 100644
index 0000000..4840a99
--- /dev/null
+++ b/flang/lib/Optimizer/OpenACC/Transforms/ACCRecipeBufferization.cpp
@@ -0,0 +1,191 @@
+//===- ACCRecipeBufferization.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Bufferize OpenACC recipes that yield fir.box<T> to operate on
+// fir.ref<fir.box<T>> and update uses accordingly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/OpenACC/Passes.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/IR/Block.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/SymbolTable.h"
+#include "mlir/IR/Value.h"
+#include "mlir/IR/Visitors.h"
+#include "llvm/ADT/TypeSwitch.h"
+
+namespace fir::acc {
+#define GEN_PASS_DEF_ACCRECIPEBUFFERIZATION
+#include "flang/Optimizer/OpenACC/Passes.h.inc"
+} // namespace fir::acc
+
+namespace {
+
+class BufferizeInterface {
+public:
+ static std::optional<mlir::Type> mustBufferize(mlir::Type recipeType) {
+ if (auto boxTy = llvm::dyn_cast<fir::BaseBoxType>(recipeType))
+ return fir::ReferenceType::get(boxTy);
+ return std::nullopt;
+ }
+
+ static mlir::Operation *load(mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value value) {
+ return builder.create<fir::LoadOp>(loc, value);
+ }
+
+ static mlir::Value placeInMemory(mlir::OpBuilder &builder, mlir::Location loc,
+ mlir::Value value) {
+ auto alloca = builder.create<fir::AllocaOp>(loc, value.getType());
+ builder.create<fir::StoreOp>(loc, value, alloca);
+ return alloca;
+ }
+};
+
+static void bufferizeRegionArgsAndYields(mlir::Region &region,
+ mlir::Location loc, mlir::Type oldType,
+ mlir::Type newType) {
+ if (region.empty())
+ return;
+
+ mlir::OpBuilder builder(&region);
+ for (mlir::BlockArgument arg : region.getArguments()) {
+ if (arg.getType() == oldType) {
+ arg.setType(newType);
+ if (!arg.use_empty()) {
+ mlir::Operation *loadOp = BufferizeInterface::load(builder, loc, arg);
+ arg.replaceAllUsesExcept(loadOp->getResult(0), loadOp);
+ }
+ }
+ }
+ if (auto yield =
+ llvm::dyn_cast<mlir::acc::YieldOp>(region.back().getTerminator())) {
+ llvm::SmallVector<mlir::Value> newOperands;
+ newOperands.reserve(yield.getNumOperands());
+ bool changed = false;
+ for (mlir::Value oldYieldArg : yield.getOperands()) {
+ if (oldYieldArg.getType() == oldType) {
+ builder.setInsertionPoint(yield);
+ mlir::Value alloca =
+ BufferizeInterface::placeInMemory(builder, loc, oldYieldArg);
+ newOperands.push_back(alloca);
+ changed = true;
+ } else {
+ newOperands.push_back(oldYieldArg);
+ }
+ }
+ if (changed)
+ yield->setOperands(newOperands);
+ }
+}
+
+static void updateRecipeUse(mlir::ArrayAttr recipes, mlir::ValueRange operands,
+ llvm::StringRef recipeSymName,
+ mlir::Operation *computeOp) {
+ if (!recipes)
+ return;
+ for (auto [recipeSym, oldRes] : llvm::zip(recipes, operands)) {
+ if (llvm::cast<mlir::SymbolRefAttr>(recipeSym).getLeafReference() !=
+ recipeSymName)
+ continue;
+
+ mlir::Operation *dataOp = oldRes.getDefiningOp();
+ assert(dataOp && "dataOp must be paired with computeOp");
+ mlir::Location loc = dataOp->getLoc();
+ mlir::OpBuilder builder(dataOp);
+ llvm::TypeSwitch<mlir::Operation *, void>(dataOp)
+ .Case<mlir::acc::PrivateOp, mlir::acc::FirstprivateOp,
+ mlir::acc::ReductionOp>([&](auto privateOp) {
+ builder.setInsertionPointAfterValue(privateOp.getVar());
+ mlir::Value alloca = BufferizeInterface::placeInMemory(
+ builder, loc, privateOp.getVar());
+ privateOp.getVarMutable().assign(alloca);
+ privateOp.getAccVar().setType(alloca.getType());
+ });
+
+ llvm::SmallVector<mlir::Operation *> users(oldRes.getUsers().begin(),
+ oldRes.getUsers().end());
+ for (mlir::Operation *useOp : users) {
+ if (useOp == computeOp)
+ continue;
+ builder.setInsertionPoint(useOp);
+ mlir::Operation *load = BufferizeInterface::load(builder, loc, oldRes);
+ useOp->replaceUsesOfWith(oldRes, load->getResult(0));
+ }
+ }
+}
+
+class ACCRecipeBufferization
+ : public fir::acc::impl::ACCRecipeBufferizationBase<
+ ACCRecipeBufferization> {
+public:
+ void runOnOperation() override {
+ mlir::ModuleOp module = getOperation();
+
+ llvm::SmallVector<llvm::StringRef> recipeNames;
+ module.walk([&](mlir::Operation *recipe) {
+ llvm::TypeSwitch<mlir::Operation *, void>(recipe)
+ .Case<mlir::acc::PrivateRecipeOp, mlir::acc::FirstprivateRecipeOp,
+ mlir::acc::ReductionRecipeOp>([&](auto recipe) {
+ mlir::Type oldType = recipe.getType();
+ auto bufferizedType =
+ BufferizeInterface::mustBufferize(recipe.getType());
+ if (!bufferizedType)
+ return;
+ recipe.setTypeAttr(mlir::TypeAttr::get(*bufferizedType));
+ mlir::Location loc = recipe.getLoc();
+ using RecipeOp = decltype(recipe);
+ bufferizeRegionArgsAndYields(recipe.getInitRegion(), loc, oldType,
+ *bufferizedType);
+ if constexpr (std::is_same_v<RecipeOp,
+ mlir::acc::FirstprivateRecipeOp>)
+ bufferizeRegionArgsAndYields(recipe.getCopyRegion(), loc, oldType,
+ *bufferizedType);
+ if constexpr (std::is_same_v<RecipeOp,
+ mlir::acc::ReductionRecipeOp>)
+ bufferizeRegionArgsAndYields(recipe.getCombinerRegion(), loc,
+ oldType, *bufferizedType);
+ bufferizeRegionArgsAndYields(recipe.getDestroyRegion(), loc,
+ oldType, *bufferizedType);
+ recipeNames.push_back(recipe.getSymName());
+ });
+ });
+ if (recipeNames.empty())
+ return;
+
+ module.walk([&](mlir::Operation *op) {
+ llvm::TypeSwitch<mlir::Operation *, void>(op)
+ .Case<mlir::acc::LoopOp, mlir::acc::ParallelOp, mlir::acc::SerialOp>(
+ [&](auto computeOp) {
+ for (llvm::StringRef recipeName : recipeNames) {
+ if (computeOp.getPrivatizationRecipes())
+ updateRecipeUse(computeOp.getPrivatizationRecipesAttr(),
+ computeOp.getPrivateOperands(), recipeName,
+ op);
+ if (computeOp.getFirstprivatizationRecipes())
+ updateRecipeUse(
+ computeOp.getFirstprivatizationRecipesAttr(),
+ computeOp.getFirstprivateOperands(), recipeName, op);
+ if (computeOp.getReductionRecipes())
+ updateRecipeUse(computeOp.getReductionRecipesAttr(),
+ computeOp.getReductionOperands(),
+ recipeName, op);
+ }
+ });
+ });
+ }
+};
+
+} // namespace
+
+std::unique_ptr<mlir::Pass> fir::acc::createACCRecipeBufferizationPass() {
+ return std::make_unique<ACCRecipeBufferization>();
+}
diff --git a/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
new file mode 100644
index 0000000..2427da0
--- /dev/null
+++ b/flang/lib/Optimizer/OpenACC/Transforms/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_flang_library(FIROpenACCTransforms
+ ACCRecipeBufferization.cpp
+
+ DEPENDS
+ FIROpenACCPassesIncGen
+
+ LINK_LIBS
+ MLIRIR
+ MLIRPass
+ FIRDialect
+ MLIROpenACCDialect
+)
diff --git a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
index 260e525..2bbd803 100644
--- a/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
+++ b/flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp
@@ -40,6 +40,7 @@
#include "mlir/IR/SymbolTable.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
@@ -128,6 +129,17 @@ class MapInfoFinalizationPass
}
}
+ /// Return true if the module has an OpenMP requires clause that includes
+ /// unified_shared_memory.
+ static bool moduleRequiresUSM(mlir::ModuleOp module) {
+ assert(module && "invalid module");
+ if (auto req = module->getAttrOfType<mlir::omp::ClauseRequiresAttr>(
+ "omp.requires"))
+ return mlir::omp::bitEnumContainsAll(
+ req.getValue(), mlir::omp::ClauseRequires::unified_shared_memory);
+ return false;
+ }
+
/// Create the member map for coordRef and append it (and its index
/// path) to the provided new* vectors, if it is not already present.
void appendMemberMapIfNew(
@@ -425,8 +437,12 @@ class MapInfoFinalizationPass
mapFlags flags = mapFlags::OMP_MAP_TO |
(mapFlags(mapTypeFlag) &
- (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_CLOSE |
- mapFlags::OMP_MAP_ALWAYS));
+ (mapFlags::OMP_MAP_IMPLICIT | mapFlags::OMP_MAP_ALWAYS));
+ // For unified_shared_memory, we additionally add `CLOSE` on the descriptor
+ // to ensure device-local placement where required by tests relying on USM +
+ // close semantics.
+ if (moduleRequiresUSM(target->getParentOfType<mlir::ModuleOp>()))
+ flags |= mapFlags::OMP_MAP_CLOSE;
return llvm::to_underlying(flags);
}
@@ -518,6 +534,75 @@ class MapInfoFinalizationPass
return newMapInfoOp;
}
+ // Expand mappings of type(C_PTR) to map their `__address` field explicitly
+ // as a single pointer-sized member (USM-gated at callsite). This helps in
+ // USM scenarios to ensure the pointer-sized mapping is used.
+ mlir::omp::MapInfoOp genCptrMemberMap(mlir::omp::MapInfoOp op,
+ fir::FirOpBuilder &builder) {
+ if (!op.getMembers().empty())
+ return op;
+
+ mlir::Type varTy = fir::unwrapRefType(op.getVarPtr().getType());
+ if (!mlir::isa<fir::RecordType>(varTy))
+ return op;
+ auto recTy = mlir::cast<fir::RecordType>(varTy);
+ // If not a builtin C_PTR record, skip.
+ if (!recTy.getName().ends_with("__builtin_c_ptr"))
+ return op;
+
+ // Find the index of the c_ptr address component named "__address".
+ int32_t fieldIdx = recTy.getFieldIndex("__address");
+ if (fieldIdx < 0)
+ return op;
+
+ mlir::Location loc = op.getVarPtr().getLoc();
+ mlir::Type memTy = recTy.getType(fieldIdx);
+ fir::IntOrValue idxConst =
+ mlir::IntegerAttr::get(builder.getI32Type(), fieldIdx);
+ mlir::Value coord = fir::CoordinateOp::create(
+ builder, loc, builder.getRefType(memTy), op.getVarPtr(),
+ llvm::SmallVector<fir::IntOrValue, 1>{idxConst});
+
+ // Child for the `__address` member.
+ llvm::SmallVector<llvm::SmallVector<int64_t>> memberIdx = {{0}};
+ mlir::ArrayAttr newMembersAttr = builder.create2DI64ArrayAttr(memberIdx);
+ // Force CLOSE in USM paths so the pointer gets device-local placement
+ // when required by tests relying on USM + close semantics.
+ uint64_t mapTypeVal =
+ op.getMapType() |
+ llvm::to_underlying(
+ llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
+ mlir::IntegerAttr mapTypeAttr = builder.getIntegerAttr(
+ builder.getIntegerType(64, /*isSigned=*/false), mapTypeVal);
+
+ mlir::omp::MapInfoOp memberMap = mlir::omp::MapInfoOp::create(
+ builder, loc, coord.getType(), coord,
+ mlir::TypeAttr::get(fir::unwrapRefType(coord.getType())), mapTypeAttr,
+ builder.getAttr<mlir::omp::VariableCaptureKindAttr>(
+ mlir::omp::VariableCaptureKind::ByRef),
+ /*varPtrPtr=*/mlir::Value{},
+ /*members=*/llvm::SmallVector<mlir::Value>{},
+ /*member_index=*/mlir::ArrayAttr{},
+ /*bounds=*/op.getBounds(),
+ /*mapperId=*/mlir::FlatSymbolRefAttr(),
+ /*name=*/op.getNameAttr(),
+ /*partial_map=*/builder.getBoolAttr(false));
+
+ // Rebuild the parent as a container with the `__address` member.
+ mlir::omp::MapInfoOp newParent = mlir::omp::MapInfoOp::create(
+ builder, op.getLoc(), op.getResult().getType(), op.getVarPtr(),
+ op.getVarTypeAttr(), mapTypeAttr, op.getMapCaptureTypeAttr(),
+ /*varPtrPtr=*/mlir::Value{},
+ /*members=*/llvm::SmallVector<mlir::Value>{memberMap},
+ /*member_index=*/newMembersAttr,
+ /*bounds=*/llvm::SmallVector<mlir::Value>{},
+ /*mapperId=*/mlir::FlatSymbolRefAttr(), op.getNameAttr(),
+ /*partial_map=*/builder.getBoolAttr(false));
+ op.replaceAllUsesWith(newParent.getResult());
+ op->erase();
+ return newParent;
+ }
+
mlir::omp::MapInfoOp genDescriptorMemberMaps(mlir::omp::MapInfoOp op,
fir::FirOpBuilder &builder,
mlir::Operation *target) {
@@ -1169,6 +1254,17 @@ class MapInfoFinalizationPass
genBoxcharMemberMap(op, builder);
});
+ // Expand type(C_PTR) only when unified_shared_memory is required,
+ // to ensure device-visible pointer size/behavior in USM scenarios
+ // without changing default expectations elsewhere.
+ func->walk([&](mlir::omp::MapInfoOp op) {
+ // Only expand C_PTR members when unified_shared_memory is required.
+ if (!moduleRequiresUSM(func->getParentOfType<mlir::ModuleOp>()))
+ return;
+ builder.setInsertionPoint(op);
+ genCptrMemberMap(op, builder);
+ });
+
func->walk([&](mlir::omp::MapInfoOp op) {
// TODO: Currently only supports a single user for the MapInfoOp. This
// is fine for the moment, as the Fortran frontend will generate a
diff --git a/flang/lib/Optimizer/Support/Utils.cpp b/flang/lib/Optimizer/Support/Utils.cpp
index c71642c..92390e4a 100644
--- a/flang/lib/Optimizer/Support/Utils.cpp
+++ b/flang/lib/Optimizer/Support/Utils.cpp
@@ -51,6 +51,16 @@ std::optional<llvm::ArrayRef<int64_t>> fir::getComponentLowerBoundsIfNonDefault(
return std::nullopt;
}
+std::optional<bool>
+fir::isRecordWithFinalRoutine(fir::RecordType recordType, mlir::ModuleOp module,
+ const mlir::SymbolTable *symbolTable) {
+ fir::TypeInfoOp typeInfo =
+ fir::lookupTypeInfoOp(recordType, module, symbolTable);
+ if (!typeInfo)
+ return std::nullopt;
+ return !typeInfo.getNoFinal();
+}
+
mlir::LLVM::ConstantOp
fir::genConstantIndex(mlir::Location loc, mlir::Type ity,
mlir::ConversionPatternRewriter &rewriter,
diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp
index 9507021..b5771eb 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -548,6 +548,14 @@ TYPE_PARSER(construct<OmpAllocatorSimpleModifier>(scalarIntExpr))
TYPE_PARSER(construct<OmpAlwaysModifier>( //
"ALWAYS" >> pure(OmpAlwaysModifier::Value::Always)))
+TYPE_PARSER(construct<OmpAttachModifier::Value>(
+ "ALWAYS" >> pure(OmpAttachModifier::Value::Always) ||
+ "AUTO" >> pure(OmpAttachModifier::Value::Auto) ||
+ "NEVER" >> pure(OmpAttachModifier::Value::Never)))
+
+TYPE_PARSER(construct<OmpAttachModifier>( //
+ "ATTACH" >> parenthesized(Parser<OmpAttachModifier::Value>{})))
+
TYPE_PARSER(construct<OmpAutomapModifier>(
"AUTOMAP" >> pure(OmpAutomapModifier::Value::Automap)))
@@ -744,6 +752,7 @@ TYPE_PARSER(sourced(
TYPE_PARSER(sourced(construct<OmpMapClause::Modifier>(
sourced(construct<OmpMapClause::Modifier>(Parser<OmpAlwaysModifier>{}) ||
+ construct<OmpMapClause::Modifier>(Parser<OmpAttachModifier>{}) ||
construct<OmpMapClause::Modifier>(Parser<OmpCloseModifier>{}) ||
construct<OmpMapClause::Modifier>(Parser<OmpDeleteModifier>{}) ||
construct<OmpMapClause::Modifier>(Parser<OmpPresentModifier>{}) ||
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 0511f5b..b172e429 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -2384,6 +2384,11 @@ public:
Walk(x.v);
Put(")");
}
+ void Unparse(const OmpAttachModifier &x) {
+ Word("ATTACH(");
+ Walk(x.v);
+ Put(")");
+ }
void Unparse(const OmpOrderClause &x) {
using Modifier = OmpOrderClause::Modifier;
Walk(std::get<std::optional<std::list<Modifier>>>(x.t), ":");
@@ -2820,6 +2825,7 @@ public:
WALK_NESTED_ENUM(OmpMapType, Value) // OMP map-type
WALK_NESTED_ENUM(OmpMapTypeModifier, Value) // OMP map-type-modifier
WALK_NESTED_ENUM(OmpAlwaysModifier, Value)
+ WALK_NESTED_ENUM(OmpAttachModifier, Value)
WALK_NESTED_ENUM(OmpCloseModifier, Value)
WALK_NESTED_ENUM(OmpDeleteModifier, Value)
WALK_NESTED_ENUM(OmpPresentModifier, Value)
diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp
index ea5e2c0..31e246c 100644
--- a/flang/lib/Semantics/check-declarations.cpp
+++ b/flang/lib/Semantics/check-declarations.cpp
@@ -3622,6 +3622,7 @@ void CheckHelper::CheckDioDtvArg(const Symbol &proc, const Symbol &subp,
ioKind == common::DefinedIo::ReadUnformatted
? Attr::INTENT_INOUT
: Attr::INTENT_IN);
+ CheckDioDummyIsScalar(subp, *arg);
}
}
@@ -3687,6 +3688,7 @@ void CheckHelper::CheckDioAssumedLenCharacterArg(const Symbol &subp,
"Dummy argument '%s' of a defined input/output procedure must be assumed-length CHARACTER of default kind"_err_en_US,
arg->name());
}
+ CheckDioDummyIsScalar(subp, *arg);
}
}
diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp
index 351af5c..515121a 100644
--- a/flang/lib/Semantics/check-omp-atomic.cpp
+++ b/flang/lib/Semantics/check-omp-atomic.cpp
@@ -519,8 +519,8 @@ private:
/// function references with scalar data pointer result of non-character
/// intrinsic type or variables that are non-polymorphic scalar pointers
/// and any length type parameter must be constant.
-void OmpStructureChecker::CheckAtomicType(
- SymbolRef sym, parser::CharBlock source, std::string_view name) {
+void OmpStructureChecker::CheckAtomicType(SymbolRef sym,
+ parser::CharBlock source, std::string_view name, bool checkTypeOnPointer) {
const DeclTypeSpec *typeSpec{sym->GetType()};
if (!typeSpec) {
return;
@@ -547,6 +547,22 @@ void OmpStructureChecker::CheckAtomicType(
return;
}
+ // Apply pointer-to-non-intrinsic rule only for intrinsic-assignment paths.
+ if (checkTypeOnPointer) {
+ using Category = DeclTypeSpec::Category;
+ Category cat{typeSpec->category()};
+ if (cat != Category::Numeric && cat != Category::Logical) {
+ std::string details = " has the POINTER attribute";
+ if (const auto *derived{typeSpec->AsDerived()}) {
+ details += " and derived type '"s + derived->name().ToString() + "'";
+ }
+ context_.Say(source,
+ "ATOMIC operation requires an intrinsic scalar variable; '%s'%s"_err_en_US,
+ sym->name(), details);
+ return;
+ }
+ }
+
// Go over all length parameters, if any, and check if they are
// explicit.
if (const DerivedTypeSpec *derived{typeSpec->AsDerived()}) {
@@ -562,7 +578,7 @@ void OmpStructureChecker::CheckAtomicType(
}
void OmpStructureChecker::CheckAtomicVariable(
- const SomeExpr &atom, parser::CharBlock source) {
+ const SomeExpr &atom, parser::CharBlock source, bool checkTypeOnPointer) {
if (atom.Rank() != 0) {
context_.Say(source, "Atomic variable %s should be a scalar"_err_en_US,
atom.AsFortran());
@@ -572,7 +588,7 @@ void OmpStructureChecker::CheckAtomicVariable(
assert(dsgs.size() == 1 && "Should have a single top-level designator");
evaluate::SymbolVector syms{evaluate::GetSymbolVector(dsgs.front())};
- CheckAtomicType(syms.back(), source, atom.AsFortran());
+ CheckAtomicType(syms.back(), source, atom.AsFortran(), checkTypeOnPointer);
if (IsAllocatable(syms.back()) && !IsArrayElement(atom)) {
context_.Say(source, "Atomic variable %s cannot be ALLOCATABLE"_err_en_US,
@@ -789,7 +805,8 @@ void OmpStructureChecker::CheckAtomicCaptureAssignment(
if (!IsVarOrFunctionRef(atom)) {
ErrorShouldBeVariable(atom, rsrc);
} else {
- CheckAtomicVariable(atom, rsrc);
+ CheckAtomicVariable(
+ atom, rsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(capture));
// This part should have been checked prior to calling this function.
assert(*GetConvertInput(capture.rhs) == atom &&
"This cannot be a capture assignment");
@@ -808,7 +825,8 @@ void OmpStructureChecker::CheckAtomicReadAssignment(
if (!IsVarOrFunctionRef(atom)) {
ErrorShouldBeVariable(atom, rsrc);
} else {
- CheckAtomicVariable(atom, rsrc);
+ CheckAtomicVariable(
+ atom, rsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(read));
CheckStorageOverlap(atom, {read.lhs}, source);
}
} else {
@@ -829,7 +847,8 @@ void OmpStructureChecker::CheckAtomicWriteAssignment(
if (!IsVarOrFunctionRef(atom)) {
ErrorShouldBeVariable(atom, rsrc);
} else {
- CheckAtomicVariable(atom, lsrc);
+ CheckAtomicVariable(
+ atom, lsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(write));
CheckStorageOverlap(atom, {write.rhs}, source);
}
}
@@ -854,7 +873,8 @@ OmpStructureChecker::CheckAtomicUpdateAssignment(
return std::nullopt;
}
- CheckAtomicVariable(atom, lsrc);
+ CheckAtomicVariable(
+ atom, lsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(update));
auto [hasErrors, tryReassoc]{CheckAtomicUpdateAssignmentRhs(
atom, update.rhs, source, /*suppressDiagnostics=*/true)};
@@ -1017,7 +1037,8 @@ void OmpStructureChecker::CheckAtomicConditionalUpdateAssignment(
return;
}
- CheckAtomicVariable(atom, alsrc);
+ CheckAtomicVariable(
+ atom, alsrc, /*checkTypeOnPointer=*/!IsPointerAssignment(assign));
auto top{GetTopLevelOperationIgnoreResizing(cond)};
// Missing arguments to operations would have been diagnosed by now.
diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index d65a89e..4b5610a 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -3017,8 +3017,8 @@ void OmpStructureChecker::Leave(const parser::OmpClauseList &) {
&objs,
std::string clause) {
for (const auto &obj : objs.v) {
- if (const parser::Name *
- objName{parser::Unwrap<parser::Name>(obj)}) {
+ if (const parser::Name *objName{
+ parser::Unwrap<parser::Name>(obj)}) {
if (&objName->symbol->GetUltimate() == eventHandleSym) {
context_.Say(GetContext().clauseSource,
"A variable: `%s` that appears in a DETACH clause cannot appear on %s clause on the same construct"_err_en_US,
@@ -3637,7 +3637,8 @@ void OmpStructureChecker::CheckReductionModifier(
if (modifier.v == ReductionModifier::Value::Task) {
// "Task" is only allowed on worksharing or "parallel" directive.
static llvm::omp::Directive worksharing[]{
- llvm::omp::Directive::OMPD_do, llvm::omp::Directive::OMPD_scope,
+ llvm::omp::Directive::OMPD_do, //
+ llvm::omp::Directive::OMPD_scope, //
llvm::omp::Directive::OMPD_sections,
// There are more worksharing directives, but they do not apply:
// "for" is C++ only,
@@ -4081,9 +4082,15 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Map &x) {
if (auto *iter{OmpGetUniqueModifier<parser::OmpIterator>(modifiers)}) {
CheckIteratorModifier(*iter);
}
+
+ using Directive = llvm::omp::Directive;
+ Directive dir{GetContext().directive};
+ llvm::ArrayRef<Directive> leafs{llvm::omp::getLeafConstructsOrSelf(dir)};
+ parser::OmpMapType::Value mapType{parser::OmpMapType::Value::Storage};
+
if (auto *type{OmpGetUniqueModifier<parser::OmpMapType>(modifiers)}) {
- using Directive = llvm::omp::Directive;
using Value = parser::OmpMapType::Value;
+ mapType = type->v;
static auto isValidForVersion{
[](parser::OmpMapType::Value t, unsigned version) {
@@ -4120,10 +4127,6 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Map &x) {
return result;
}()};
- llvm::omp::Directive dir{GetContext().directive};
- llvm::ArrayRef<llvm::omp::Directive> leafs{
- llvm::omp::getLeafConstructsOrSelf(dir)};
-
if (llvm::is_contained(leafs, Directive::OMPD_target) ||
llvm::is_contained(leafs, Directive::OMPD_target_data)) {
if (version >= 60) {
@@ -4141,6 +4144,43 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Map &x) {
}
}
+ if (auto *attach{
+ OmpGetUniqueModifier<parser::OmpAttachModifier>(modifiers)}) {
+ bool mapEnteringConstructOrMapper{
+ llvm::is_contained(leafs, Directive::OMPD_target) ||
+ llvm::is_contained(leafs, Directive::OMPD_target_data) ||
+ llvm::is_contained(leafs, Directive::OMPD_target_enter_data) ||
+ llvm::is_contained(leafs, Directive::OMPD_declare_mapper)};
+
+ if (!mapEnteringConstructOrMapper || !IsMapEnteringType(mapType)) {
+ const auto &desc{OmpGetDescriptor<parser::OmpAttachModifier>()};
+ context_.Say(OmpGetModifierSource(modifiers, attach),
+ "The '%s' modifier can only appear on a map-entering construct or on a DECLARE_MAPPER directive"_err_en_US,
+ desc.name.str());
+ }
+
+ auto hasBasePointer{[&](const SomeExpr &item) {
+ evaluate::SymbolVector symbols{evaluate::GetSymbolVector(item)};
+ return llvm::any_of(
+ symbols, [](SymbolRef s) { return IsPointer(s.get()); });
+ }};
+
+ evaluate::ExpressionAnalyzer ea{context_};
+ const auto &objects{std::get<parser::OmpObjectList>(x.v.t)};
+ for (auto &object : objects.v) {
+ if (const parser::Designator *d{GetDesignatorFromObj(object)}) {
+ if (auto &&expr{ea.Analyze(*d)}) {
+ if (hasBasePointer(*expr)) {
+ continue;
+ }
+ }
+ }
+ auto source{GetObjectSource(object)};
+ context_.Say(source ? *source : GetContext().clauseSource,
+ "A list-item that appears in a map clause with the ATTACH modifier must have a base-pointer"_err_en_US);
+ }
+ }
+
auto &&typeMods{
OmpGetRepeatableModifier<parser::OmpMapTypeModifier>(modifiers)};
struct Less {
diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h
index f507278..543642ff 100644
--- a/flang/lib/Semantics/check-omp-structure.h
+++ b/flang/lib/Semantics/check-omp-structure.h
@@ -262,10 +262,10 @@ private:
void CheckStorageOverlap(const evaluate::Expr<evaluate::SomeType> &,
llvm::ArrayRef<evaluate::Expr<evaluate::SomeType>>, parser::CharBlock);
void ErrorShouldBeVariable(const MaybeExpr &expr, parser::CharBlock source);
- void CheckAtomicType(
- SymbolRef sym, parser::CharBlock source, std::string_view name);
- void CheckAtomicVariable(
- const evaluate::Expr<evaluate::SomeType> &, parser::CharBlock);
+ void CheckAtomicType(SymbolRef sym, parser::CharBlock source,
+ std::string_view name, bool checkTypeOnPointer = true);
+ void CheckAtomicVariable(const evaluate::Expr<evaluate::SomeType> &,
+ parser::CharBlock, bool checkTypeOnPointer = true);
std::pair<const parser::ExecutionPartConstruct *,
const parser::ExecutionPartConstruct *>
CheckUpdateCapture(const parser::ExecutionPartConstruct *ec1,
diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp
index 8074c94..556259d 100644
--- a/flang/lib/Semantics/mod-file.cpp
+++ b/flang/lib/Semantics/mod-file.cpp
@@ -17,6 +17,7 @@
#include "flang/Semantics/semantics.h"
#include "flang/Semantics/symbol.h"
#include "flang/Semantics/tools.h"
+#include "llvm/Frontend/OpenMP/OMP.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
@@ -24,6 +25,7 @@
#include <fstream>
#include <set>
#include <string_view>
+#include <type_traits>
#include <variant>
#include <vector>
@@ -359,6 +361,40 @@ void ModFileWriter::PrepareRenamings(const Scope &scope) {
}
}
+static void PutOpenMPRequirements(llvm::raw_ostream &os, const Symbol &symbol) {
+ using RequiresClauses = WithOmpDeclarative::RequiresClauses;
+ using OmpMemoryOrderType = common::OmpMemoryOrderType;
+
+ const auto [reqs, order]{common::visit(
+ [&](auto &&details)
+ -> std::pair<const RequiresClauses *, const OmpMemoryOrderType *> {
+ if constexpr (std::is_convertible_v<decltype(details),
+ const WithOmpDeclarative &>) {
+ return {details.ompRequires(), details.ompAtomicDefaultMemOrder()};
+ } else {
+ return {nullptr, nullptr};
+ }
+ },
+ symbol.details())};
+
+ if (order) {
+ llvm::omp::Clause admo{llvm::omp::Clause::OMPC_atomic_default_mem_order};
+ os << "!$omp requires "
+ << parser::ToLowerCaseLetters(llvm::omp::getOpenMPClauseName(admo))
+ << '(' << parser::ToLowerCaseLetters(EnumToString(*order)) << ")\n";
+ }
+ if (reqs) {
+ os << "!$omp requires";
+ reqs->IterateOverMembers([&](llvm::omp::Clause f) {
+ if (f != llvm::omp::Clause::OMPC_atomic_default_mem_order) {
+ os << ' '
+ << parser::ToLowerCaseLetters(llvm::omp::getOpenMPClauseName(f));
+ }
+ });
+ os << "\n";
+ }
+}
+
// Put out the visible symbols from scope.
void ModFileWriter::PutSymbols(
const Scope &scope, UnorderedSymbolSet *hermeticModules) {
@@ -396,6 +432,7 @@ void ModFileWriter::PutSymbols(
for (const Symbol &symbol : uses) {
PutUse(symbol);
}
+ PutOpenMPRequirements(decls_, DEREF(scope.symbol()));
for (const auto &set : scope.equivalenceSets()) {
if (!set.empty() &&
!set.front().symbol.test(Symbol::Flag::CompilerCreated)) {
diff --git a/flang/lib/Semantics/openmp-modifiers.cpp b/flang/lib/Semantics/openmp-modifiers.cpp
index af4000c..717fb03 100644
--- a/flang/lib/Semantics/openmp-modifiers.cpp
+++ b/flang/lib/Semantics/openmp-modifiers.cpp
@@ -157,6 +157,22 @@ const OmpModifierDescriptor &OmpGetDescriptor<parser::OmpAlwaysModifier>() {
}
template <>
+const OmpModifierDescriptor &OmpGetDescriptor<parser::OmpAttachModifier>() {
+ static const OmpModifierDescriptor desc{
+ /*name=*/"attach-modifier",
+ /*props=*/
+ {
+ {61, {OmpProperty::Unique}},
+ },
+ /*clauses=*/
+ {
+ {61, {Clause::OMPC_map}},
+ },
+ };
+ return desc;
+}
+
+template <>
const OmpModifierDescriptor &OmpGetDescriptor<parser::OmpAutomapModifier>() {
static const OmpModifierDescriptor desc{
/*name=*/"automap-modifier",
diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp
index a8ec4d6..292e73b 100644
--- a/flang/lib/Semantics/openmp-utils.cpp
+++ b/flang/lib/Semantics/openmp-utils.cpp
@@ -13,6 +13,7 @@
#include "flang/Semantics/openmp-utils.h"
#include "flang/Common/Fortran-consts.h"
+#include "flang/Common/idioms.h"
#include "flang/Common/indirection.h"
#include "flang/Common/reference.h"
#include "flang/Common/visit.h"
@@ -59,6 +60,26 @@ const Scope &GetScopingUnit(const Scope &scope) {
return *iter;
}
+const Scope &GetProgramUnit(const Scope &scope) {
+ const Scope *unit{nullptr};
+ for (const Scope *iter{&scope}; !iter->IsTopLevel(); iter = &iter->parent()) {
+ switch (iter->kind()) {
+ case Scope::Kind::BlockData:
+ case Scope::Kind::MainProgram:
+ case Scope::Kind::Module:
+ return *iter;
+ case Scope::Kind::Subprogram:
+ // Ignore subprograms that are nested.
+ unit = iter;
+ break;
+ default:
+ break;
+ }
+ }
+ assert(unit && "Scope not in a program unit");
+ return *unit;
+}
+
SourcedActionStmt GetActionStmt(const parser::ExecutionPartConstruct *x) {
if (x == nullptr) {
return SourcedActionStmt{};
@@ -202,7 +223,7 @@ std::optional<SomeExpr> GetEvaluateExpr(const parser::Expr &parserExpr) {
// ForwardOwningPointer typedExpr
// `- GenericExprWrapper ^.get()
// `- std::optional<Expr> ^->v
- return typedExpr.get()->v;
+ return DEREF(typedExpr.get()).v;
}
std::optional<evaluate::DynamicType> GetDynamicType(
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 18fc638..1228493 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -435,6 +435,22 @@ public:
return true;
}
+ bool Pre(const parser::UseStmt &x) {
+ if (x.moduleName.symbol) {
+ Scope &thisScope{context_.FindScope(x.moduleName.source)};
+ common::visit(
+ [&](auto &&details) {
+ if constexpr (std::is_convertible_v<decltype(details),
+ const WithOmpDeclarative &>) {
+ AddOmpRequiresToScope(thisScope, details.ompRequires(),
+ details.ompAtomicDefaultMemOrder());
+ }
+ },
+ x.moduleName.symbol->details());
+ }
+ return true;
+ }
+
bool Pre(const parser::OmpMetadirectiveDirective &x) {
PushContext(x.v.source, llvm::omp::Directive::OMPD_metadirective);
return true;
@@ -538,38 +554,37 @@ public:
void Post(const parser::OpenMPFlushConstruct &) { PopContext(); }
bool Pre(const parser::OpenMPRequiresConstruct &x) {
- using Flags = WithOmpDeclarative::RequiresFlags;
- using Requires = WithOmpDeclarative::RequiresFlag;
+ using RequiresClauses = WithOmpDeclarative::RequiresClauses;
PushContext(x.source, llvm::omp::Directive::OMPD_requires);
// Gather information from the clauses.
- Flags flags;
- std::optional<common::OmpMemoryOrderType> memOrder;
+ RequiresClauses reqs;
+ const common::OmpMemoryOrderType *memOrder{nullptr};
for (const parser::OmpClause &clause : x.v.Clauses().v) {
- flags |= common::visit(
+ using OmpClause = parser::OmpClause;
+ reqs |= common::visit(
common::visitors{
- [&memOrder](
- const parser::OmpClause::AtomicDefaultMemOrder &atomic) {
- memOrder = atomic.v.v;
- return Flags{};
- },
- [](const parser::OmpClause::ReverseOffload &) {
- return Flags{Requires::ReverseOffload};
- },
- [](const parser::OmpClause::UnifiedAddress &) {
- return Flags{Requires::UnifiedAddress};
+ [&](const OmpClause::AtomicDefaultMemOrder &atomic) {
+ memOrder = &atomic.v.v;
+ return RequiresClauses{};
},
- [](const parser::OmpClause::UnifiedSharedMemory &) {
- return Flags{Requires::UnifiedSharedMemory};
- },
- [](const parser::OmpClause::DynamicAllocators &) {
- return Flags{Requires::DynamicAllocators};
+ [&](auto &&s) {
+ using TypeS = llvm::remove_cvref_t<decltype(s)>;
+ if constexpr ( //
+ std::is_same_v<TypeS, OmpClause::DynamicAllocators> ||
+ std::is_same_v<TypeS, OmpClause::ReverseOffload> ||
+ std::is_same_v<TypeS, OmpClause::UnifiedAddress> ||
+ std::is_same_v<TypeS, OmpClause::UnifiedSharedMemory>) {
+ return RequiresClauses{clause.Id()};
+ } else {
+ return RequiresClauses{};
+ }
},
- [](const auto &) { return Flags{}; }},
+ },
clause.u);
}
// Merge clauses into parents' symbols details.
- AddOmpRequiresToScope(currScope(), flags, memOrder);
+ AddOmpRequiresToScope(currScope(), &reqs, memOrder);
return true;
}
void Post(const parser::OpenMPRequiresConstruct &) { PopContext(); }
@@ -1001,8 +1016,9 @@ private:
std::int64_t ordCollapseLevel{0};
- void AddOmpRequiresToScope(Scope &, WithOmpDeclarative::RequiresFlags,
- std::optional<common::OmpMemoryOrderType>);
+ void AddOmpRequiresToScope(Scope &,
+ const WithOmpDeclarative::RequiresClauses *,
+ const common::OmpMemoryOrderType *);
void IssueNonConformanceWarning(llvm::omp::Directive D,
parser::CharBlock source, unsigned EmitFromVersion);
@@ -3309,86 +3325,6 @@ void ResolveOmpParts(
}
}
-void ResolveOmpTopLevelParts(
- SemanticsContext &context, const parser::Program &program) {
- if (!context.IsEnabled(common::LanguageFeature::OpenMP)) {
- return;
- }
-
- // Gather REQUIRES clauses from all non-module top-level program unit symbols,
- // combine them together ensuring compatibility and apply them to all these
- // program units. Modules are skipped because their REQUIRES clauses should be
- // propagated via USE statements instead.
- WithOmpDeclarative::RequiresFlags combinedFlags;
- std::optional<common::OmpMemoryOrderType> combinedMemOrder;
-
- // Function to go through non-module top level program units and extract
- // REQUIRES information to be processed by a function-like argument.
- auto processProgramUnits{[&](auto processFn) {
- for (const parser::ProgramUnit &unit : program.v) {
- if (!std::holds_alternative<common::Indirection<parser::Module>>(
- unit.u) &&
- !std::holds_alternative<common::Indirection<parser::Submodule>>(
- unit.u) &&
- !std::holds_alternative<
- common::Indirection<parser::CompilerDirective>>(unit.u)) {
- Symbol *symbol{common::visit(
- [&context](auto &x) {
- Scope *scope = GetScope(context, x.value());
- return scope ? scope->symbol() : nullptr;
- },
- unit.u)};
- // FIXME There is no symbol defined for MainProgram units in certain
- // circumstances, so REQUIRES information has no place to be stored in
- // these cases.
- if (!symbol) {
- continue;
- }
- common::visit(
- [&](auto &details) {
- if constexpr (std::is_convertible_v<decltype(&details),
- WithOmpDeclarative *>) {
- processFn(*symbol, details);
- }
- },
- symbol->details());
- }
- }
- }};
-
- // Combine global REQUIRES information from all program units except modules
- // and submodules.
- processProgramUnits([&](Symbol &symbol, WithOmpDeclarative &details) {
- if (const WithOmpDeclarative::RequiresFlags *
- flags{details.ompRequires()}) {
- combinedFlags |= *flags;
- }
- if (const common::OmpMemoryOrderType *
- memOrder{details.ompAtomicDefaultMemOrder()}) {
- if (combinedMemOrder && *combinedMemOrder != *memOrder) {
- context.Say(symbol.scope()->sourceRange(),
- "Conflicting '%s' REQUIRES clauses found in compilation "
- "unit"_err_en_US,
- parser::ToUpperCaseLetters(llvm::omp::getOpenMPClauseName(
- llvm::omp::Clause::OMPC_atomic_default_mem_order)
- .str()));
- }
- combinedMemOrder = *memOrder;
- }
- });
-
- // Update all program units except modules and submodules with the combined
- // global REQUIRES information.
- processProgramUnits([&](Symbol &, WithOmpDeclarative &details) {
- if (combinedFlags.any()) {
- details.set_ompRequires(combinedFlags);
- }
- if (combinedMemOrder) {
- details.set_ompAtomicDefaultMemOrder(*combinedMemOrder);
- }
- });
-}
-
static bool IsSymbolThreadprivate(const Symbol &symbol) {
if (const auto *details{symbol.detailsIf<HostAssocDetails>()}) {
return details->symbol().test(Symbol::Flag::OmpThreadprivate);
@@ -3547,42 +3483,39 @@ void OmpAttributeVisitor::CheckLabelContext(const parser::CharBlock source,
}
void OmpAttributeVisitor::AddOmpRequiresToScope(Scope &scope,
- WithOmpDeclarative::RequiresFlags flags,
- std::optional<common::OmpMemoryOrderType> memOrder) {
- Scope *scopeIter = &scope;
- do {
- if (Symbol * symbol{scopeIter->symbol()}) {
- common::visit(
- [&](auto &details) {
- // Store clauses information into the symbol for the parent and
- // enclosing modules, programs, functions and subroutines.
- if constexpr (std::is_convertible_v<decltype(&details),
- WithOmpDeclarative *>) {
- if (flags.any()) {
- if (const WithOmpDeclarative::RequiresFlags *
- otherFlags{details.ompRequires()}) {
- flags |= *otherFlags;
- }
- details.set_ompRequires(flags);
+ const WithOmpDeclarative::RequiresClauses *reqs,
+ const common::OmpMemoryOrderType *memOrder) {
+ const Scope &programUnit{omp::GetProgramUnit(scope)};
+ using RequiresClauses = WithOmpDeclarative::RequiresClauses;
+ RequiresClauses combinedReqs{reqs ? *reqs : RequiresClauses{}};
+
+ if (auto *symbol{const_cast<Symbol *>(programUnit.symbol())}) {
+ common::visit(
+ [&](auto &details) {
+ if constexpr (std::is_convertible_v<decltype(&details),
+ WithOmpDeclarative *>) {
+ if (combinedReqs.any()) {
+ if (const RequiresClauses *otherReqs{details.ompRequires()}) {
+ combinedReqs |= *otherReqs;
}
- if (memOrder) {
- if (details.has_ompAtomicDefaultMemOrder() &&
- *details.ompAtomicDefaultMemOrder() != *memOrder) {
- context_.Say(scopeIter->sourceRange(),
- "Conflicting '%s' REQUIRES clauses found in compilation "
- "unit"_err_en_US,
- parser::ToUpperCaseLetters(llvm::omp::getOpenMPClauseName(
- llvm::omp::Clause::OMPC_atomic_default_mem_order)
- .str()));
- }
- details.set_ompAtomicDefaultMemOrder(*memOrder);
+ details.set_ompRequires(combinedReqs);
+ }
+ if (memOrder) {
+ if (details.has_ompAtomicDefaultMemOrder() &&
+ *details.ompAtomicDefaultMemOrder() != *memOrder) {
+ context_.Say(programUnit.sourceRange(),
+ "Conflicting '%s' REQUIRES clauses found in compilation "
+ "unit"_err_en_US,
+ parser::ToUpperCaseLetters(llvm::omp::getOpenMPClauseName(
+ llvm::omp::Clause::OMPC_atomic_default_mem_order)
+ .str()));
}
+ details.set_ompAtomicDefaultMemOrder(*memOrder);
}
- },
- symbol->details());
- }
- scopeIter = &scopeIter->parent();
- } while (!scopeIter->IsGlobal());
+ }
+ },
+ symbol->details());
+ }
}
void OmpAttributeVisitor::IssueNonConformanceWarning(llvm::omp::Directive D,
diff --git a/flang/lib/Semantics/resolve-directives.h b/flang/lib/Semantics/resolve-directives.h
index 5a890c2..36d3ce9 100644
--- a/flang/lib/Semantics/resolve-directives.h
+++ b/flang/lib/Semantics/resolve-directives.h
@@ -23,7 +23,5 @@ class SemanticsContext;
void ResolveAccParts(
SemanticsContext &, const parser::ProgramUnit &, Scope *topScope);
void ResolveOmpParts(SemanticsContext &, const parser::ProgramUnit &);
-void ResolveOmpTopLevelParts(SemanticsContext &, const parser::Program &);
-
} // namespace Fortran::semantics
#endif
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 86121880..ae0ff9ca 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -10687,9 +10687,6 @@ void ResolveNamesVisitor::Post(const parser::Program &x) {
CHECK(!attrs_);
CHECK(!cudaDataAttr_);
CHECK(!GetDeclTypeSpec());
- // Top-level resolution to propagate information across program units after
- // each of them has been resolved separately.
- ResolveOmpTopLevelParts(context(), x);
}
// A singleton instance of the scope -> IMPLICIT rules mapping is
diff --git a/flang/lib/Semantics/symbol.cpp b/flang/lib/Semantics/symbol.cpp
index 69169469..0ec44b7 100644
--- a/flang/lib/Semantics/symbol.cpp
+++ b/flang/lib/Semantics/symbol.cpp
@@ -70,6 +70,32 @@ static void DumpList(llvm::raw_ostream &os, const char *label, const T &list) {
}
}
+llvm::raw_ostream &operator<<(
+ llvm::raw_ostream &os, const WithOmpDeclarative &x) {
+ if (x.has_ompRequires() || x.has_ompAtomicDefaultMemOrder()) {
+ os << " OmpRequirements:(";
+ if (const common::OmpMemoryOrderType *admo{x.ompAtomicDefaultMemOrder()}) {
+ os << parser::ToLowerCaseLetters(llvm::omp::getOpenMPClauseName(
+ llvm::omp::Clause::OMPC_atomic_default_mem_order))
+ << '(' << parser::ToLowerCaseLetters(EnumToString(*admo)) << ')';
+ if (x.has_ompRequires()) {
+ os << ',';
+ }
+ }
+ if (const WithOmpDeclarative::RequiresClauses *reqs{x.ompRequires()}) {
+ size_t num{0}, size{reqs->count()};
+ reqs->IterateOverMembers([&](llvm::omp::Clause f) {
+ os << parser::ToLowerCaseLetters(llvm::omp::getOpenMPClauseName(f));
+ if (++num < size) {
+ os << ',';
+ }
+ });
+ }
+ os << ')';
+ }
+ return os;
+}
+
void SubprogramDetails::set_moduleInterface(Symbol &symbol) {
CHECK(!moduleInterface_);
moduleInterface_ = &symbol;
@@ -150,6 +176,7 @@ llvm::raw_ostream &operator<<(
os << x;
}
}
+ os << static_cast<const WithOmpDeclarative &>(x);
return os;
}
@@ -580,7 +607,9 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const Details &details) {
common::visit( //
common::visitors{
[&](const UnknownDetails &) {},
- [&](const MainProgramDetails &) {},
+ [&](const MainProgramDetails &x) {
+ os << static_cast<const WithOmpDeclarative &>(x);
+ },
[&](const ModuleDetails &x) {
if (x.isSubmodule()) {
os << " (";
@@ -599,6 +628,7 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &os, const Details &details) {
if (x.isDefaultPrivate()) {
os << " isDefaultPrivate";
}
+ os << static_cast<const WithOmpDeclarative &>(x);
},
[&](const SubprogramNameDetails &x) {
os << ' ' << EnumToString(x.kind());
diff --git a/flang/test/Fir/CUDA/cuda-code-gen.mlir b/flang/test/Fir/CUDA/cuda-code-gen.mlir
index bbd3f9f..60cda9e 100644
--- a/flang/test/Fir/CUDA/cuda-code-gen.mlir
+++ b/flang/test/Fir/CUDA/cuda-code-gen.mlir
@@ -284,3 +284,31 @@ module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_e
// CHECK-LABEL: llvm.func @_QQxxx()
// CHECK: llvm.alloca %{{.*}} x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
// CHECK-NOT: llvm.call @_FortranACUFAllocDescriptor
+
+// -----
+
+module attributes {gpu.container_module, dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>} {
+ gpu.module @cuda_device_mod {
+ fir.global @_QMkernelsEinitial_val {data_attr = #cuf.cuda<constant>} : i32 {
+ %0 = fir.zero_bits i32
+ fir.has_value %0 : i32
+ }
+ gpu.func @_QMkernelsPassign(%arg0: !fir.ref<!fir.array<?xi32>>) kernel {
+ %c-1 = arith.constant -1 : index
+ %c1_i32 = arith.constant 1 : i32
+ %0 = arith.constant 1 : i32
+ %1 = arith.addi %0, %c1_i32 : i32
+ %2 = fir.address_of(@_QMkernelsEinitial_val) : !fir.ref<i32>
+ %4 = fir.load %2 : !fir.ref<i32>
+ %5 = fir.convert %1 : (i32) -> i64
+ %6 = fircg.ext_array_coor %arg0(%c-1)<%5> : (!fir.ref<!fir.array<?xi32>>, index, i64) -> !fir.ref<i32>
+ fir.store %4 to %6 : !fir.ref<i32>
+ gpu.return
+ }
+ }
+}
+
+// CHECK: llvm.mlir.global external @_QMkernelsEinitial_val() {addr_space = 4 : i32} : i32
+// CHECK-LABEL: gpu.func @_QMkernelsPassign
+// CHECK: %[[ADDROF:.*]] = llvm.mlir.addressof @_QMkernelsEinitial_val : !llvm.ptr<4>
+// CHECK: %{{.*}} = llvm.addrspacecast %[[ADDROF]] : !llvm.ptr<4> to !llvm.ptr
diff --git a/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir
new file mode 100644
index 0000000..fabfe4c
--- /dev/null
+++ b/flang/test/Fir/OpenACC/openacc-type-categories-declare-storage.mlir
@@ -0,0 +1,24 @@
+// Use --mlir-disable-threading so that the diagnostic printing is serialized.
+// RUN: fir-opt %s -pass-pipeline='builtin.module(test-fir-openacc-interfaces)' -split-input-file --mlir-disable-threading 2>&1 | FileCheck %s
+
+module {
+ // Build a scalar view via fir.declare with a storage operand into an array of i8
+ func.func @_QPdeclare_with_storage_is_nonscalar() {
+ %c0 = arith.constant 0 : index
+ %arr = fir.alloca !fir.array<4xi8>
+ %elem_i8 = fir.coordinate_of %arr, %c0 : (!fir.ref<!fir.array<4xi8>>, index) -> !fir.ref<i8>
+ %elem_f32 = fir.convert %elem_i8 : (!fir.ref<i8>) -> !fir.ref<f32>
+ %view = fir.declare %elem_f32 storage(%arr[0]) {uniq_name = "_QFpi"}
+ : (!fir.ref<f32>, !fir.ref<!fir.array<4xi8>>) -> !fir.ref<f32>
+ // Force interface query through an acc op that prints type category
+ %cp = acc.copyin varPtr(%view : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false}
+ acc.enter_data dataOperands(%cp : !fir.ref<f32>)
+ return
+ }
+
+ // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref<f32>) -> !fir.ref<f32> {name = "pi", structured = false}
+ // CHECK: Pointer-like and Mappable: !fir.ref<f32>
+ // CHECK: Type category: array
+}
+
+
diff --git a/flang/test/Fir/OpenACC/recipe-bufferization.mlir b/flang/test/Fir/OpenACC/recipe-bufferization.mlir
new file mode 100644
index 0000000..c4f96f6
--- /dev/null
+++ b/flang/test/Fir/OpenACC/recipe-bufferization.mlir
@@ -0,0 +1,316 @@
+// RUN: fir-opt %s --fir-acc-recipe-bufferization -split-input-file | FileCheck %s
+
+// -----
+
+acc.private.recipe @priv_ref_box : !fir.box<i32> init {
+^bb0(%arg0: !fir.box<i32>):
+ %1 = fir.allocmem i32
+ %2 = fir.embox %1 : (!fir.heap<i32>) -> !fir.box<i32>
+ acc.yield %2 : !fir.box<i32>
+} destroy {
+^bb0(%arg0: !fir.box<i32>, %arg1: !fir.box<i32>):
+ %0 = fir.box_addr %arg1 : (!fir.box<i32>) -> !fir.ref<i32>
+ %1 = fir.convert %0 : (!fir.ref<i32>) -> !fir.heap<i32>
+ fir.freemem %1 : !fir.heap<i32>
+ acc.yield
+}
+
+// CHECK-LABEL: acc.private.recipe @priv_ref_box : !fir.ref<!fir.box<i32>> init
+// CHECK: ^bb0(%[[ARG:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[EMBOX:.*]] = fir.embox
+// CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[EMBOX]] to %[[ALLOCA]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[ALLOCA]] : !fir.ref<!fir.box<i32>>
+// CHECK: } destroy {
+// CHECK: ^bb0(%[[DARG0:.*]]: !fir.ref<!fir.box<i32>>, %[[DARG1:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[LD1:.*]] = fir.load %[[DARG1]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[ADDR:.*]] = fir.box_addr %[[LD1]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: %[[CVT:.*]] = fir.convert %[[ADDR]] : (!fir.ref<i32>) -> !fir.heap<i32>
+
+// -----
+
+// Test private recipe without destroy region.
+
+acc.private.recipe @priv_ref_box_no_destroy : !fir.box<i32> init {
+^bb0(%arg0: !fir.box<i32>):
+ %1 = fir.alloca i32
+ %2 = fir.embox %1 : (!fir.ref<i32>) -> !fir.box<i32>
+ acc.yield %2 : !fir.box<i32>
+}
+
+// CHECK-LABEL: acc.private.recipe @priv_ref_box_no_destroy : !fir.ref<!fir.box<i32>> init
+// CHECK: ^bb0(%[[ARG:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[EMBOX:.*]] = fir.embox
+// CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[EMBOX]] to %[[ALLOCA]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[ALLOCA]] : !fir.ref<!fir.box<i32>>
+// CHECK: }
+
+// -----
+
+// Firstprivate recipe with destroy region.
+acc.firstprivate.recipe @fp_ref_box : !fir.box<i32> init {
+^bb0(%arg0: !fir.box<i32>):
+ %0 = fir.allocmem i32
+ %1 = fir.embox %0 : (!fir.heap<i32>) -> !fir.box<i32>
+ acc.yield %1 : !fir.box<i32>
+} copy {
+^bb0(%src: !fir.box<i32>, %dst: !fir.box<i32>):
+ %s_addr = fir.box_addr %src : (!fir.box<i32>) -> !fir.ref<i32>
+ %val = fir.load %s_addr : !fir.ref<i32>
+ %d_addr = fir.box_addr %dst : (!fir.box<i32>) -> !fir.ref<i32>
+ fir.store %val to %d_addr : !fir.ref<i32>
+ acc.yield
+} destroy {
+^bb0(%arg0: !fir.box<i32>, %arg1: !fir.box<i32>):
+ acc.yield
+}
+
+// CHECK-LABEL: acc.firstprivate.recipe @fp_ref_box : !fir.ref<!fir.box<i32>> init
+// CHECK: ^bb0(%[[IARG:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[EMBOX_FP:.*]] = fir.embox
+// CHECK: %[[ALLOCA_FP:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[EMBOX_FP]] to %[[ALLOCA_FP]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[ALLOCA_FP]] : !fir.ref<!fir.box<i32>>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC:.*]]: !fir.ref<!fir.box<i32>>, %[[DST:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[LSRC:.*]] = fir.load %[[SRC]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[LDST:.*]] = fir.load %[[DST]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[SADDR:.*]] = fir.box_addr %[[LSRC]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: %[[VAL:.*]] = fir.load %[[SADDR]] : !fir.ref<i32>
+// CHECK: %[[DADDR:.*]] = fir.box_addr %[[LDST]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: fir.store %[[VAL]] to %[[DADDR]] : !fir.ref<i32>
+// CHECK: } destroy {
+// CHECK: ^bb0(%[[FDARG0:.*]]: !fir.ref<!fir.box<i32>>, %[[FDARG1:.*]]: !fir.ref<!fir.box<i32>>)
+
+// -----
+
+// Firstprivate recipe without destroy region.
+acc.firstprivate.recipe @fp_ref_box_no_destroy : !fir.box<i32> init {
+^bb0(%arg0: !fir.box<i32>):
+ %0 = fir.alloca i32
+ %1 = fir.embox %0 : (!fir.ref<i32>) -> !fir.box<i32>
+ acc.yield %1 : !fir.box<i32>
+} copy {
+^bb0(%src: !fir.box<i32>, %dst: !fir.box<i32>):
+ %s_addr = fir.box_addr %src : (!fir.box<i32>) -> !fir.ref<i32>
+ %val = fir.load %s_addr : !fir.ref<i32>
+ %d_addr = fir.box_addr %dst : (!fir.box<i32>) -> !fir.ref<i32>
+ fir.store %val to %d_addr : !fir.ref<i32>
+ acc.yield
+}
+
+// CHECK-LABEL: acc.firstprivate.recipe @fp_ref_box_no_destroy : !fir.ref<!fir.box<i32>> init
+// CHECK: ^bb0(%[[IARG2:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[EMBOX_FP2:.*]] = fir.embox
+// CHECK: %[[ALLOCA_FP2:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[EMBOX_FP2]] to %[[ALLOCA_FP2]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[ALLOCA_FP2]] : !fir.ref<!fir.box<i32>>
+// CHECK: } copy {
+// CHECK: ^bb0(%[[SRC2:.*]]: !fir.ref<!fir.box<i32>>, %[[DST2:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[LSRC2:.*]] = fir.load %[[SRC2]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[LDST2:.*]] = fir.load %[[DST2]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[SADDR2:.*]] = fir.box_addr %[[LSRC2]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: %[[VAL2:.*]] = fir.load %[[SADDR2]] : !fir.ref<i32>
+// CHECK: %[[DADDR2:.*]] = fir.box_addr %[[LDST2]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: fir.store %[[VAL2]] to %[[DADDR2]] : !fir.ref<i32>
+
+// -----
+
+// Reduction recipe with destroy region.
+acc.reduction.recipe @red_ref_box : !fir.box<i32> reduction_operator <add> init {
+^bb0(%arg0: !fir.box<i32>):
+ %0 = fir.allocmem i32
+ %1 = fir.embox %0 : (!fir.heap<i32>) -> !fir.box<i32>
+ acc.yield %1 : !fir.box<i32>
+} combiner {
+^bb0(%lhs: !fir.box<i32>, %rhs: !fir.box<i32>):
+ %l_addr = fir.box_addr %lhs : (!fir.box<i32>) -> !fir.ref<i32>
+ %l_val = fir.load %l_addr : !fir.ref<i32>
+ %r_addr = fir.box_addr %rhs : (!fir.box<i32>) -> !fir.ref<i32>
+ %r_val = fir.load %r_addr : !fir.ref<i32>
+ %sum = arith.addi %l_val, %r_val : i32
+ %tmp = fir.alloca i32
+ fir.store %sum to %tmp : !fir.ref<i32>
+ %new = fir.embox %tmp : (!fir.ref<i32>) -> !fir.box<i32>
+ acc.yield %new : !fir.box<i32>
+} destroy {
+^bb0(%arg0: !fir.box<i32>, %arg1: !fir.box<i32>):
+ acc.yield
+}
+
+// CHECK-LABEL: acc.reduction.recipe @red_ref_box : !fir.ref<!fir.box<i32>> reduction_operator <add> init
+// CHECK: ^bb0(%[[IARGR:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[EMBOXR:.*]] = fir.embox
+// CHECK: %[[ALLOCAR:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[EMBOXR]] to %[[ALLOCAR]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[ALLOCAR]] : !fir.ref<!fir.box<i32>>
+// CHECK: } combiner {
+// CHECK: ^bb0(%[[LHS:.*]]: !fir.ref<!fir.box<i32>>, %[[RHS:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[LLHS:.*]] = fir.load %[[LHS]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[LRHS:.*]] = fir.load %[[RHS]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[LADDR:.*]] = fir.box_addr %[[LLHS]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: %[[LVAL:.*]] = fir.load %[[LADDR]] : !fir.ref<i32>
+// CHECK: %[[RADDR:.*]] = fir.box_addr %[[LRHS]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: %[[RVAL:.*]] = fir.load %[[RADDR]] : !fir.ref<i32>
+// CHECK: %[[SUM:.*]] = arith.addi %[[LVAL]], %[[RVAL]] : i32
+// CHECK: %[[I32ALLOCA:.*]] = fir.alloca i32
+// CHECK: fir.store %[[SUM]] to %[[I32ALLOCA]] : !fir.ref<i32>
+// CHECK: %[[NEWBOX:.*]] = fir.embox %[[I32ALLOCA]] : (!fir.ref<i32>) -> !fir.box<i32>
+// CHECK: %[[BOXALLOCA:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[NEWBOX]] to %[[BOXALLOCA]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[BOXALLOCA]] : !fir.ref<!fir.box<i32>>
+// CHECK: } destroy {
+// CHECK: ^bb0(%[[RD0:.*]]: !fir.ref<!fir.box<i32>>, %[[RD1:.*]]: !fir.ref<!fir.box<i32>>)
+
+// -----
+
+// Reduction recipe without destroy region.
+acc.reduction.recipe @red_ref_box_no_destroy : !fir.box<i32> reduction_operator <add> init {
+^bb0(%arg0: !fir.box<i32>):
+ %0 = fir.alloca i32
+ %1 = fir.embox %0 : (!fir.ref<i32>) -> !fir.box<i32>
+ acc.yield %1 : !fir.box<i32>
+} combiner {
+^bb0(%lhs: !fir.box<i32>, %rhs: !fir.box<i32>):
+ %l_addr = fir.box_addr %lhs : (!fir.box<i32>) -> !fir.ref<i32>
+ %l_val = fir.load %l_addr : !fir.ref<i32>
+ %r_addr = fir.box_addr %rhs : (!fir.box<i32>) -> !fir.ref<i32>
+ %r_val = fir.load %r_addr : !fir.ref<i32>
+ %sum = arith.addi %l_val, %r_val : i32
+ %tmp = fir.alloca i32
+ fir.store %sum to %tmp : !fir.ref<i32>
+ %new = fir.embox %tmp : (!fir.ref<i32>) -> !fir.box<i32>
+ acc.yield %new : !fir.box<i32>
+}
+
+// CHECK-LABEL: acc.reduction.recipe @red_ref_box_no_destroy : !fir.ref<!fir.box<i32>> reduction_operator <add> init
+// CHECK: ^bb0(%[[IARGR2:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[EMBOXR2:.*]] = fir.embox
+// CHECK: %[[ALLOCAR2:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[EMBOXR2]] to %[[ALLOCAR2]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[ALLOCAR2]] : !fir.ref<!fir.box<i32>>
+// CHECK: } combiner {
+// CHECK: ^bb0(%[[LHS2:.*]]: !fir.ref<!fir.box<i32>>, %[[RHS2:.*]]: !fir.ref<!fir.box<i32>>)
+// CHECK: %[[LLHS2:.*]] = fir.load %[[LHS2]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[LRHS2:.*]] = fir.load %[[RHS2]] : !fir.ref<!fir.box<i32>>
+// CHECK: %[[LADDR2:.*]] = fir.box_addr %[[LLHS2]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: %[[LVAL2:.*]] = fir.load %[[LADDR2]] : !fir.ref<i32>
+// CHECK: %[[RADDR2:.*]] = fir.box_addr %[[LRHS2]] : (!fir.box<i32>) -> !fir.ref<i32>
+// CHECK: %[[RVAL2:.*]] = fir.load %[[RADDR2]] : !fir.ref<i32>
+// CHECK: %[[SUM2:.*]] = arith.addi %[[LVAL2]], %[[RVAL2]] : i32
+// CHECK: %[[I32ALLOCA2:.*]] = fir.alloca i32
+// CHECK: fir.store %[[SUM2]] to %[[I32ALLOCA2]] : !fir.ref<i32>
+// CHECK: %[[NEWBOX2:.*]] = fir.embox %[[I32ALLOCA2]] : (!fir.ref<i32>) -> !fir.box<i32>
+// CHECK: %[[BOXALLOCA2:.*]] = fir.alloca !fir.box<i32>
+// CHECK: fir.store %[[NEWBOX2]] to %[[BOXALLOCA2]] : !fir.ref<!fir.box<i32>>
+// CHECK: acc.yield %[[BOXALLOCA2]] : !fir.ref<!fir.box<i32>>
+
+// -----
+
+// Comprehensive tests that also test recipe usages updates.
+
+acc.private.recipe @privatization_ref_i32 : !fir.ref<i32> init {
+^bb0(%arg0: !fir.ref<i32>):
+ %0 = fir.alloca i32
+ %1 = fir.declare %0 {uniq_name = "acc.private.init"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ acc.yield %1 : !fir.ref<i32>
+}
+acc.private.recipe @privatization_box_Uxf32 : !fir.box<!fir.array<?xf32>> init {
+^bb0(%arg0: !fir.box<!fir.array<?xf32>>):
+ %c0 = arith.constant 0 : index
+ %0:3 = fir.box_dims %arg0, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+ %1 = fir.shape %0#1 : (index) -> !fir.shape<1>
+ %2 = fir.allocmem !fir.array<?xf32>, %0#1 {bindc_name = ".tmp", uniq_name = ""}
+ %3 = fir.declare %2(%1) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.heap<!fir.array<?xf32>>
+ %4 = fir.embox %3(%1) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+ acc.yield %4 : !fir.box<!fir.array<?xf32>>
+} destroy {
+^bb0(%arg0: !fir.box<!fir.array<?xf32>>, %arg1: !fir.box<!fir.array<?xf32>>):
+ %0 = fir.box_addr %arg1 : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+ %1 = fir.convert %0 : (!fir.ref<!fir.array<?xf32>>) -> !fir.heap<!fir.array<?xf32>>
+ fir.freemem %1 : !fir.heap<!fir.array<?xf32>>
+ acc.terminator
+}
+func.func @_QPfoo(%arg0: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}) {
+ %c200_i32 = arith.constant 200 : i32
+ %c1_i32 = arith.constant 1 : i32
+ %0 = fir.dummy_scope : !fir.dscope
+ %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFfooEi"}
+ %2 = fir.declare %1 {uniq_name = "_QFfooEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %3 = fir.declare %arg0 dummy_scope %0 {uniq_name = "_QFfooEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+ acc.parallel combined(loop) {
+ %4 = acc.private var(%3 : !fir.box<!fir.array<?xf32>>) -> !fir.box<!fir.array<?xf32>> {name = "x"}
+ %5 = acc.private varPtr(%2 : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+ acc.loop combined(parallel) private(@privatization_box_Uxf32 -> %4 : !fir.box<!fir.array<?xf32>>, @privatization_ref_i32 -> %5 : !fir.ref<i32>) control(%arg1 : i32) = (%c1_i32 : i32) to (%c200_i32 : i32) step (%c1_i32 : i32) {
+ %6 = fir.dummy_scope : !fir.dscope
+ %7 = fir.declare %4 dummy_scope %6 {uniq_name = "_QFfooEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+ %8 = fir.declare %5 {uniq_name = "_QFfooEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+ %9 = fir.convert %arg1 : (i32) -> f32
+ %10 = fir.convert %arg1 : (i32) -> i64
+ %11 = fir.array_coor %7 %10 : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+ fir.store %9 to %11 : !fir.ref<f32>
+ acc.yield
+ } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+ acc.yield
+ }
+ return
+}
+
+// CHECK-LABEL: acc.private.recipe @privatization_ref_i32 : !fir.ref<i32> init {
+// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<i32>):
+// CHECK: %[[VAL_1:.*]] = fir.alloca i32
+// CHECK: %[[VAL_2:.*]] = fir.declare %[[VAL_1]] {uniq_name = "acc.private.init"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK: acc.yield %[[VAL_2]] : !fir.ref<i32>
+// CHECK: }
+
+// CHECK-LABEL: acc.private.recipe @privatization_box_Uxf32 : !fir.ref<!fir.box<!fir.array<?xf32>>> init {
+// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xf32>>>):
+// CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+// CHECK: %[[VAL_2:.*]] = arith.constant 0 : index
+// CHECK: %[[VAL_3:.*]]:3 = fir.box_dims %[[VAL_1]], %[[VAL_2]] : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
+// CHECK: %[[VAL_4:.*]] = fir.shape %[[VAL_3]]#1 : (index) -> !fir.shape<1>
+// CHECK: %[[VAL_5:.*]] = fir.allocmem !fir.array<?xf32>, %[[VAL_3]]#1 {bindc_name = ".tmp", uniq_name = ""}
+// CHECK: %[[VAL_6:.*]] = fir.declare %[[VAL_5]](%[[VAL_4]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.heap<!fir.array<?xf32>>
+// CHECK: %[[VAL_7:.*]] = fir.embox %[[VAL_6]](%[[VAL_4]]) : (!fir.heap<!fir.array<?xf32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xf32>>
+// CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box<!fir.array<?xf32>>
+// CHECK: fir.store %[[VAL_7]] to %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+// CHECK: acc.yield %[[VAL_8]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+
+// CHECK-LABEL: } destroy {
+// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.array<?xf32>>>, %[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.array<?xf32>>>):
+// CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_1]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+// CHECK: %[[VAL_3:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box<!fir.array<?xf32>>) -> !fir.ref<!fir.array<?xf32>>
+// CHECK: %[[VAL_4:.*]] = fir.convert %[[VAL_3]] : (!fir.ref<!fir.array<?xf32>>) -> !fir.heap<!fir.array<?xf32>>
+// CHECK: fir.freemem %[[VAL_4]] : !fir.heap<!fir.array<?xf32>>
+// CHECK: acc.terminator
+// CHECK: }
+
+// CHECK-LABEL: func.func @_QPfoo(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "x"}) {
+// CHECK: %[[VAL_0:.*]] = arith.constant 200 : i32
+// CHECK: %[[VAL_1:.*]] = arith.constant 1 : i32
+// CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFfooEi"}
+// CHECK: %[[VAL_4:.*]] = fir.declare %[[VAL_3]] {uniq_name = "_QFfooEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK: %[[VAL_5:.*]] = fir.declare %[[ARG0]] dummy_scope %[[VAL_2]] {uniq_name = "_QFfooEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+// CHECK: %[[VAL_6:.*]] = fir.alloca !fir.box<!fir.array<?xf32>>
+// CHECK: fir.store %[[VAL_5]] to %[[VAL_6]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+// CHECK: acc.parallel combined(loop) {
+// CHECK: %[[VAL_7:.*]] = acc.private varPtr(%[[VAL_6]] : !fir.ref<!fir.box<!fir.array<?xf32>>>) -> !fir.ref<!fir.box<!fir.array<?xf32>>> {name = "x"}
+// CHECK: %[[VAL_8:.*]] = acc.private varPtr(%[[VAL_4]] : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = "i"}
+// CHECK: acc.loop combined(parallel) private(@privatization_box_Uxf32 -> %[[VAL_7]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, @privatization_ref_i32 -> %[[VAL_8]] : !fir.ref<i32>) control(%[[VAL_9:.*]] : i32) = (%[[VAL_1]] : i32) to (%[[VAL_0]] : i32) step (%[[VAL_1]] : i32) {
+// CHECK: %[[VAL_10:.*]] = fir.dummy_scope : !fir.dscope
+// CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_7]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
+// CHECK: %[[VAL_12:.*]] = fir.declare %[[VAL_11]] dummy_scope %[[VAL_10]] {uniq_name = "_QFfooEx"} : (!fir.box<!fir.array<?xf32>>, !fir.dscope) -> !fir.box<!fir.array<?xf32>>
+// CHECK: %[[VAL_13:.*]] = fir.declare %[[VAL_8]] {uniq_name = "_QFfooEi"} : (!fir.ref<i32>) -> !fir.ref<i32>
+// CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_9]] : (i32) -> f32
+// CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_9]] : (i32) -> i64
+// CHECK: %[[VAL_16:.*]] = fir.array_coor %[[VAL_12]] %[[VAL_15]] : (!fir.box<!fir.array<?xf32>>, i64) -> !fir.ref<f32>
+// CHECK: fir.store %[[VAL_14]] to %[[VAL_16]] : !fir.ref<f32>
+// CHECK: acc.yield
+// CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
+// CHECK: acc.yield
+// CHECK: }
+// CHECK: return
+// CHECK: }
diff --git a/flang/test/Lower/CUDA/TODO/cuda-allocate-default-init.cuf b/flang/test/Lower/CUDA/TODO/cuda-allocate-default-init.cuf
new file mode 100644
index 0000000..f68a9aa
--- /dev/null
+++ b/flang/test/Lower/CUDA/TODO/cuda-allocate-default-init.cuf
@@ -0,0 +1,15 @@
+! RUN: %not_todo_cmd bbc -emit-fir -fcuda -o - %s 2>&1 | FileCheck %s
+
+program test
+implicit none
+
+type :: t1
+ real(4) :: x_fin(1:10) = acos(-1.0_4)
+end type t1
+
+type(t1), allocatable, device :: t(:)
+
+! CHECK: not yet implemented: CUDA Fortran: allocate on device with default initialization
+allocate(t(1:2))
+
+end program
diff --git a/flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf b/flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf
new file mode 100644
index 0000000..3e59e2f
--- /dev/null
+++ b/flang/test/Lower/CUDA/TODO/cuda-allocate-source-device.cuf
@@ -0,0 +1,9 @@
+! RUN: %not_todo_cmd bbc -emit-fir -fcuda -o - %s 2>&1 | FileCheck %s
+
+program main
+ implicit none
+ integer, device, allocatable :: a_d(:)
+ integer, allocatable :: a(:)
+! CHECK: not yet implemented: CUDA Fortran: allocate with device source
+ allocate(a, source=a_d)
+end program
diff --git a/flang/test/Lower/CUDA/cuda-associate-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-associate-data-transfer.cuf
new file mode 100644
index 0000000..af850d5
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-associate-data-transfer.cuf
@@ -0,0 +1,21 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+! Test detection of CUDA Fortran data transfer in presence of associuate
+! statement.
+
+module m
+ real(8), device, dimension(10,10,10) :: d
+end module m
+
+subroutine foo
+ use m
+ !@CUF associate(d1 => d)
+ d1 = 0.0
+ !@CUF end associate
+end subroutine
+
+! CHECK-LABEL: func.func @_QPfoo()
+! CHECK: %[[D:.*]] = fir.address_of(@_QMmEd) : !fir.ref<!fir.array<10x10x10xf64>>
+! CHECK: %[[D_DECL:.*]]:2 = hlfir.declare %[[D]](%{{.*}}) {data_attr = #cuf.cuda<device>, uniq_name = "_QMmEd"} : (!fir.ref<!fir.array<10x10x10xf64>>, !fir.shape<3>) -> (!fir.ref<!fir.array<10x10x10xf64>>, !fir.ref<!fir.array<10x10x10xf64>>)
+! CHECK: %[[D1_DECL:.*]]:2 = hlfir.declare %[[D_DECL]]#0(%4) {uniq_name = "_QFfooEd1"} : (!fir.ref<!fir.array<10x10x10xf64>>, !fir.shape<3>) -> (!fir.ref<!fir.array<10x10x10xf64>>, !fir.ref<!fir.array<10x10x10xf64>>)
+! CHECK: cuf.data_transfer %{{.*}} to %[[D1_DECL]]#0 {transfer_kind = #cuf.cuda_transfer<host_device>} : f64, !fir.ref<!fir.array<10x10x10xf64>>
diff --git a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90 b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90
index 429f207..3987f9f 100644
--- a/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90
+++ b/flang/test/Lower/OpenACC/acc-firstprivate-derived-allocatable-component.f90
@@ -4,6 +4,11 @@
! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
! RUN: bbc -fopenacc -emit-fir %s -o - | FileCheck %s --check-prefix=FIR-CHECK
+! TODO: This test hits a fatal TODO. Deal with allocatable component
+! destructions. For arrays, allocatable component allocation may also be
+! missing.
+! XFAIL: *
+
module m_firstprivate_derived_alloc_comp
type point
real, allocatable :: x(:)
diff --git a/flang/test/Lower/OpenACC/acc-private.f90 b/flang/test/Lower/OpenACC/acc-private.f90
index d37eb8d..485825d 100644
--- a/flang/test/Lower/OpenACC/acc-private.f90
+++ b/flang/test/Lower/OpenACC/acc-private.f90
@@ -26,6 +26,12 @@
! CHECK: %[[DES_DST:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?x?x2xi32>>, !fir.shape<3>) -> !fir.box<!fir.array<?x?x2xi32>>
! CHECK: hlfir.assign %[[DES_SRC]] to %[[DES_DST]] : !fir.box<!fir.array<?x?x2xi32>>, !fir.box<!fir.array<?x?x2xi32>>
! CHECK: acc.terminator
+! CHECK: } destroy {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?x?x2xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?x?x2xi32>>):
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?x?x2xi32>>) -> !fir.ref<!fir.array<?x?x2xi32>>
+! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?x?x2xi32>>) -> !fir.heap<!fir.array<?x?x2xi32>>
+! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?x?x2xi32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_section_lb4.ub9_box_Uxi32 : !fir.box<!fir.array<?xi32>> init {
@@ -47,6 +53,12 @@
! CHECK: %[[RIGHT:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
! CHECK: hlfir.assign %[[LEFT]] to %[[RIGHT]] : !fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>
! CHECK: acc.terminator
+! CHECK: } destroy {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>):
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_box_Uxi32 : !fir.box<!fir.array<?xi32>> init {
@@ -64,6 +76,12 @@
! CHECK: %[[DES_V2:.*]] = hlfir.designate %[[ARG1]] shape %[[SHAPE]] : (!fir.box<!fir.array<?xi32>>, !fir.shape<1>) -> !fir.box<!fir.array<?xi32>>
! CHECK: hlfir.assign %[[DES_V1]] to %[[DES_V2]] : !fir.box<!fir.array<?xi32>>, !fir.box<!fir.array<?xi32>>
! CHECK: acc.terminator
+! CHECK: } destroy {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>):
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.private.recipe @privatization_box_UxUx2xi32 : !fir.box<!fir.array<?x?x2xi32>> init {
@@ -74,6 +92,12 @@
! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<?x?x2xi32>, %[[DIM0]]#1, %[[DIM1]]#1 {bindc_name = ".tmp", uniq_name = ""}
! CHECK: %[[DECL:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?x?x2xi32>>, !fir.shape<3>) -> (!fir.box<!fir.array<?x?x2xi32>>, !fir.heap<!fir.array<?x?x2xi32>>)
! CHECK: acc.yield %[[DECL]]#0 : !fir.box<!fir.array<?x?x2xi32>>
+! CHECK: } destroy {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?x?x2xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?x?x2xi32>>):
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?x?x2xi32>>) -> !fir.ref<!fir.array<?x?x2xi32>>
+! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?x?x2xi32>>) -> !fir.heap<!fir.array<?x?x2xi32>>
+! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?x?x2xi32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.private.recipe @privatization_ref_box_ptr_Uxi32 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>> init {
@@ -89,6 +113,13 @@
! CHECK: %[[CONV:.*]] = fir.convert %[[DECLAREBOX]]#0 : (!fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>>
! CHECK: fir.store %[[DECLARE]]#0 to %[[CONV]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
! CHECK: acc.yield %[[DECLAREBOX]]#0 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: } destroy {
+! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>):
+! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?xi32>>>>
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.ptr<!fir.array<?xi32>>>) -> !fir.ptr<!fir.array<?xi32>>
+! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ptr<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: @privatization_ref_box_heap_i32 : !fir.ref<!fir.box<!fir.heap<i32>>> init {
@@ -99,6 +130,12 @@
! CHECK: %[[BOX:.*]] = fir.embox %[[ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
! CHECK: fir.store %[[BOX]] to %[[DECLARE]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
! CHECK: acc.yield %[[DECLARE]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK: } destroy {
+! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<i32>>>, %arg1: !fir.ref<!fir.box<!fir.heap<i32>>>):
+! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.heap<i32>>>
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
+! CHECK: fir.freemem %[[ADDR]] : !fir.heap<i32>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.private.recipe @privatization_ref_box_heap_Uxi32 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>> init {
@@ -114,6 +151,12 @@
! CHECK: %[[CONV:.*]] = fir.convert %[[DECLAREBOX]]#0 : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<!fir.box<!fir.array<?xi32>>>
! CHECK: fir.store %[[DECLARE]]#0 to %[[CONV]] : !fir.ref<!fir.box<!fir.array<?xi32>>>
! CHECK: acc.yield %[[DECLAREBOX]]#0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: } destroy {
+! CHECK: ^bb0(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>, %arg1: !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>):
+! CHECK: %[[LOAD:.*]] = fir.load %arg1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.box<!fir.heap<!fir.array<?xi32>>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: fir.freemem %[[ADDR]] : !fir.heap<!fir.array<?xi32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.private.recipe @privatization_box_Uxi32 : !fir.box<!fir.array<?xi32>> init {
@@ -124,6 +167,12 @@
! CHECK: %[[TEMP:.*]] = fir.allocmem !fir.array<?xi32>, %0#1 {bindc_name = ".tmp", uniq_name = ""}
! CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[TEMP]](%[[SHAPE]]) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?xi32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xi32>>, !fir.heap<!fir.array<?xi32>>)
! CHECK: acc.yield %[[DECLARE:.*]]#0 : !fir.box<!fir.array<?xi32>>
+! CHECK: } destroy {
+! CHECK: ^bb0(%[[ARG0:.*]]: !fir.box<!fir.array<?xi32>>, %[[ARG1:.*]]: !fir.box<!fir.array<?xi32>>):
+! CHECK: %[[ADDR:.*]] = fir.box_addr %[[ARG1]] : (!fir.box<!fir.array<?xi32>>) -> !fir.ref<!fir.array<?xi32>>
+! CHECK: %[[CAST:.*]] = fir.convert %[[ADDR]] : (!fir.ref<!fir.array<?xi32>>) -> !fir.heap<!fir.array<?xi32>>
+! CHECK: fir.freemem %[[CAST]] : !fir.heap<!fir.array<?xi32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_section_lb50.ub99_ref_50xf32 : !fir.ref<!fir.array<50xf32>> init {
@@ -140,6 +189,7 @@
! CHECK: %[[DES_SRC:.*]] = hlfir.designate %[[DECL_SRC]]#0 shape %[[SHAPE:.*]] : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<50xf32>>
! CHECK: %[[DES_DST:.*]] = hlfir.designate %[[DECL_DST]]#0 shape %[[SHAPE:.*]] : (!fir.ref<!fir.array<50xf32>>, !fir.shape<1>) -> !fir.ref<!fir.array<50xf32>>
! CHECK: hlfir.assign %[[DES_SRC]] to %[[DES_DST]] : !fir.ref<!fir.array<50xf32>>, !fir.ref<!fir.array<50xf32>>
+! CHECK: acc.terminator
! CHECK: }
! CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_ref_100xf32 : !fir.ref<!fir.array<100xf32>> init {
diff --git a/flang/test/Lower/OpenMP/Todo/attach-modifier.f90 b/flang/test/Lower/OpenMP/Todo/attach-modifier.f90
new file mode 100644
index 0000000..099f4a4
--- /dev/null
+++ b/flang/test/Lower/OpenMP/Todo/attach-modifier.f90
@@ -0,0 +1,9 @@
+!RUN: %not_todo_cmd bbc -emit-hlfir -fopenmp -fopenmp-version=61 -o - %s 2>&1 | FileCheck %s
+!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=61 -o - %s 2>&1 | FileCheck %s
+
+!CHECK: not yet implemented: ATTACH modifier is not implemented yet
+subroutine f00(x)
+ integer, pointer :: x
+ !$omp target map(attach(always), tofrom: x)
+ !$omp end target
+end
diff --git a/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90 b/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90
new file mode 100644
index 0000000..7fc30b4
--- /dev/null
+++ b/flang/test/Lower/OpenMP/cptr-usm-close-and-use-device-ptr.f90
@@ -0,0 +1,21 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=50 %s -o - | FileCheck %s
+!
+! Checks:
+! - C_PTR mappings expand to `__address` member with CLOSE under USM paths.
+! - use_device_ptr does not implicitly expand member operands in the clause.
+
+subroutine only_cptr_use_device_ptr
+ use iso_c_binding
+ type(c_ptr) :: cptr
+ integer :: i
+
+ !$omp target data use_device_ptr(cptr) map(tofrom: i)
+ !$omp end target data
+end subroutine
+
+! CHECK-LABEL: func.func @_QPonly_cptr_use_device_ptr()
+! CHECK: %[[I_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<i32>, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref<i32> {name = "i"}
+! CHECK: %[[CP_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>, !fir.type<{{.*}}__builtin_c_ptr{{.*}}>) map_clauses(return_param) capture(ByRef) -> !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>
+! CHECK: omp.target_data map_entries(%[[I_MAP]] : !fir.ref<i32>) use_device_ptr(%[[CP_MAP]] -> %{{.*}} : !fir.ref<!fir.type<{{.*}}__builtin_c_ptr{{.*}}>>) {
+! CHECK: omp.terminator
+! CHECK: }
diff --git a/flang/test/Parser/OpenMP/map-modifiers-v61.f90 b/flang/test/Parser/OpenMP/map-modifiers-v61.f90
new file mode 100644
index 0000000..79bf73a
--- /dev/null
+++ b/flang/test/Parser/OpenMP/map-modifiers-v61.f90
@@ -0,0 +1,64 @@
+!RUN: %flang_fc1 -fdebug-unparse-no-sema -fopenmp -fopenmp-version=61 %s | FileCheck --ignore-case --check-prefix="UNPARSE" %s
+!RUN: %flang_fc1 -fdebug-dump-parse-tree-no-sema -fopenmp -fopenmp-version=61 %s | FileCheck --check-prefix="PARSE-TREE" %s
+
+subroutine f00(x)
+ integer, pointer :: x
+ !$omp target map(attach(always): x)
+ !$omp end target
+end
+
+!UNPARSE: SUBROUTINE f00 (x)
+!UNPARSE: INTEGER, POINTER :: x
+!UNPARSE: !$OMP TARGET MAP(ATTACH(ALWAYS): x)
+!UNPARSE: !$OMP END TARGET
+!UNPARSE: END SUBROUTINE
+
+!PARSE-TREE: OmpBeginDirective
+!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target
+!PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause
+!PARSE-TREE: | | Modifier -> OmpAttachModifier -> Value = Always
+!PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | | bool = 'true'
+!PARSE-TREE: | Flags = None
+
+
+subroutine f01(x)
+ integer, pointer :: x
+ !$omp target map(attach(auto): x)
+ !$omp end target
+end
+
+!UNPARSE: SUBROUTINE f01 (x)
+!UNPARSE: INTEGER, POINTER :: x
+!UNPARSE: !$OMP TARGET MAP(ATTACH(AUTO): x)
+!UNPARSE: !$OMP END TARGET
+!UNPARSE: END SUBROUTINE
+
+!PARSE-TREE: OmpBeginDirective
+!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target
+!PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause
+!PARSE-TREE: | | Modifier -> OmpAttachModifier -> Value = Auto
+!PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | | bool = 'true'
+!PARSE-TREE: | Flags = None
+
+
+subroutine f02(x)
+ integer, pointer :: x
+ !$omp target map(attach(never): x)
+ !$omp end target
+end
+
+!UNPARSE: SUBROUTINE f02 (x)
+!UNPARSE: INTEGER, POINTER :: x
+!UNPARSE: !$OMP TARGET MAP(ATTACH(NEVER): x)
+!UNPARSE: !$OMP END TARGET
+!UNPARSE: END SUBROUTINE
+
+!PARSE-TREE: OmpBeginDirective
+!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = target
+!PARSE-TREE: | OmpClauseList -> OmpClause -> Map -> OmpMapClause
+!PARSE-TREE: | | Modifier -> OmpAttachModifier -> Value = Never
+!PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'x'
+!PARSE-TREE: | | bool = 'true'
+!PARSE-TREE: | Flags = None
diff --git a/flang/test/Semantics/OpenMP/dump-requires-details.f90 b/flang/test/Semantics/OpenMP/dump-requires-details.f90
new file mode 100644
index 0000000..9c844c0
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/dump-requires-details.f90
@@ -0,0 +1,14 @@
+!RUN: %flang_fc1 -fopenmp -fopenmp-version=60 -fdebug-dump-symbols %s | FileCheck %s
+
+module fred
+!$omp requires atomic_default_mem_order(relaxed)
+contains
+subroutine f00
+ !$omp requires unified_address
+end
+subroutine f01
+ !$omp requires unified_shared_memory
+end
+end module
+
+!CHECK: fred: Module OmpRequirements:(atomic_default_mem_order(relaxed),unified_address,unified_shared_memory)
diff --git a/flang/test/Semantics/OpenMP/map-modifiers-v61.f90 b/flang/test/Semantics/OpenMP/map-modifiers-v61.f90
new file mode 100644
index 0000000..2daa57892
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/map-modifiers-v61.f90
@@ -0,0 +1,49 @@
+!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=61 -Werror
+
+subroutine f00(x)
+ integer, pointer :: x
+ !ERROR: 'attach-modifier' modifier cannot occur multiple times
+ !$omp target map(attach(always), attach(never): x)
+ !$omp end target
+end
+
+subroutine f01(x)
+ integer, pointer :: x
+ !ERROR: The 'attach-modifier' modifier can only appear on a map-entering construct or on a DECLARE_MAPPER directive
+ !$omp target_exit_data map(attach(always): x)
+end
+
+subroutine f02(x)
+ integer, pointer :: x
+ !ERROR: The 'attach-modifier' modifier can only appear on a map-entering construct or on a DECLARE_MAPPER directive
+ !$omp target map(attach(never), from: x)
+ !$omp end target
+end
+
+subroutine f03(x)
+ integer :: x
+ !ERROR: A list-item that appears in a map clause with the ATTACH modifier must have a base-pointer
+ !$omp target map(attach(always), tofrom: x)
+ !$omp end target
+end
+
+module m
+type t
+ integer :: z
+end type
+
+type u
+ type(t), pointer :: y
+end type
+
+contains
+
+subroutine f04(n)
+ integer :: n
+ type(u) :: x(10)
+
+ !Expect no diagonstics
+ !$omp target map(attach(always), to: x(n)%y%z)
+ !$omp end target
+end
+end module
diff --git a/flang/test/Semantics/OpenMP/omp-atomic-write-pointer-derived.f90 b/flang/test/Semantics/OpenMP/omp-atomic-write-pointer-derived.f90
new file mode 100644
index 0000000..6268b0b
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/omp-atomic-write-pointer-derived.f90
@@ -0,0 +1,8 @@
+! RUN: not %flang_fc1 -fopenmp -fsyntax-only %s 2>&1 | FileCheck %s
+type t
+end type
+type(t), pointer :: a1, a2
+!$omp atomic write
+a1 = a2
+! CHECK: error: ATOMIC operation requires an intrinsic scalar variable; 'a1' has the POINTER attribute and derived type 't'
+end
diff --git a/flang/test/Semantics/OpenMP/requires-modfile.f90 b/flang/test/Semantics/OpenMP/requires-modfile.f90
new file mode 100644
index 0000000..2f06104
--- /dev/null
+++ b/flang/test/Semantics/OpenMP/requires-modfile.f90
@@ -0,0 +1,39 @@
+!RUN: %python %S/../test_modfile.py %s %flang_fc1 -fopenmp -fopenmp-version=52
+
+module req
+contains
+
+! The requirements from the subprograms should be added to the module.
+subroutine f00
+ !$omp requires reverse_offload
+end
+
+subroutine f01
+ !$omp requires atomic_default_mem_order(seq_cst)
+end
+end module
+
+module user
+! The requirements from module req should be propagated to this module.
+use req
+end module
+
+
+!Expect: req.mod
+!module req
+!!$omp requires atomic_default_mem_order(seq_cst)
+!!$omp requires reverse_offload
+!contains
+!subroutine f00()
+!end
+!subroutine f01()
+!end
+!end
+
+!Expect: user.mod
+!module user
+!use req,only:f00
+!use req,only:f01
+!!$omp requires atomic_default_mem_order(seq_cst)
+!!$omp requires reverse_offload
+!end
diff --git a/flang/test/Semantics/OpenMP/requires09.f90 b/flang/test/Semantics/OpenMP/requires09.f90
index 2fa5d950..ca6ad5e 100644
--- a/flang/test/Semantics/OpenMP/requires09.f90
+++ b/flang/test/Semantics/OpenMP/requires09.f90
@@ -3,12 +3,16 @@
! 2.4 Requires directive
! All atomic_default_mem_order clauses in 'requires' directives found within a
! compilation unit must specify the same ordering.
+!ERROR: Conflicting 'ATOMIC_DEFAULT_MEM_ORDER' REQUIRES clauses found in compilation unit
+module m
+contains
subroutine f
!$omp requires atomic_default_mem_order(seq_cst)
end subroutine f
-!ERROR: Conflicting 'ATOMIC_DEFAULT_MEM_ORDER' REQUIRES clauses found in compilation unit
subroutine g
!$omp requires atomic_default_mem_order(relaxed)
end subroutine g
+
+end module
diff --git a/flang/test/Semantics/dynamic-type-intrinsics.f90 b/flang/test/Semantics/dynamic-type-intrinsics.f90
new file mode 100644
index 0000000..a4ce3db
--- /dev/null
+++ b/flang/test/Semantics/dynamic-type-intrinsics.f90
@@ -0,0 +1,73 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1
+
+module m
+ type :: t1
+ real :: x
+ end type
+ type :: t2(k)
+ integer, kind :: k
+ real(kind=k) :: x
+ end type
+ type :: t3
+ real :: x
+ end type
+ type, extends(t1) :: t4
+ integer :: y
+ end type
+ type :: t5
+ sequence
+ integer :: x
+ integer :: y
+ end type
+
+ integer :: i
+ real :: r
+ type(t1) :: x1, y1
+ type(t2(4)) :: x24, y24
+ type(t2(8)) :: x28
+ type(t3) :: x3
+ type(t4) :: x4
+ type(t5) :: x5
+ class(t1), allocatable :: a1
+ class(t3), allocatable :: a3
+
+ integer(kind=merge(kind(1),-1,same_type_as(x1, x1))) same_type_as_x1_x1_true
+ integer(kind=merge(kind(1),-1,same_type_as(x1, y1))) same_type_as_x1_y1_true
+ integer(kind=merge(kind(1),-1,same_type_as(x24, x24))) same_type_as_x24_x24_true
+ integer(kind=merge(kind(1),-1,same_type_as(x24, y24))) same_type_as_x24_y24_true
+ integer(kind=merge(kind(1),-1,same_type_as(x24, x28))) same_type_as_x24_x28_true
+ !ERROR: INTEGER(KIND=-1) is not a supported type
+ integer(kind=merge(kind(1),-1,same_type_as(x1, x3))) same_type_as_x1_x3_false
+ !ERROR: INTEGER(KIND=-1) is not a supported type
+ integer(kind=merge(kind(1),-1,same_type_as(a1, a3))) same_type_as_a1_a3_false
+ !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type
+ logical :: t1_8 = same_type_as(x5, x5)
+ !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type
+ logical :: t1_9 = same_type_as(x5, x1)
+ !ERROR: Actual argument for 'b=' has type 't5', but was expected to be an extensible or unlimited polymorphic type
+ logical :: t1_10 = same_type_as(x1, x5)
+ !ERROR: Actual argument for 'a=' has bad type 'INTEGER(4)', expected extensible or unlimited polymorphic type
+ logical :: t1_11 = same_type_as(i, i)
+ !ERROR: Actual argument for 'a=' has bad type 'REAL(4)', expected extensible or unlimited polymorphic type
+ logical :: t1_12 = same_type_as(r, r)
+ !ERROR: Actual argument for 'a=' has bad type 'INTEGER(4)', expected extensible or unlimited polymorphic type
+ logical :: t1_13 = same_type_as(i, t)
+
+ integer(kind=merge(kind(1),-1,extends_type_of(x1, y1))) extends_type_of_x1_y1_true
+ integer(kind=merge(kind(1),-1,extends_type_of(x24, x24))) extends_type_of_x24_x24_true
+ integer(kind=merge(kind(1),-1,extends_type_of(x24, y24))) extends_type_of_x24_y24_true
+ integer(kind=merge(kind(1),-1,extends_type_of(x24, x28))) extends_type_of_x24_x28_true
+ !ERROR: INTEGER(KIND=-1) is not a supported type
+ integer(kind=merge(kind(1),-1,extends_type_of(x1, x3))) extends_type_of_x1_x3_false
+ !ERROR: INTEGER(KIND=-1) is not a supported type
+ integer(kind=merge(kind(1),-1,extends_type_of(a1, a3))) extends_type_of_a1_a3_false
+ !ERROR: INTEGER(KIND=-1) is not a supported type
+ integer(kind=merge(kind(1),-1,extends_type_of(x1, x4))) extends_type_of_x1_x4_false
+ integer(kind=merge(kind(1),-1,extends_type_of(x4, x1))) extends_type_of_x4_x1_true
+ !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type
+ logical :: t2_9 = extends_type_of(x5, x5)
+ !ERROR: Actual argument for 'a=' has type 't5', but was expected to be an extensible or unlimited polymorphic type
+ logical :: t2_10 = extends_type_of(x5, x1)
+ !ERROR: Actual argument for 'mold=' has type 't5', but was expected to be an extensible or unlimited polymorphic type
+ logical :: t2_11 = extends_type_of(x1, x5)
+end module
diff --git a/flang/test/Semantics/io11.f90 b/flang/test/Semantics/io11.f90
index c00deed..6bb7a71 100644
--- a/flang/test/Semantics/io11.f90
+++ b/flang/test/Semantics/io11.f90
@@ -809,3 +809,24 @@ module m29
end
end interface
end
+
+module m30
+ type base
+ character(5), allocatable :: data
+ end type
+ interface write(formatted)
+ subroutine formattedRead (dtv, unit, iotype, v_list, iostat, iomsg)
+ import base
+ !ERROR: Dummy argument 'dtv' of a defined input/output procedure must be a scalar
+ class (base), intent(in) :: dtv(10)
+ integer, intent(in) :: unit
+ !ERROR: Dummy argument 'iotype' of a defined input/output procedure must be a scalar
+ character(*), intent(in) :: iotype(2)
+ integer, intent(in) :: v_list(:)
+ !ERROR: Dummy argument 'iostat' of a defined input/output procedure must be a scalar
+ integer, intent(out) :: iostat(*)
+ !ERROR: Dummy argument 'iomsg' of a defined input/output procedure must be a scalar
+ character(*), intent(inout) :: iomsg(:)
+ end subroutine
+ end interface
+end module
diff --git a/flang/tools/fir-opt/CMakeLists.txt b/flang/tools/fir-opt/CMakeLists.txt
index 4ee9752..c5bd439 100644
--- a/flang/tools/fir-opt/CMakeLists.txt
+++ b/flang/tools/fir-opt/CMakeLists.txt
@@ -22,6 +22,7 @@ target_link_libraries(fir-opt PRIVATE
HLFIRDialect
HLFIRTransforms
FIROpenACCSupport
+ FIROpenACCTransforms
FIROpenMPSupport
FlangOpenMPTransforms
FIRAnalysis
diff --git a/flang/tools/fir-opt/fir-opt.cpp b/flang/tools/fir-opt/fir-opt.cpp
index d66fc3f..b0b277b 100644
--- a/flang/tools/fir-opt/fir-opt.cpp
+++ b/flang/tools/fir-opt/fir-opt.cpp
@@ -14,6 +14,7 @@
#include "mlir/Tools/mlir-opt/MlirOptMain.h"
#include "flang/Optimizer/CodeGen/CodeGen.h"
#include "flang/Optimizer/HLFIR/Passes.h"
+#include "flang/Optimizer/OpenACC/Passes.h"
#include "flang/Optimizer/OpenMP/Passes.h"
#include "flang/Optimizer/Support/InitFIR.h"
#include "flang/Optimizer/Transforms/Passes.h"
@@ -37,6 +38,7 @@ int main(int argc, char **argv) {
fir::registerOptTransformPasses();
hlfir::registerHLFIRPasses();
flangomp::registerFlangOpenMPPasses();
+ fir::acc::registerFIROpenACCPasses();
#ifdef FLANG_INCLUDE_TESTS
fir::test::registerTestFIRAliasAnalysisPass();
fir::test::registerTestFIROpenACCInterfacesPass();