aboutsummaryrefslogtreecommitdiff
path: root/flang
diff options
context:
space:
mode:
Diffstat (limited to 'flang')
-rw-r--r--flang/docs/Directives.md8
-rw-r--r--flang/include/flang/Evaluate/call.h10
-rw-r--r--flang/include/flang/Optimizer/Builder/IntrinsicCall.h16
-rw-r--r--flang/include/flang/Optimizer/Dialect/FIRAttr.td20
-rw-r--r--flang/include/flang/Optimizer/Dialect/FIROps.td1
-rw-r--r--flang/include/flang/Parser/dump-parse-tree.h9
-rw-r--r--flang/include/flang/Parser/openmp-utils.h22
-rw-r--r--flang/include/flang/Parser/parse-tree.h73
-rw-r--r--flang/include/flang/Semantics/symbol.h2
-rw-r--r--flang/lib/Lower/Bridge.cpp128
-rw-r--r--flang/lib/Lower/ConvertCall.cpp13
-rw-r--r--flang/lib/Optimizer/Builder/IntrinsicCall.cpp315
-rw-r--r--flang/lib/Optimizer/CodeGen/CodeGen.cpp12
-rw-r--r--flang/lib/Optimizer/CodeGen/TargetRewrite.cpp9
-rw-r--r--flang/lib/Optimizer/Dialect/FIROps.cpp7
-rw-r--r--flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp3
-rw-r--r--flang/lib/Parser/Fortran-parsers.cpp8
-rw-r--r--flang/lib/Parser/openmp-parsers.cpp244
-rw-r--r--flang/lib/Parser/openmp-utils.cpp12
-rw-r--r--flang/lib/Parser/parse-tree.cpp27
-rw-r--r--flang/lib/Parser/unparse.cpp44
-rw-r--r--flang/lib/Semantics/canonicalize-directives.cpp6
-rw-r--r--flang/lib/Semantics/resolve-directives.cpp17
-rw-r--r--flang/lib/Semantics/resolve-names.cpp104
-rw-r--r--flang/module/cudadevice.f90155
-rw-r--r--flang/test/Fir/CUDA/cuda-target-rewrite.mlir20
-rw-r--r--flang/test/Integration/inline_directive.f9069
-rw-r--r--flang/test/Lower/CUDA/cuda-device-proc.cuf249
-rw-r--r--flang/test/Lower/inline_directive.f9061
-rw-r--r--flang/test/Parser/OpenMP/declare-reduction-multi.f90136
-rw-r--r--flang/test/Parser/OpenMP/declare-reduction-operator.f90110
-rw-r--r--flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f902
-rw-r--r--flang/test/Parser/OpenMP/declare-reduction-unparse.f9057
-rw-r--r--flang/test/Parser/OpenMP/metadirective-dirspec.f9055
-rw-r--r--flang/test/Parser/OpenMP/openmp6-directive-spellings.f9035
-rw-r--r--flang/test/Parser/compiler-directives.f9024
-rw-r--r--flang/test/Semantics/OpenMP/declare-reduction-error.f9011
-rw-r--r--flang/test/Semantics/OpenMP/declare-reduction-functions.f9052
-rw-r--r--flang/test/Semantics/OpenMP/declare-reduction-logical.f907
-rw-r--r--flang/test/Semantics/OpenMP/declare-reduction-modfile.f9012
-rw-r--r--flang/test/Semantics/OpenMP/declare-reduction-operator.f906
-rw-r--r--flang/test/Semantics/OpenMP/declare-reduction-operators.f907
-rw-r--r--flang/test/Semantics/OpenMP/declare-reduction-renamedop.f909
-rw-r--r--flang/test/Semantics/OpenMP/declare-reduction.f9016
44 files changed, 1947 insertions, 256 deletions
diff --git a/flang/docs/Directives.md b/flang/docs/Directives.md
index 91c27cb..3ebb08c 100644
--- a/flang/docs/Directives.md
+++ b/flang/docs/Directives.md
@@ -53,6 +53,14 @@ A list of non-standard directives supported by Flang
* `!dir$ novector` disabling vectorization on the following loop.
* `!dir$ nounroll` disabling unrolling on the following loop.
* `!dir$ nounroll_and_jam` disabling unrolling and jamming on the following loop.
+* `!dir$ inline` instructs the compiler to attempt to inline the called routines if the
+ directive is specified before a call statement or all call statements within the loop
+ body if specified before a DO LOOP or all function references if specified before an
+ assignment statement.
+* `!dir$ forceinline` works in the same way as the `inline` directive, but it forces
+ inlining by the compiler on a function call statement.
+* `!dir$ noinline` works in the same way as the `inline` directive, but prevents
+ any attempt of inlining by the compiler on a function call statement.
# Directive Details
diff --git a/flang/include/flang/Evaluate/call.h b/flang/include/flang/Evaluate/call.h
index fea09d6..93e36f6 100644
--- a/flang/include/flang/Evaluate/call.h
+++ b/flang/include/flang/Evaluate/call.h
@@ -255,6 +255,13 @@ public:
bool IsElemental() const { return proc_.IsElemental(); }
bool hasAlternateReturns() const { return hasAlternateReturns_; }
+ bool hasNoInline() const { return noInline_; }
+ void setNoInline(bool ni) { noInline_ = ni; }
+ bool hasAlwaysInline() const { return alwaysInline_; }
+ void setAlwaysInline(bool ai) { alwaysInline_ = ai; }
+ bool hasInlineHint() const { return inlineHint_; }
+ void setInlineHint(bool ih) { inlineHint_ = ih; }
+
Expr<SomeType> *UnwrapArgExpr(int n) {
if (static_cast<std::size_t>(n) < arguments_.size() && arguments_[n]) {
return arguments_[n]->UnwrapExpr();
@@ -278,6 +285,9 @@ protected:
ActualArguments arguments_;
Chevrons chevrons_;
bool hasAlternateReturns_;
+ bool noInline_{false};
+ bool alwaysInline_{false};
+ bool inlineHint_{false};
};
template <typename A> class FunctionRef : public ProcedureRef {
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index c3cd119b..3407dd0 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -211,6 +211,8 @@ struct IntrinsicLibrary {
mlir::Value genBarrierArrive(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genBarrierArriveCnt(mlir::Type, llvm::ArrayRef<mlir::Value>);
void genBarrierInit(llvm::ArrayRef<fir::ExtendedValue>);
+ mlir::Value genBarrierTryWait(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ mlir::Value genBarrierTryWaitSleep(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genBesselJn(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genBesselYn(mlir::Type,
@@ -459,7 +461,21 @@ struct IntrinsicLibrary {
mlir::Value genTime(mlir::Type, llvm::ArrayRef<mlir::Value>);
void genTMABulkCommitGroup(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkLoadC4(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkLoadC8(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkLoadI4(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkLoadI8(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkLoadR2(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkLoadR4(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkLoadR8(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkStoreI4(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkStoreI8(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkStoreR2(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkStoreR4(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkStoreR8(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkStoreC4(llvm::ArrayRef<fir::ExtendedValue>);
+ void genTMABulkStoreC8(llvm::ArrayRef<fir::ExtendedValue>);
void genTMABulkWaitGroup(llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genTrailz(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genTransfer(mlir::Type,
diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td
index 7bd96ac3..8a8c60f 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td
+++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td
@@ -219,4 +219,24 @@ def LocalitySpecifierTypeAttr : EnumAttr<FIROpsDialect, LocalitySpecifierType,
let assemblyFormat = "`{` `type` `=` $value `}`";
}
+/// Fortran inline attribute
+def FIRinlineNone : I32BitEnumAttrCaseNone<"none">;
+def FIRinlineNo : I32BitEnumAttrCaseBit<"no_inline", 0>;
+def FIRinlineAlways : I32BitEnumAttrCaseBit<"always_inline", 1>;
+def FIRinlineHint : I32BitEnumAttrCaseBit<"inline_hint", 2>;
+
+def fir_FortranInlineEnum
+ : I32BitEnumAttr<"FortranInlineEnum", "Fortran inline attributes",
+ [FIRinlineNone, FIRinlineNo, FIRinlineAlways,
+ FIRinlineHint]> {
+ let separator = ", ";
+ let cppNamespace = "::fir";
+ let genSpecializedAttr = 0;
+ let printBitEnumPrimaryGroups = 1;
+}
+
+def fir_FortranInlineAttr
+ : EnumAttr<FIROpsDialect, fir_FortranInlineEnum, "inline_attrs"> {
+ let assemblyFormat = "`<` $value `>`";
+}
#endif // FIR_DIALECT_FIR_ATTRS
diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index 86502c6..58a317c 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -2549,6 +2549,7 @@ def fir_CallOp : fir_Op<"call",
OptionalAttr<DictArrayAttr>:$arg_attrs,
OptionalAttr<DictArrayAttr>:$res_attrs,
OptionalAttr<fir_FortranProcedureFlagsAttr>:$procedure_attrs,
+ OptionalAttr<fir_FortranInlineAttr>:$inline_attr,
DefaultValuedAttr<Arith_FastMathAttr,
"::mlir::arith::FastMathFlags::none">:$fastmath
);
diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h
index af8152d..bb97069 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -206,8 +206,11 @@ public:
NODE(parser, CompilerDirective)
NODE(CompilerDirective, AssumeAligned)
NODE(CompilerDirective, IgnoreTKR)
+ NODE(CompilerDirective, Inline)
+ NODE(CompilerDirective, ForceInline)
NODE(CompilerDirective, LoopCount)
NODE(CompilerDirective, NameValue)
+ NODE(CompilerDirective, NoInline)
NODE(CompilerDirective, Unrecognized)
NODE(CompilerDirective, VectorAlways)
NODE(CompilerDirective, Unroll)
@@ -596,7 +599,7 @@ public:
NODE(parser, OmpInitClause)
NODE(OmpInitClause, Modifier)
NODE(parser, OmpInitializerClause)
- NODE(parser, OmpInitializerProc)
+ NODE(parser, OmpInitializerExpression)
NODE(parser, OmpInReductionClause)
NODE(OmpInReductionClause, Modifier)
NODE(parser, OmpInteropPreference)
@@ -674,6 +677,10 @@ public:
NODE_ENUM(OmpSeverityClause, Severity)
NODE(parser, OmpStepComplexModifier)
NODE(parser, OmpStepSimpleModifier)
+ NODE(parser, OmpStylizedDeclaration)
+ NODE(parser, OmpStylizedExpression)
+ NODE(parser, OmpStylizedInstance)
+ NODE(OmpStylizedInstance, Instance)
NODE(parser, OmpTaskDependenceType)
NODE_ENUM(OmpTaskDependenceType, Value)
NODE(parser, OmpTaskReductionClause)
diff --git a/flang/include/flang/Parser/openmp-utils.h b/flang/include/flang/Parser/openmp-utils.h
index f761332..49db091 100644
--- a/flang/include/flang/Parser/openmp-utils.h
+++ b/flang/include/flang/Parser/openmp-utils.h
@@ -25,6 +25,13 @@
namespace Fortran::parser::omp {
+template <typename T> constexpr auto addr_if(std::optional<T> &x) {
+ return x ? &*x : nullptr;
+}
+template <typename T> constexpr auto addr_if(const std::optional<T> &x) {
+ return x ? &*x : nullptr;
+}
+
namespace detail {
using D = llvm::omp::Directive;
@@ -133,9 +140,24 @@ template <typename T> OmpDirectiveName GetOmpDirectiveName(const T &x) {
}
const OmpObjectList *GetOmpObjectList(const OmpClause &clause);
+
+template <typename T>
+const T *GetFirstArgument(const OmpDirectiveSpecification &spec) {
+ for (const OmpArgument &arg : spec.Arguments().v) {
+ if (auto *t{std::get_if<T>(&arg.u)}) {
+ return t;
+ }
+ }
+ return nullptr;
+}
+
const BlockConstruct *GetFortranBlockConstruct(
const ExecutionPartConstruct &epc);
+const OmpCombinerExpression *GetCombinerExpr(
+ const OmpReductionSpecifier &rspec);
+const OmpInitializerExpression *GetInitializerExpr(const OmpClause &init);
+
} // namespace Fortran::parser::omp
#endif // FORTRAN_PARSER_OPENMP_UTILS_H
diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h
index bb47f31..c3a8c2e 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -24,7 +24,9 @@
#include "provenance.h"
#include "flang/Common/idioms.h"
#include "flang/Common/indirection.h"
+#include "flang/Common/reference.h"
#include "flang/Support/Fortran.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/Frontend/OpenACC/ACC.h.inc"
#include "llvm/Frontend/OpenMP/OMP.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
@@ -3356,6 +3358,9 @@ struct StmtFunctionStmt {
// !DIR$ NOVECTOR
// !DIR$ NOUNROLL
// !DIR$ NOUNROLL_AND_JAM
+// !DIR$ FORCEINLINE
+// !DIR$ INLINE
+// !DIR$ NOINLINE
// !DIR$ <anything else>
struct CompilerDirective {
UNION_CLASS_BOILERPLATE(CompilerDirective);
@@ -3384,11 +3389,14 @@ struct CompilerDirective {
EMPTY_CLASS(NoVector);
EMPTY_CLASS(NoUnroll);
EMPTY_CLASS(NoUnrollAndJam);
+ EMPTY_CLASS(ForceInline);
+ EMPTY_CLASS(Inline);
+ EMPTY_CLASS(NoInline);
EMPTY_CLASS(Unrecognized);
CharBlock source;
std::variant<std::list<IgnoreTKR>, LoopCount, std::list<AssumeAligned>,
VectorAlways, std::list<NameValue>, Unroll, UnrollAndJam, Unrecognized,
- NoVector, NoUnroll, NoUnrollAndJam>
+ NoVector, NoUnroll, NoUnrollAndJam, ForceInline, Inline, NoInline>
u;
};
@@ -3504,6 +3512,8 @@ struct OmpDirectiveName {
// type-name list item
struct OmpTypeName {
+ CharBlock source;
+ mutable const semantics::DeclTypeSpec *declTypeSpec{nullptr};
UNION_CLASS_BOILERPLATE(OmpTypeName);
std::variant<TypeSpec, DeclarationTypeSpec> u;
};
@@ -3532,6 +3542,39 @@ struct OmpObjectList {
WRAPPER_CLASS_BOILERPLATE(OmpObjectList, std::list<OmpObject>);
};
+struct OmpStylizedDeclaration {
+ COPY_AND_ASSIGN_BOILERPLATE(OmpStylizedDeclaration);
+ // Since "Reference" isn't handled by parse-tree-visitor, add EmptyTrait,
+ // and visit the members by hand when needed.
+ using EmptyTrait = std::true_type;
+ common::Reference<const OmpTypeName> type;
+ EntityDecl var;
+};
+
+struct OmpStylizedInstance {
+ struct Instance {
+ UNION_CLASS_BOILERPLATE(Instance);
+ std::variant<AssignmentStmt, CallStmt, common::Indirection<Expr>> u;
+ };
+ TUPLE_CLASS_BOILERPLATE(OmpStylizedInstance);
+ std::tuple<std::list<OmpStylizedDeclaration>, Instance> t;
+};
+
+class ParseState;
+
+// Ref: [5.2:76], [6.0:185]
+//
+struct OmpStylizedExpression {
+ CharBlock source;
+ // Pointer to a temporary copy of the ParseState that is used to create
+ // additional parse subtrees for the stylized expression. This is only
+ // used internally during parsing and conveys no information to the
+ // consumers of the AST.
+ const ParseState *state{nullptr};
+ WRAPPER_CLASS_BOILERPLATE(
+ OmpStylizedExpression, std::list<OmpStylizedInstance>);
+};
+
// Ref: [4.5:201-207], [5.0:293-299], [5.1:325-331], [5.2:124]
//
// reduction-identifier ->
@@ -3549,9 +3592,22 @@ struct OmpReductionIdentifier {
// combiner-expression -> // since 4.5
// assignment-statement |
// function-reference
-struct OmpCombinerExpression {
- UNION_CLASS_BOILERPLATE(OmpCombinerExpression);
- std::variant<AssignmentStmt, FunctionReference> u;
+struct OmpCombinerExpression : public OmpStylizedExpression {
+ INHERITED_WRAPPER_CLASS_BOILERPLATE(
+ OmpCombinerExpression, OmpStylizedExpression);
+ static llvm::ArrayRef<CharBlock> Variables();
+};
+
+// Ref: [4.5:222:7-8], [5.0:305:28-29], [5.1:337:20-21], [5.2:127:6-8],
+// [6.0:242:3-5]
+//
+// initializer-expression -> // since 4.5
+// OMP_PRIV = expression |
+// subroutine-name(argument-list)
+struct OmpInitializerExpression : public OmpStylizedExpression {
+ INHERITED_WRAPPER_CLASS_BOILERPLATE(
+ OmpInitializerExpression, OmpStylizedExpression);
+ static llvm::ArrayRef<CharBlock> Variables();
};
inline namespace arguments {
@@ -4552,16 +4608,9 @@ struct OmpInReductionClause {
std::tuple<MODIFIERS(), OmpObjectList> t;
};
-// declare-reduction -> DECLARE REDUCTION (reduction-identifier : type-list
-// : combiner) [initializer-clause]
-struct OmpInitializerProc {
- TUPLE_CLASS_BOILERPLATE(OmpInitializerProc);
- std::tuple<ProcedureDesignator, std::list<ActualArgSpec>> t;
-};
// Initialization for declare reduction construct
struct OmpInitializerClause {
- UNION_CLASS_BOILERPLATE(OmpInitializerClause);
- std::variant<OmpInitializerProc, AssignmentStmt> u;
+ WRAPPER_CLASS_BOILERPLATE(OmpInitializerClause, OmpInitializerExpression);
};
// Ref: [4.5:199-201], [5.0:288-290], [5.1:321-322], [5.2:115-117]
diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h
index 04a0639..cb27d544 100644
--- a/flang/include/flang/Semantics/symbol.h
+++ b/flang/include/flang/Semantics/symbol.h
@@ -830,6 +830,8 @@ public:
OmpUseDevicePtr, OmpUseDeviceAddr, OmpIsDevicePtr, OmpHasDeviceAddr,
// OpenMP data-copying attribute
OmpCopyIn, OmpCopyPrivate,
+ // OpenMP special variables
+ OmpInVar, OmpOrigVar, OmpOutVar, OmpPrivVar,
// OpenMP miscellaneous flags
OmpCommonBlock, OmpReduction, OmpInReduction, OmpAligned, OmpNontemporal,
OmpAllocate, OmpDeclarativeAllocateDirective,
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index a516a44..6e72987 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -1884,6 +1884,26 @@ private:
setCurrentPosition(stmt.source);
assert(stmt.typedCall && "Call was not analyzed");
mlir::Value res{};
+
+ // Set 'no_inline', 'inline_hint' or 'always_inline' to true on the
+ // ProcedureRef. The NoInline and AlwaysInline attribute will be set in
+ // genProcedureRef later.
+ for (const auto *dir : eval.dirs) {
+ Fortran::common::visit(
+ Fortran::common::visitors{
+ [&](const Fortran::parser::CompilerDirective::ForceInline &) {
+ stmt.typedCall->setAlwaysInline(true);
+ },
+ [&](const Fortran::parser::CompilerDirective::Inline &) {
+ stmt.typedCall->setInlineHint(true);
+ },
+ [&](const Fortran::parser::CompilerDirective::NoInline &) {
+ stmt.typedCall->setNoInline(true);
+ },
+ [&](const auto &) {}},
+ dir->u);
+ }
+
if (lowerToHighLevelFIR()) {
std::optional<mlir::Type> resultType;
if (stmt.typedCall->hasAlternateReturns())
@@ -2200,6 +2220,50 @@ private:
// so no clean-up needs to be generated for these entities.
}
+ void attachInlineAttributes(
+ mlir::Operation &op,
+ const llvm::ArrayRef<const Fortran::parser::CompilerDirective *> &dirs) {
+ if (dirs.empty())
+ return;
+
+ for (mlir::Value operand : op.getOperands()) {
+ if (operand.getDefiningOp())
+ attachInlineAttributes(*operand.getDefiningOp(), dirs);
+ }
+
+ if (fir::CallOp callOp = mlir::dyn_cast<fir::CallOp>(op)) {
+ for (const auto *dir : dirs) {
+ Fortran::common::visit(
+ Fortran::common::visitors{
+ [&](const Fortran::parser::CompilerDirective::NoInline &) {
+ callOp.setInlineAttr(fir::FortranInlineEnum::no_inline);
+ },
+ [&](const Fortran::parser::CompilerDirective::Inline &) {
+ callOp.setInlineAttr(fir::FortranInlineEnum::inline_hint);
+ },
+ [&](const Fortran::parser::CompilerDirective::ForceInline &) {
+ callOp.setInlineAttr(fir::FortranInlineEnum::always_inline);
+ },
+ [&](const auto &) {}},
+ dir->u);
+ }
+ }
+ }
+
+ void attachAttributesToDoLoopOperations(
+ fir::DoLoopOp &doLoop,
+ llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) {
+ if (!doLoop.getOperation() || dirs.empty())
+ return;
+
+ for (mlir::Block &block : doLoop.getRegion()) {
+ for (mlir::Operation &op : block.getOperations()) {
+ if (!dirs.empty())
+ attachInlineAttributes(op, dirs);
+ }
+ }
+ }
+
/// Generate FIR for a DO construct. There are six variants:
/// - unstructured infinite and while loops
/// - structured and unstructured increment loops
@@ -2351,6 +2415,11 @@ private:
if (!incrementLoopNestInfo.empty() &&
incrementLoopNestInfo.back().isConcurrent)
localSymbols.popScope();
+
+ // Add attribute(s) on operations in fir::DoLoopOp if necessary
+ for (IncrementLoopInfo &info : incrementLoopNestInfo)
+ if (auto loopOp = mlir::dyn_cast_if_present<fir::DoLoopOp>(info.loopOp))
+ attachAttributesToDoLoopOperations(loopOp, doStmtEval.dirs);
}
/// Generate FIR to evaluate loop control values (lower, upper and step).
@@ -3154,6 +3223,26 @@ private:
e->dirs.push_back(&dir);
}
+ void
+ attachInliningDirectiveToStmt(const Fortran::parser::CompilerDirective &dir,
+ Fortran::lower::pft::Evaluation *e) {
+ while (e->isDirective())
+ e = e->lexicalSuccessor;
+
+ // If the successor is a statement or a do loop, the compiler
+ // will perform inlining.
+ if (e->isA<Fortran::parser::CallStmt>() ||
+ e->isA<Fortran::parser::NonLabelDoStmt>() ||
+ e->isA<Fortran::parser::AssignmentStmt>()) {
+ e->dirs.push_back(&dir);
+ } else {
+ mlir::Location loc = toLocation();
+ mlir::emitWarning(loc,
+ "Inlining directive not in front of loops, function"
+ "call or assignment.\n");
+ }
+ }
+
void genFIR(const Fortran::parser::CompilerDirective &dir) {
Fortran::lower::pft::Evaluation &eval = getEval();
@@ -3177,6 +3266,15 @@ private:
[&](const Fortran::parser::CompilerDirective::NoUnrollAndJam &) {
attachDirectiveToLoop(dir, &eval);
},
+ [&](const Fortran::parser::CompilerDirective::ForceInline &) {
+ attachInliningDirectiveToStmt(dir, &eval);
+ },
+ [&](const Fortran::parser::CompilerDirective::Inline &) {
+ attachInliningDirectiveToStmt(dir, &eval);
+ },
+ [&](const Fortran::parser::CompilerDirective::NoInline &) {
+ attachInliningDirectiveToStmt(dir, &eval);
+ },
[&](const auto &) {}},
dir.u);
}
@@ -5086,7 +5184,9 @@ private:
void genDataAssignment(
const Fortran::evaluate::Assignment &assign,
- const Fortran::evaluate::ProcedureRef *userDefinedAssignment) {
+ const Fortran::evaluate::ProcedureRef *userDefinedAssignment,
+ const llvm::ArrayRef<const Fortran::parser::CompilerDirective *> &dirs =
+ {}) {
mlir::Location loc = getCurrentLocation();
fir::FirOpBuilder &builder = getFirOpBuilder();
@@ -5166,10 +5266,20 @@ private:
genCUDADataTransfer(builder, loc, assign, lhs, rhs,
isWholeAllocatableAssignment,
keepLhsLengthInAllocatableAssignment);
- else
+ else {
+ // If RHS or LHS have a CallOp in their expression, this operation will
+ // have the 'no_inline' or 'always_inline' attribute if there is a
+ // directive just before the assignement.
+ if (!dirs.empty()) {
+ if (rhs.getDefiningOp())
+ attachInlineAttributes(*rhs.getDefiningOp(), dirs);
+ if (lhs.getDefiningOp())
+ attachInlineAttributes(*lhs.getDefiningOp(), dirs);
+ }
hlfir::AssignOp::create(builder, loc, rhs, lhs,
isWholeAllocatableAssignment,
keepLhsLengthInAllocatableAssignment);
+ }
if (hasCUDAImplicitTransfer && !isInDeviceContext) {
localSymbols.popScope();
for (mlir::Value temp : implicitTemps)
@@ -5237,16 +5347,21 @@ private:
}
/// Shared for both assignments and pointer assignments.
- void genAssignment(const Fortran::evaluate::Assignment &assign) {
+ void
+ genAssignment(const Fortran::evaluate::Assignment &assign,
+ const llvm::ArrayRef<const Fortran::parser::CompilerDirective *>
+ &dirs = {}) {
mlir::Location loc = toLocation();
if (lowerToHighLevelFIR()) {
Fortran::common::visit(
Fortran::common::visitors{
[&](const Fortran::evaluate::Assignment::Intrinsic &) {
- genDataAssignment(assign, /*userDefinedAssignment=*/nullptr);
+ genDataAssignment(assign, /*userDefinedAssignment=*/nullptr,
+ dirs);
},
[&](const Fortran::evaluate::ProcedureRef &procRef) {
- genDataAssignment(assign, /*userDefinedAssignment=*/&procRef);
+ genDataAssignment(assign, /*userDefinedAssignment=*/&procRef,
+ dirs);
},
[&](const Fortran::evaluate::Assignment::BoundsSpec &lbExprs) {
if (isInsideHlfirForallOrWhere())
@@ -5651,7 +5766,8 @@ private:
}
void genFIR(const Fortran::parser::AssignmentStmt &stmt) {
- genAssignment(*stmt.typedAssignment->v);
+ Fortran::lower::pft::Evaluation &eval = getEval();
+ genAssignment(*stmt.typedAssignment->v, eval.dirs);
}
void genFIR(const Fortran::parser::SyncAllStmt &stmt) {
diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp
index fb72040..9bf994e 100644
--- a/flang/lib/Lower/ConvertCall.cpp
+++ b/flang/lib/Lower/ConvertCall.cpp
@@ -700,9 +700,20 @@ Fortran::lower::genCallOpAndResult(
callResult = dispatch.getResult(0);
} else {
// Standard procedure call with fir.call.
+ fir::FortranInlineEnumAttr inlineAttr;
+
+ if (caller.getCallDescription().hasNoInline())
+ inlineAttr = fir::FortranInlineEnumAttr::get(
+ builder.getContext(), fir::FortranInlineEnum::no_inline);
+ else if (caller.getCallDescription().hasInlineHint())
+ inlineAttr = fir::FortranInlineEnumAttr::get(
+ builder.getContext(), fir::FortranInlineEnum::inline_hint);
+ else if (caller.getCallDescription().hasAlwaysInline())
+ inlineAttr = fir::FortranInlineEnumAttr::get(
+ builder.getContext(), fir::FortranInlineEnum::always_inline);
auto call = fir::CallOp::create(
builder, loc, funcType.getResults(), funcSymbolAttr, operands,
- /*arg_attrs=*/nullptr, /*res_attrs=*/nullptr, procAttrs);
+ /*arg_attrs=*/nullptr, /*res_attrs=*/nullptr, procAttrs, inlineAttr);
callNumResults = call.getNumResults();
if (callNumResults != 0)
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 39bac81..53fe9c0 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -50,6 +50,7 @@
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
#include "mlir/Dialect/Math/IR/Math.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -358,6 +359,14 @@ static constexpr IntrinsicHandler handlers[]{
&I::genBarrierInit,
{{{"barrier", asAddr}, {"count", asValue}}},
/*isElemental=*/false},
+ {"barrier_try_wait",
+ &I::genBarrierTryWait,
+ {{{"barrier", asAddr}, {"token", asValue}}},
+ /*isElemental=*/false},
+ {"barrier_try_wait_sleep",
+ &I::genBarrierTryWaitSleep,
+ {{{"barrier", asAddr}, {"token", asValue}, {"ns", asValue}}},
+ /*isElemental=*/false},
{"bessel_jn",
&I::genBesselJn,
{{{"n1", asValue}, {"n2", asValue}, {"x", asValue}}},
@@ -1036,10 +1045,87 @@ static constexpr IntrinsicHandler handlers[]{
{"dst", asAddr},
{"nbytes", asValue}}},
/*isElemental=*/false},
+ {"tma_bulk_ldc4",
+ &I::genTMABulkLoadC4,
+ {{{"barrier", asAddr},
+ {"src", asAddr},
+ {"dst", asAddr},
+ {"nelems", asValue}}},
+ /*isElemental=*/false},
+ {"tma_bulk_ldc8",
+ &I::genTMABulkLoadC8,
+ {{{"barrier", asAddr},
+ {"src", asAddr},
+ {"dst", asAddr},
+ {"nelems", asValue}}},
+ /*isElemental=*/false},
+ {"tma_bulk_ldi4",
+ &I::genTMABulkLoadI4,
+ {{{"barrier", asAddr},
+ {"src", asAddr},
+ {"dst", asAddr},
+ {"nelems", asValue}}},
+ /*isElemental=*/false},
+ {"tma_bulk_ldi8",
+ &I::genTMABulkLoadI8,
+ {{{"barrier", asAddr},
+ {"src", asAddr},
+ {"dst", asAddr},
+ {"nelems", asValue}}},
+ /*isElemental=*/false},
+ {"tma_bulk_ldr2",
+ &I::genTMABulkLoadR2,
+ {{{"barrier", asAddr},
+ {"src", asAddr},
+ {"dst", asAddr},
+ {"nelems", asValue}}},
+ /*isElemental=*/false},
+ {"tma_bulk_ldr4",
+ &I::genTMABulkLoadR4,
+ {{{"barrier", asAddr},
+ {"src", asAddr},
+ {"dst", asAddr},
+ {"nelems", asValue}}},
+ /*isElemental=*/false},
+ {"tma_bulk_ldr8",
+ &I::genTMABulkLoadR8,
+ {{{"barrier", asAddr},
+ {"src", asAddr},
+ {"dst", asAddr},
+ {"nelems", asValue}}},
+ /*isElemental=*/false},
{"tma_bulk_s2g",
&I::genTMABulkS2G,
{{{"src", asAddr}, {"dst", asAddr}, {"nbytes", asValue}}},
/*isElemental=*/false},
+ {"tma_bulk_store_c4",
+ &I::genTMABulkStoreC4,
+ {{{"src", asAddr}, {"dst", asAddr}, {"count", asValue}}},
+ /*isElemental=*/false},
+ {"tma_bulk_store_c8",
+ &I::genTMABulkStoreC8,
+ {{{"src", asAddr}, {"dst", asAddr}, {"count", asValue}}},
+ /*isElemental=*/false},
+ {"tma_bulk_store_i4",
+ &I::genTMABulkStoreI4,
+ {{{"src", asAddr}, {"dst", asAddr}, {"count", asValue}}},
+ /*isElemental=*/false},
+ {"tma_bulk_store_i8",
+ &I::genTMABulkStoreI8,
+ {{{"src", asAddr}, {"dst", asAddr}, {"count", asValue}}},
+ /*isElemental=*/false},
+ {"tma_bulk_store_r2",
+ &I::genTMABulkStoreR2,
+ {{{"src", asAddr}, {"dst", asAddr}, {"count", asValue}}},
+ /*isElemental=*/false},
+ {"tma_bulk_store_r4",
+ &I::genTMABulkStoreR4,
+ {{{"src", asAddr}, {"dst", asAddr}, {"count", asValue}}},
+ /*isElemental=*/false},
+ {"tma_bulk_store_r8",
+ &I::genTMABulkStoreR8,
+ {{{"src", asAddr}, {"dst", asAddr}, {"count", asValue}}},
+ /*isElemental=*/false},
{"tma_bulk_wait_group",
&I::genTMABulkWaitGroup,
{{}},
@@ -3282,6 +3368,57 @@ void IntrinsicLibrary::genBarrierInit(llvm::ArrayRef<fir::ExtendedValue> args) {
mlir::NVVM::FenceProxyOp::create(builder, loc, kind, space);
}
+// BARRIER_TRY_WAIT (CUDA)
+mlir::Value
+IntrinsicLibrary::genBarrierTryWait(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 2);
+ mlir::Value res = fir::AllocaOp::create(builder, loc, resultType);
+ mlir::Value zero = builder.createIntegerConstant(loc, resultType, 0);
+ fir::StoreOp::create(builder, loc, zero, res);
+ mlir::Value ns =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 1000000);
+ mlir::Value load = fir::LoadOp::create(builder, loc, res);
+ auto whileOp = mlir::scf::WhileOp::create(
+ builder, loc, mlir::TypeRange{resultType}, mlir::ValueRange{load});
+ mlir::Block *beforeBlock = builder.createBlock(&whileOp.getBefore());
+ mlir::Value beforeArg = beforeBlock->addArgument(resultType, loc);
+ builder.setInsertionPointToStart(beforeBlock);
+ mlir::Value condition = mlir::arith::CmpIOp::create(
+ builder, loc, mlir::arith::CmpIPredicate::ne, beforeArg, zero);
+ mlir::scf::ConditionOp::create(builder, loc, condition, beforeArg);
+ mlir::Block *afterBlock = builder.createBlock(&whileOp.getAfter());
+ afterBlock->addArgument(resultType, loc);
+ builder.setInsertionPointToStart(afterBlock);
+ auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext());
+ auto barrier = builder.createConvert(loc, llvmPtrTy, args[0]);
+ mlir::Value ret =
+ mlir::NVVM::InlinePtxOp::create(
+ builder, loc, {resultType}, {barrier, args[1], ns}, {},
+ ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%1], %2, %3; "
+ "selp.b32 %0, 1, 0, p;",
+ {})
+ .getResult(0);
+ mlir::scf::YieldOp::create(builder, loc, ret);
+ builder.setInsertionPointAfter(whileOp);
+ return whileOp.getResult(0);
+}
+
+// BARRIER_TRY_WAIT_SLEEP (CUDA)
+mlir::Value
+IntrinsicLibrary::genBarrierTryWaitSleep(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 3);
+ auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext());
+ auto barrier = builder.createConvert(loc, llvmPtrTy, args[0]);
+ return mlir::NVVM::InlinePtxOp::create(
+ builder, loc, {resultType}, {barrier, args[1], args[2]}, {},
+ ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%1], %2, %3; "
+ "selp.b32 %0, 1, 0, p;",
+ {})
+ .getResult(0);
+}
+
// BESSEL_JN
fir::ExtendedValue
IntrinsicLibrary::genBesselJn(mlir::Type resultType,
@@ -9218,6 +9355,93 @@ void IntrinsicLibrary::genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue> args) {
builder, loc, dst, src, barrier, fir::getBase(args[3]), {}, {});
}
+static void genTMABulkLoad(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value barrier, mlir::Value src,
+ mlir::Value dst, mlir::Value nelem,
+ mlir::Value eleSize) {
+ mlir::Value size = mlir::arith::MulIOp::create(builder, loc, nelem, eleSize);
+ auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext());
+ barrier = builder.createConvert(loc, llvmPtrTy, barrier);
+ mlir::NVVM::InlinePtxOp::create(
+ builder, loc, mlir::TypeRange{}, {dst, src, size, barrier}, {},
+ "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], "
+ "[%1], %2, [%3];",
+ {});
+ mlir::NVVM::InlinePtxOp::create(
+ builder, loc, mlir::TypeRange{}, {barrier, size}, {},
+ "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;", {});
+}
+
+// TMA_BULK_LOADC4
+void IntrinsicLibrary::genTMABulkLoadC4(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 4);
+ mlir::Value eleSize =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 8);
+ genTMABulkLoad(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]),
+ fir::getBase(args[2]), fir::getBase(args[3]), eleSize);
+}
+
+// TMA_BULK_LOADC8
+void IntrinsicLibrary::genTMABulkLoadC8(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 4);
+ mlir::Value eleSize =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 16);
+ genTMABulkLoad(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]),
+ fir::getBase(args[2]), fir::getBase(args[3]), eleSize);
+}
+
+// TMA_BULK_LOADI4
+void IntrinsicLibrary::genTMABulkLoadI4(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 4);
+ mlir::Value eleSize =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 4);
+ genTMABulkLoad(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]),
+ fir::getBase(args[2]), fir::getBase(args[3]), eleSize);
+}
+
+// TMA_BULK_LOADI8
+void IntrinsicLibrary::genTMABulkLoadI8(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 4);
+ mlir::Value eleSize =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 8);
+ genTMABulkLoad(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]),
+ fir::getBase(args[2]), fir::getBase(args[3]), eleSize);
+}
+
+// TMA_BULK_LOADR2
+void IntrinsicLibrary::genTMABulkLoadR2(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 4);
+ mlir::Value eleSize =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 2);
+ genTMABulkLoad(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]),
+ fir::getBase(args[2]), fir::getBase(args[3]), eleSize);
+}
+
+// TMA_BULK_LOADR4
+void IntrinsicLibrary::genTMABulkLoadR4(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 4);
+ mlir::Value eleSize =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 4);
+ genTMABulkLoad(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]),
+ fir::getBase(args[2]), fir::getBase(args[3]), eleSize);
+}
+
+// TMA_BULK_LOADR8
+void IntrinsicLibrary::genTMABulkLoadR8(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 4);
+ mlir::Value eleSize =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 8);
+ genTMABulkLoad(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]),
+ fir::getBase(args[2]), fir::getBase(args[3]), eleSize);
+}
+
// TMA_BULK_S2G (CUDA)
void IntrinsicLibrary::genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 3);
@@ -9227,6 +9451,97 @@ void IntrinsicLibrary::genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue> args) {
mlir::NVVM::NVVMMemorySpace::Global);
mlir::NVVM::CpAsyncBulkSharedCTAToGlobalOp::create(
builder, loc, dst, src, fir::getBase(args[2]), {}, {});
+
+ mlir::NVVM::InlinePtxOp::create(builder, loc, mlir::TypeRange{}, {}, {},
+ "cp.async.bulk.commit_group", {});
+ mlir::NVVM::CpAsyncBulkWaitGroupOp::create(builder, loc,
+ builder.getI32IntegerAttr(0), {});
+}
+
+static void genTMABulkStore(fir::FirOpBuilder &builder, mlir::Location loc,
+ mlir::Value src, mlir::Value dst, mlir::Value count,
+ mlir::Value eleSize) {
+ mlir::Value size = mlir::arith::MulIOp::create(builder, loc, eleSize, count);
+ src = convertPtrToNVVMSpace(builder, loc, src,
+ mlir::NVVM::NVVMMemorySpace::Shared);
+ dst = convertPtrToNVVMSpace(builder, loc, dst,
+ mlir::NVVM::NVVMMemorySpace::Global);
+ mlir::NVVM::CpAsyncBulkSharedCTAToGlobalOp::create(builder, loc, dst, src,
+ size, {}, {});
+ mlir::NVVM::InlinePtxOp::create(builder, loc, mlir::TypeRange{}, {}, {},
+ "cp.async.bulk.commit_group", {});
+ mlir::NVVM::CpAsyncBulkWaitGroupOp::create(builder, loc,
+ builder.getI32IntegerAttr(0), {});
+}
+
+// TMA_BULK_STORE_C4 (CUDA)
+void IntrinsicLibrary::genTMABulkStoreC4(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 3);
+ mlir::Value eleSize =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 8);
+ genTMABulkStore(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]),
+ fir::getBase(args[2]), eleSize);
+}
+
+// TMA_BULK_STORE_C8 (CUDA)
+void IntrinsicLibrary::genTMABulkStoreC8(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 3);
+ mlir::Value eleSize =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 16);
+ genTMABulkStore(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]),
+ fir::getBase(args[2]), eleSize);
+}
+
+// TMA_BULK_STORE_I4 (CUDA)
+void IntrinsicLibrary::genTMABulkStoreI4(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 3);
+ mlir::Value eleSize =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 4);
+ genTMABulkStore(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]),
+ fir::getBase(args[2]), eleSize);
+}
+
+// TMA_BULK_STORE_I8 (CUDA)
+void IntrinsicLibrary::genTMABulkStoreI8(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 3);
+ mlir::Value eleSize =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 8);
+ genTMABulkStore(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]),
+ fir::getBase(args[2]), eleSize);
+}
+
+// TMA_BULK_STORE_R2 (CUDA)
+void IntrinsicLibrary::genTMABulkStoreR2(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 3);
+ mlir::Value eleSize =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 2);
+ genTMABulkStore(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]),
+ fir::getBase(args[2]), eleSize);
+}
+
+// TMA_BULK_STORE_R4 (CUDA)
+void IntrinsicLibrary::genTMABulkStoreR4(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 3);
+ mlir::Value eleSize =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 4);
+ genTMABulkStore(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]),
+ fir::getBase(args[2]), eleSize);
+}
+
+// TMA_BULK_STORE_R8 (CUDA)
+void IntrinsicLibrary::genTMABulkStoreR8(
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 3);
+ mlir::Value eleSize =
+ builder.createIntegerConstant(loc, builder.getI32Type(), 8);
+ genTMABulkStore(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]),
+ fir::getBase(args[2]), eleSize);
}
// TMA_BULK_WAIT_GROUP (CUDA)
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 478ab15..ca4aefb 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -680,6 +680,18 @@ struct CallOpConversion : public fir::FIROpConversion<fir::CallOp> {
if (mlir::ArrayAttr resAttrs = call.getResAttrsAttr())
llvmCall.setResAttrsAttr(resAttrs);
+ if (auto inlineAttr = call.getInlineAttrAttr()) {
+ llvmCall->removeAttr("inline_attr");
+ if (inlineAttr.getValue() == fir::FortranInlineEnum::no_inline) {
+ llvmCall.setNoInlineAttr(rewriter.getUnitAttr());
+ } else if (inlineAttr.getValue() == fir::FortranInlineEnum::inline_hint) {
+ llvmCall.setInlineHintAttr(rewriter.getUnitAttr());
+ } else if (inlineAttr.getValue() ==
+ fir::FortranInlineEnum::always_inline) {
+ llvmCall.setAlwaysInlineAttr(rewriter.getUnitAttr());
+ }
+ }
+
if (memAttr)
llvmCall.setMemoryEffectsAttr(
mlir::cast<mlir::LLVM::MemoryEffectsAttr>(memAttr));
diff --git a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
index 0776346..8ca2869 100644
--- a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
+++ b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
@@ -143,7 +143,8 @@ public:
llvm::SmallVector<mlir::Type> operandsTypes;
for (auto arg : gpuLaunchFunc.getKernelOperands())
operandsTypes.push_back(arg.getType());
- auto fctTy = mlir::FunctionType::get(&context, operandsTypes, {});
+ auto fctTy = mlir::FunctionType::get(&context, operandsTypes,
+ gpuLaunchFunc.getResultTypes());
if (!hasPortableSignature(fctTy, op))
convertCallOp(gpuLaunchFunc, fctTy);
} else if (auto addr = mlir::dyn_cast<fir::AddrOfOp>(op)) {
@@ -520,10 +521,14 @@ public:
llvm::SmallVector<mlir::Value, 1> newCallResults;
// TODO propagate/update call argument and result attributes.
if constexpr (std::is_same_v<std::decay_t<A>, mlir::gpu::LaunchFuncOp>) {
+ mlir::Value asyncToken = callOp.getAsyncToken();
auto newCall = A::create(*rewriter, loc, callOp.getKernel(),
callOp.getGridSizeOperandValues(),
callOp.getBlockSizeOperandValues(),
- callOp.getDynamicSharedMemorySize(), newOpers);
+ callOp.getDynamicSharedMemorySize(), newOpers,
+ asyncToken ? asyncToken.getType() : nullptr,
+ callOp.getAsyncDependencies(),
+ /*clusterSize=*/std::nullopt);
if (callOp.getClusterSizeX())
newCall.getClusterSizeXMutable().assign(callOp.getClusterSizeX());
if (callOp.getClusterSizeY())
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index d0164f3..4f97aca 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -4484,7 +4484,7 @@ void fir::IfOp::getSuccessorRegions(
llvm::SmallVectorImpl<mlir::RegionSuccessor> &regions) {
// The `then` and the `else` region branch back to the parent operation.
if (!point.isParent()) {
- regions.push_back(mlir::RegionSuccessor(getResults()));
+ regions.push_back(mlir::RegionSuccessor(getOperation(), getResults()));
return;
}
@@ -4494,7 +4494,8 @@ void fir::IfOp::getSuccessorRegions(
// Don't consider the else region if it is empty.
mlir::Region *elseRegion = &this->getElseRegion();
if (elseRegion->empty())
- regions.push_back(mlir::RegionSuccessor());
+ regions.push_back(
+ mlir::RegionSuccessor(getOperation(), getOperation()->getResults()));
else
regions.push_back(mlir::RegionSuccessor(elseRegion));
}
@@ -4513,7 +4514,7 @@ void fir::IfOp::getEntrySuccessorRegions(
if (!getElseRegion().empty())
regions.emplace_back(&getElseRegion());
else
- regions.emplace_back(getResults());
+ regions.emplace_back(getOperation(), getOperation()->getResults());
}
}
diff --git a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp
index 25a8f7a..8c0acc5 100644
--- a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp
@@ -246,7 +246,8 @@ struct DispatchOpConv : public OpConversionPattern<fir::DispatchOp> {
args.append(dispatch.getArgs().begin(), dispatch.getArgs().end());
rewriter.replaceOpWithNewOp<fir::CallOp>(
dispatch, resTypes, nullptr, args, dispatch.getArgAttrsAttr(),
- dispatch.getResAttrsAttr(), dispatch.getProcedureAttrsAttr());
+ dispatch.getResAttrsAttr(), dispatch.getProcedureAttrsAttr(),
+ /*inline_attr*/ fir::FortranInlineEnumAttr{});
return mlir::success();
}
diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp
index d33a18f..59fe7d8 100644
--- a/flang/lib/Parser/Fortran-parsers.cpp
+++ b/flang/lib/Parser/Fortran-parsers.cpp
@@ -1314,6 +1314,11 @@ constexpr auto novector{"NOVECTOR" >> construct<CompilerDirective::NoVector>()};
constexpr auto nounroll{"NOUNROLL" >> construct<CompilerDirective::NoUnroll>()};
constexpr auto nounrollAndJam{
"NOUNROLL_AND_JAM" >> construct<CompilerDirective::NoUnrollAndJam>()};
+constexpr auto forceinlineDir{
+ "FORCEINLINE" >> construct<CompilerDirective::ForceInline>()};
+constexpr auto noinlineDir{
+ "NOINLINE" >> construct<CompilerDirective::NoInline>()};
+constexpr auto inlineDir{"INLINE" >> construct<CompilerDirective::Inline>()};
TYPE_PARSER(beginDirective >> "DIR$ "_tok >>
sourced((construct<CompilerDirective>(ignore_tkr) ||
construct<CompilerDirective>(loopCount) ||
@@ -1324,6 +1329,9 @@ TYPE_PARSER(beginDirective >> "DIR$ "_tok >>
construct<CompilerDirective>(novector) ||
construct<CompilerDirective>(nounrollAndJam) ||
construct<CompilerDirective>(nounroll) ||
+ construct<CompilerDirective>(noinlineDir) ||
+ construct<CompilerDirective>(forceinlineDir) ||
+ construct<CompilerDirective>(inlineDir) ||
construct<CompilerDirective>(
many(construct<CompilerDirective::NameValue>(
name, maybe(("="_tok || ":"_tok) >> digitString64))))) /
diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp
index d1e081c..4159d2e 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -275,6 +275,13 @@ struct SpecificModifierParser {
// --- Iterator helpers -----------------------------------------------
+static EntityDecl MakeEntityDecl(ObjectName &&name) {
+ return EntityDecl(
+ /*ObjectName=*/std::move(name), std::optional<ArraySpec>{},
+ std::optional<CoarraySpec>{}, std::optional<CharLength>{},
+ std::optional<Initialization>{});
+}
+
// [5.0:47:17-18] In an iterator-specifier, if the iterator-type is not
// specified then the type of that iterator is default integer.
// [5.0:49:14] The iterator-type must be an integer type.
@@ -282,11 +289,7 @@ static std::list<EntityDecl> makeEntityList(std::list<ObjectName> &&names) {
std::list<EntityDecl> entities;
for (auto iter = names.begin(), end = names.end(); iter != end; ++iter) {
- EntityDecl entityDecl(
- /*ObjectName=*/std::move(*iter), std::optional<ArraySpec>{},
- std::optional<CoarraySpec>{}, std::optional<CharLength>{},
- std::optional<Initialization>{});
- entities.push_back(std::move(entityDecl));
+ entities.push_back(MakeEntityDecl(std::move(*iter)));
}
return entities;
}
@@ -306,6 +309,217 @@ static TypeDeclarationStmt makeIterSpecDecl(std::list<ObjectName> &&names) {
makeEntityList(std::move(names)));
}
+// --- Stylized expression handling -----------------------------------
+
+// OpenMP has a concept of am "OpenMP stylized expression". Syntactially
+// it looks like a typical Fortran expression (or statement), except:
+// - the only variables allowed in it are OpenMP special variables, the
+// exact set of these variables depends on the specific case of the
+// stylized expression
+// - the special OpenMP variables present may assume one or more types,
+// and the expression should be semantically valid for each type.
+//
+// The stylized expression can be thought of as a template, which will be
+// instantiated for each type provided somewhere in the context in which
+// the stylized expression appears.
+//
+// AST nodes:
+// - OmpStylizedExpression: contains the source string for the expression,
+// plus the list of instances (OmpStylizedInstance).
+// - OmpStylizedInstance: corresponds to the instantiation of the stylized
+// expression for a specific type. The way that the type is specified is
+// by creating declarations (OmpStylizedDeclaration) for the special
+// variables. Together with the AST tree corresponding to the stylized
+// expression the instantiation has enough information for semantic
+// analysis. Each instance has its own scope, and the special variables
+// have their own Symbol's (local to the scope).
+// - OmpStylizedDeclaration: encapsulates the information that the visitors
+// in resolve-names can use to "emulate" a declaration for a special
+// variable and allow name resolution in the instantiation AST to work.
+//
+// Implementation specifics:
+// The semantic analysis stores "evaluate::Expr" in each AST node rooted
+// in parser::Expr (in the typedExpr member). The evaluate::Expr is specific
+// to a given type, and so to allow different types for a given expression,
+// for each type a separate copy of the parser::Expr subtree is created.
+// Normally, AST nodes are non-copyable (copy-ctor is deleted), so to create
+// several copies of a subtree, the same source string is parsed several
+// times. The ParseState member in OmpStylizedExpression is the parser state
+// immediately before the stylized expression.
+//
+// Initially, when OmpStylizedExpression is first created, the expression is
+// parsed as if it was an actual code, but this parsing is only done to
+// establish where the stylized expression ends (in the source). The source
+// and the initial parser state are stored in the object, and the instance
+// list is empty.
+// Once the parsing of the containing OmpDirectiveSpecification completes,
+// a post-processing "parser" (OmpStylizedInstanceCreator) executes. This
+// post-processor examines the directive specification to see if it expects
+// any stylized expressions to be contained in it, and then instantiates
+// them for each such directive.
+
+template <typename A> struct NeverParser {
+ using resultType = A;
+ std::optional<resultType> Parse(ParseState &state) const {
+ // Always fail, but without any messages.
+ return std::nullopt;
+ }
+};
+
+template <typename A> constexpr auto never() { return NeverParser<A>{}; }
+
+// Parser for optional<T> which always succeeds and returns std::nullptr.
+// It's only needed to produce "std::optional<CallStmt::Chevrons>" in
+// CallStmt.
+template <typename A, typename B = void> struct NullParser;
+template <typename B> struct NullParser<std::optional<B>> {
+ using resultType = std::optional<B>;
+ std::optional<resultType> Parse(ParseState &) const {
+ return resultType{std::nullopt};
+ }
+};
+
+template <typename A> constexpr auto null() { return NullParser<A>{}; }
+
+// OmpStylizedDeclaration and OmpStylizedInstance are helper classes, and
+// don't correspond to anything in the source. Their parsers should still
+// exist, but they should never be executed.
+TYPE_PARSER(construct<OmpStylizedDeclaration>(never<OmpStylizedDeclaration>()))
+TYPE_PARSER(construct<OmpStylizedInstance>(never<OmpStylizedInstance>()))
+
+TYPE_PARSER( //
+ construct<OmpStylizedInstance::Instance>(Parser<AssignmentStmt>{}) ||
+ construct<OmpStylizedInstance::Instance>(
+ sourced(construct<CallStmt>(Parser<ProcedureDesignator>{},
+ null<std::optional<CallStmt::Chevrons>>(),
+ parenthesized(optionalList(actualArgSpec))))) ||
+ construct<OmpStylizedInstance::Instance>(indirect(expr)))
+
+struct OmpStylizedExpressionParser {
+ using resultType = OmpStylizedExpression;
+
+ std::optional<resultType> Parse(ParseState &state) const {
+ auto *saved{new ParseState(state)};
+ auto getSource{verbatim(Parser<OmpStylizedInstance::Instance>{} >> ok)};
+ if (auto &&ok{getSource.Parse(state)}) {
+ OmpStylizedExpression result{std::list<OmpStylizedInstance>{}};
+ result.source = ok->source;
+ result.state = saved;
+ // result.v remains empty
+ return std::move(result);
+ }
+ delete saved;
+ return std::nullopt;
+ }
+};
+
+static void Instantiate(OmpStylizedExpression &ose,
+ llvm::ArrayRef<const OmpTypeName *> types, llvm::ArrayRef<CharBlock> vars) {
+ // 1. For each var in the vars list, declare it with the corresponding
+ // type from types.
+ // 2. Run the parser to get the AST for the stylized expression.
+ // 3. Create OmpStylizedInstance and append it to the list in ose.
+ assert(types.size() == vars.size() && "List size mismatch");
+ // A ParseState object is irreversibly modified during parsing (in
+ // particular, it cannot be rewound to an earlier position in the source).
+ // Because of that we need to create a local copy for each instantiation.
+ // If rewinding was possible, we could just use the current one, and we
+ // wouldn't need to save it in the AST node.
+ ParseState state{DEREF(ose.state)};
+
+ std::list<OmpStylizedDeclaration> decls;
+ for (auto [type, var] : llvm::zip_equal(types, vars)) {
+ decls.emplace_back(OmpStylizedDeclaration{
+ common::Reference(*type), MakeEntityDecl(Name{var})});
+ }
+
+ if (auto &&instance{Parser<OmpStylizedInstance::Instance>{}.Parse(state)}) {
+ ose.v.emplace_back(
+ OmpStylizedInstance{std::move(decls), std::move(*instance)});
+ }
+}
+
+static void InstantiateForTypes(OmpStylizedExpression &ose,
+ const OmpTypeNameList &typeNames, llvm::ArrayRef<CharBlock> vars) {
+ // For each type in the type list, declare all variables in vars with
+ // that type, and complete the instantiation.
+ for (const OmpTypeName &t : typeNames.v) {
+ std::vector<const OmpTypeName *> types(vars.size(), &t);
+ Instantiate(ose, types, vars);
+ }
+}
+
+static void InstantiateDeclareReduction(OmpDirectiveSpecification &spec) {
+ // There can be arguments/clauses that don't make sense, that analysis
+ // is left until semantic checks. Tolerate any unexpected stuff.
+ auto *rspec{GetFirstArgument<OmpReductionSpecifier>(spec)};
+ if (!rspec) {
+ return;
+ }
+
+ const OmpTypeNameList *typeNames{nullptr};
+
+ if (auto *cexpr{
+ const_cast<OmpCombinerExpression *>(GetCombinerExpr(*rspec))}) {
+ typeNames = &std::get<OmpTypeNameList>(rspec->t);
+
+ InstantiateForTypes(*cexpr, *typeNames, OmpCombinerExpression::Variables());
+ delete cexpr->state;
+ cexpr->state = nullptr;
+ } else {
+ // If there are no types, there is nothing else to do.
+ return;
+ }
+
+ for (const OmpClause &clause : spec.Clauses().v) {
+ llvm::omp::Clause id{clause.Id()};
+ if (id == llvm::omp::Clause::OMPC_initializer) {
+ if (auto *iexpr{const_cast<OmpInitializerExpression *>(
+ GetInitializerExpr(clause))}) {
+ InstantiateForTypes(
+ *iexpr, *typeNames, OmpInitializerExpression::Variables());
+ delete iexpr->state;
+ iexpr->state = nullptr;
+ }
+ }
+ }
+}
+
+static void InstantiateStylizedDirective(OmpDirectiveSpecification &spec) {
+ const OmpDirectiveName &dirName{spec.DirName()};
+ if (dirName.v == llvm::omp::Directive::OMPD_declare_reduction) {
+ InstantiateDeclareReduction(spec);
+ }
+}
+
+template <typename P,
+ typename = std::enable_if_t<
+ std::is_same_v<typename P::resultType, OmpDirectiveSpecification>>>
+struct OmpStylizedInstanceCreator {
+ using resultType = OmpDirectiveSpecification;
+ constexpr OmpStylizedInstanceCreator(P p) : parser_(p) {}
+
+ std::optional<resultType> Parse(ParseState &state) const {
+ if (auto &&spec{parser_.Parse(state)}) {
+ InstantiateStylizedDirective(*spec);
+ return std::move(spec);
+ }
+ return std::nullopt;
+ }
+
+private:
+ const P parser_;
+};
+
+template <typename P>
+OmpStylizedInstanceCreator(P) -> OmpStylizedInstanceCreator<P>;
+
+// --- Parsers for types ----------------------------------------------
+
+TYPE_PARSER( //
+ sourced(construct<OmpTypeName>(Parser<DeclarationTypeSpec>{})) ||
+ sourced(construct<OmpTypeName>(Parser<TypeSpec>{})))
+
// --- Parsers for arguments ------------------------------------------
// At the moment these are only directive arguments. This is needed for
@@ -366,10 +580,6 @@ struct OmpArgumentListParser {
}
};
-TYPE_PARSER( //
- construct<OmpTypeName>(Parser<DeclarationTypeSpec>{}) ||
- construct<OmpTypeName>(Parser<TypeSpec>{}))
-
// 2.15.3.6 REDUCTION (reduction-identifier: variable-name-list)
TYPE_PARSER(construct<OmpReductionIdentifier>(Parser<DefinedOperator>{}) ||
construct<OmpReductionIdentifier>(Parser<ProcedureDesignator>{}))
@@ -1065,7 +1275,8 @@ TYPE_PARSER(construct<OmpOtherwiseClause>(
TYPE_PARSER(construct<OmpWhenClause>(
maybe(nonemptyList(Parser<OmpWhenClause::Modifier>{}) / ":"),
- maybe(indirect(Parser<OmpDirectiveSpecification>{}))))
+ maybe(indirect(
+ OmpStylizedInstanceCreator(Parser<OmpDirectiveSpecification>{})))))
// OMP 5.2 12.6.1 grainsize([ prescriptiveness :] scalar-integer-expression)
TYPE_PARSER(construct<OmpGrainsizeClause>(
@@ -1777,12 +1988,7 @@ TYPE_PARSER(
Parser<OpenMPInteropConstruct>{})) /
endOfLine)
-TYPE_PARSER(construct<OmpInitializerProc>(Parser<ProcedureDesignator>{},
- parenthesized(many(maybe(","_tok) >> Parser<ActualArgSpec>{}))))
-
-TYPE_PARSER(construct<OmpInitializerClause>(
- construct<OmpInitializerClause>(assignmentStmt) ||
- construct<OmpInitializerClause>(Parser<OmpInitializerProc>{})))
+TYPE_PARSER(construct<OmpInitializerClause>(Parser<OmpInitializerExpression>{}))
// OpenMP 5.2: 7.5.4 Declare Variant directive
TYPE_PARSER(sourced(construct<OmpDeclareVariantDirective>(
@@ -1794,7 +2000,7 @@ TYPE_PARSER(sourced(construct<OmpDeclareVariantDirective>(
TYPE_PARSER(sourced(construct<OpenMPDeclareReductionConstruct>(
predicated(Parser<OmpDirectiveName>{},
IsDirective(llvm::omp::Directive::OMPD_declare_reduction)) >=
- Parser<OmpDirectiveSpecification>{})))
+ OmpStylizedInstanceCreator(Parser<OmpDirectiveSpecification>{}))))
// 2.10.6 Declare Target Construct
TYPE_PARSER(sourced(construct<OpenMPDeclareTargetConstruct>(
@@ -1832,8 +2038,8 @@ TYPE_PARSER(sourced(construct<OpenMPDeclareMapperConstruct>(
IsDirective(llvm::omp::Directive::OMPD_declare_mapper)) >=
Parser<OmpDirectiveSpecification>{})))
-TYPE_PARSER(construct<OmpCombinerExpression>(Parser<AssignmentStmt>{}) ||
- construct<OmpCombinerExpression>(Parser<FunctionReference>{}))
+TYPE_PARSER(construct<OmpCombinerExpression>(OmpStylizedExpressionParser{}))
+TYPE_PARSER(construct<OmpInitializerExpression>(OmpStylizedExpressionParser{}))
TYPE_PARSER(sourced(construct<OpenMPCriticalConstruct>(
OmpBlockConstructParser{llvm::omp::Directive::OMPD_critical})))
diff --git a/flang/lib/Parser/openmp-utils.cpp b/flang/lib/Parser/openmp-utils.cpp
index 937a17f..95ad3f6 100644
--- a/flang/lib/Parser/openmp-utils.cpp
+++ b/flang/lib/Parser/openmp-utils.cpp
@@ -74,4 +74,16 @@ const BlockConstruct *GetFortranBlockConstruct(
return nullptr;
}
+const OmpCombinerExpression *GetCombinerExpr(
+ const OmpReductionSpecifier &rspec) {
+ return addr_if(std::get<std::optional<OmpCombinerExpression>>(rspec.t));
+}
+
+const OmpInitializerExpression *GetInitializerExpr(const OmpClause &init) {
+ if (auto *wrapped{std::get_if<OmpClause::Initializer>(&init.u)}) {
+ return &wrapped->v.v;
+ }
+ return nullptr;
+}
+
} // namespace Fortran::parser::omp
diff --git a/flang/lib/Parser/parse-tree.cpp b/flang/lib/Parser/parse-tree.cpp
index 8cbaa39..ad0016e 100644
--- a/flang/lib/Parser/parse-tree.cpp
+++ b/flang/lib/Parser/parse-tree.cpp
@@ -11,6 +11,7 @@
#include "flang/Common/indirection.h"
#include "flang/Parser/tools.h"
#include "flang/Parser/user-state.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/Frontend/OpenMP/OMP.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -430,4 +431,30 @@ const OmpClauseList &OmpDirectiveSpecification::Clauses() const {
}
return empty;
}
+
+static bool InitCharBlocksFromStrings(llvm::MutableArrayRef<CharBlock> blocks,
+ llvm::ArrayRef<std::string> strings) {
+ for (auto [i, n] : llvm::enumerate(strings)) {
+ blocks[i] = CharBlock(n);
+ }
+ return true;
+}
+
+// The names should have static storage duration. Keep these names
+// in a sigle place.
+llvm::ArrayRef<CharBlock> OmpCombinerExpression::Variables() {
+ static std::string names[]{"omp_in", "omp_out"};
+ static CharBlock vars[std::size(names)];
+
+ [[maybe_unused]] static bool init = InitCharBlocksFromStrings(vars, names);
+ return vars;
+}
+
+llvm::ArrayRef<CharBlock> OmpInitializerExpression::Variables() {
+ static std::string names[]{"omp_orig", "omp_priv"};
+ static CharBlock vars[std::size(names)];
+
+ [[maybe_unused]] static bool init = InitCharBlocksFromStrings(vars, names);
+ return vars;
+}
} // namespace Fortran::parser
diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp
index 2f86c76..9b38cfc 100644
--- a/flang/lib/Parser/unparse.cpp
+++ b/flang/lib/Parser/unparse.cpp
@@ -1867,6 +1867,13 @@ public:
[&](const CompilerDirective::NoUnrollAndJam &) {
Word("!DIR$ NOUNROLL_AND_JAM");
},
+ [&](const CompilerDirective::ForceInline &) {
+ Word("!DIR$ FORCEINLINE");
+ },
+ [&](const CompilerDirective::Inline &) { Word("!DIR$ INLINE"); },
+ [&](const CompilerDirective::NoInline &) {
+ Word("!DIR$ NOINLINE");
+ },
[&](const CompilerDirective::Unrecognized &) {
Word("!DIR$ ");
Word(x.source.ToString());
@@ -2088,15 +2095,13 @@ public:
// OpenMP Clauses & Directives
void Unparse(const OmpArgumentList &x) { Walk(x.v, ", "); }
+ void Unparse(const OmpTypeNameList &x) { Walk(x.v, ", "); }
void Unparse(const OmpBaseVariantNames &x) {
Walk(std::get<0>(x.t)); // OmpObject
Put(":");
Walk(std::get<1>(x.t)); // OmpObject
}
- void Unparse(const OmpTypeNameList &x) { //
- Walk(x.v, ",");
- }
void Unparse(const OmpMapperSpecifier &x) {
const auto &mapperName{std::get<std::string>(x.t)};
if (mapperName.find(llvm::omp::OmpDefaultMapperName) == std::string::npos) {
@@ -2195,6 +2200,15 @@ public:
unsigned ompVersion{langOpts_.OpenMPVersion};
Word(llvm::omp::getOpenMPDirectiveName(x.v, ompVersion));
}
+ void Unparse(const OmpStylizedDeclaration &x) {
+ // empty
+ }
+ void Unparse(const OmpStylizedExpression &x) { //
+ Put(x.source.ToString());
+ }
+ void Unparse(const OmpStylizedInstance &x) {
+ // empty
+ }
void Unparse(const OmpIteratorSpecifier &x) {
Walk(std::get<TypeDeclarationStmt>(x.t));
Put(" = ");
@@ -2504,29 +2518,11 @@ public:
void Unparse(const OpenMPCriticalConstruct &x) {
Unparse(static_cast<const OmpBlockConstruct &>(x));
}
- void Unparse(const OmpInitializerProc &x) {
- Walk(std::get<ProcedureDesignator>(x.t));
- Put("(");
- Walk(std::get<std::list<ActualArgSpec>>(x.t));
- Put(")");
- }
- void Unparse(const OmpInitializerClause &x) {
- // Don't let the visitor go to the normal AssignmentStmt Unparse function,
- // it adds an extra newline that we don't want.
- if (const auto *assignment{std::get_if<AssignmentStmt>(&x.u)}) {
- Walk(assignment->t, " = ");
- } else {
- Walk(x.u);
- }
+ void Unparse(const OmpInitializerExpression &x) {
+ Unparse(static_cast<const OmpStylizedExpression &>(x));
}
void Unparse(const OmpCombinerExpression &x) {
- // Don't let the visitor go to the normal AssignmentStmt Unparse function,
- // it adds an extra newline that we don't want.
- if (const auto *assignment{std::get_if<AssignmentStmt>(&x.u)}) {
- Walk(assignment->t, " = ");
- } else {
- Walk(x.u);
- }
+ Unparse(static_cast<const OmpStylizedExpression &>(x));
}
void Unparse(const OpenMPDeclareReductionConstruct &x) {
BeginOpenMP();
diff --git a/flang/lib/Semantics/canonicalize-directives.cpp b/flang/lib/Semantics/canonicalize-directives.cpp
index 104df25..a651a87 100644
--- a/flang/lib/Semantics/canonicalize-directives.cpp
+++ b/flang/lib/Semantics/canonicalize-directives.cpp
@@ -60,7 +60,11 @@ static bool IsExecutionDirective(const parser::CompilerDirective &dir) {
std::holds_alternative<parser::CompilerDirective::UnrollAndJam>(dir.u) ||
std::holds_alternative<parser::CompilerDirective::NoVector>(dir.u) ||
std::holds_alternative<parser::CompilerDirective::NoUnroll>(dir.u) ||
- std::holds_alternative<parser::CompilerDirective::NoUnrollAndJam>(dir.u);
+ std::holds_alternative<parser::CompilerDirective::NoUnrollAndJam>(
+ dir.u) ||
+ std::holds_alternative<parser::CompilerDirective::ForceInline>(dir.u) ||
+ std::holds_alternative<parser::CompilerDirective::Inline>(dir.u) ||
+ std::holds_alternative<parser::CompilerDirective::NoInline>(dir.u);
}
void CanonicalizationOfDirectives::Post(parser::SpecificationPart &spec) {
diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp
index 196755e..628068f 100644
--- a/flang/lib/Semantics/resolve-directives.cpp
+++ b/flang/lib/Semantics/resolve-directives.cpp
@@ -26,6 +26,8 @@
#include "flang/Semantics/symbol.h"
#include "flang/Semantics/tools.h"
#include "flang/Support/Flags.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Frontend/OpenMP/OMP.h.inc"
#include "llvm/Support/Debug.h"
#include <list>
@@ -453,6 +455,21 @@ public:
return true;
}
+ bool Pre(const parser::OmpStylizedDeclaration &x) {
+ static llvm::StringMap<Symbol::Flag> map{
+ {"omp_in", Symbol::Flag::OmpInVar},
+ {"omp_orig", Symbol::Flag::OmpOrigVar},
+ {"omp_out", Symbol::Flag::OmpOutVar},
+ {"omp_priv", Symbol::Flag::OmpPrivVar},
+ };
+ if (auto &name{std::get<parser::ObjectName>(x.var.t)}; name.symbol) {
+ if (auto found{map.find(name.ToString())}; found != map.end()) {
+ ResolveOmp(name, found->second,
+ const_cast<Scope &>(DEREF(name.symbol).owner()));
+ }
+ }
+ return false;
+ }
bool Pre(const parser::OmpMetadirectiveDirective &x) {
PushContext(x.v.source, llvm::omp::Directive::OMPD_metadirective);
return true;
diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp
index 561ebd2..0e6d4c7 100644
--- a/flang/lib/Semantics/resolve-names.cpp
+++ b/flang/lib/Semantics/resolve-names.cpp
@@ -1605,6 +1605,12 @@ public:
Post(static_cast<const parser::OmpDirectiveSpecification &>(x));
}
+ void Post(const parser::OmpTypeName &);
+ bool Pre(const parser::OmpStylizedDeclaration &);
+ void Post(const parser::OmpStylizedDeclaration &);
+ bool Pre(const parser::OmpStylizedInstance &);
+ void Post(const parser::OmpStylizedInstance &);
+
bool Pre(const parser::OpenMPDeclareMapperConstruct &x) {
AddOmpSourceRange(x.source);
return true;
@@ -1615,18 +1621,6 @@ public:
return true;
}
- bool Pre(const parser::OmpInitializerProc &x) {
- auto &procDes = std::get<parser::ProcedureDesignator>(x.t);
- auto &name = std::get<parser::Name>(procDes.u);
- auto *symbol{FindSymbol(NonDerivedTypeScope(), name)};
- if (!symbol) {
- context().Say(name.source,
- "Implicit subroutine declaration '%s' in DECLARE REDUCTION"_err_en_US,
- name.source);
- }
- return true;
- }
-
bool Pre(const parser::OmpDeclareVariantDirective &x) {
AddOmpSourceRange(x.source);
return true;
@@ -1772,14 +1766,6 @@ public:
messageHandler().set_currStmtSource(std::nullopt);
}
- bool Pre(const parser::OmpTypeName &x) {
- BeginDeclTypeSpec();
- return true;
- }
- void Post(const parser::OmpTypeName &x) { //
- EndDeclTypeSpec();
- }
-
bool Pre(const parser::OpenMPConstruct &x) {
// Indicate that the current directive is not a declarative one.
declaratives_.push_back(nullptr);
@@ -1835,6 +1821,30 @@ void OmpVisitor::Post(const parser::OmpBlockConstruct &x) {
}
}
+void OmpVisitor::Post(const parser::OmpTypeName &x) {
+ x.declTypeSpec = GetDeclTypeSpec();
+}
+
+bool OmpVisitor::Pre(const parser::OmpStylizedDeclaration &x) {
+ BeginDecl();
+ Walk(x.type.get());
+ Walk(x.var);
+ return true;
+}
+
+void OmpVisitor::Post(const parser::OmpStylizedDeclaration &x) { //
+ EndDecl();
+}
+
+bool OmpVisitor::Pre(const parser::OmpStylizedInstance &x) {
+ PushScope(Scope::Kind::OtherConstruct, nullptr);
+ return true;
+}
+
+void OmpVisitor::Post(const parser::OmpStylizedInstance &x) { //
+ PopScope();
+}
+
bool OmpVisitor::Pre(const parser::OmpMapClause &x) {
auto &mods{OmpGetModifiers(x)};
if (auto *mapper{OmpGetUniqueModifier<parser::OmpMapper>(mods)}) {
@@ -1969,51 +1979,20 @@ void OmpVisitor::ProcessReductionSpecifier(
}
}
- auto &typeList{std::get<parser::OmpTypeNameList>(spec.t)};
-
- // Create a temporary variable declaration for the four variables
- // used in the reduction specifier and initializer (omp_out, omp_in,
- // omp_priv and omp_orig), with the type in the typeList.
- //
- // In theory it would be possible to create only variables that are
- // actually used, but that requires walking the entire parse-tree of the
- // expressions, and finding the relevant variables [there may well be other
- // variables involved too].
- //
- // This allows doing semantic analysis where the type is a derived type
- // e.g omp_out%x = omp_out%x + omp_in%x.
- //
- // These need to be temporary (in their own scope). If they are created
- // as variables in the outer scope, if there's more than one type in the
- // typelist, duplicate symbols will be reported.
- const parser::CharBlock ompVarNames[]{
- {"omp_in", 6}, {"omp_out", 7}, {"omp_priv", 8}, {"omp_orig", 8}};
-
- for (auto &t : typeList.v) {
- PushScope(Scope::Kind::OtherConstruct, nullptr);
- BeginDeclTypeSpec();
- // We need to walk t.u because Walk(t) does it's own BeginDeclTypeSpec.
- Walk(t.u);
+ reductionDetails->AddDecl(declaratives_.back());
- // Only process types we can find. There will be an error later on when
- // a type isn't found.
- if (const DeclTypeSpec *typeSpec{GetDeclTypeSpec()}) {
- reductionDetails->AddType(*typeSpec);
+ // Do not walk OmpTypeNameList. The types on the list will be visited
+ // during procesing of OmpCombinerExpression.
+ Walk(std::get<std::optional<parser::OmpCombinerExpression>>(spec.t));
+ Walk(clauses);
- for (auto &nm : ompVarNames) {
- ObjectEntityDetails details{};
- details.set_type(*typeSpec);
- MakeSymbol(nm, Attrs{}, std::move(details));
- }
+ for (auto &type : std::get<parser::OmpTypeNameList>(spec.t).v) {
+ // The declTypeSpec can be null if there is some semantic error.
+ if (type.declTypeSpec) {
+ reductionDetails->AddType(*type.declTypeSpec);
}
- EndDeclTypeSpec();
- Walk(std::get<std::optional<parser::OmpCombinerExpression>>(spec.t));
- Walk(clauses);
- PopScope();
}
- reductionDetails->AddDecl(declaratives_.back());
-
if (!symbol) {
symbol = &MakeSymbol(mangledName, Attrs{}, std::move(*reductionDetails));
}
@@ -10078,7 +10057,10 @@ void ResolveNamesVisitor::Post(const parser::CompilerDirective &x) {
std::holds_alternative<parser::CompilerDirective::UnrollAndJam>(x.u) ||
std::holds_alternative<parser::CompilerDirective::NoVector>(x.u) ||
std::holds_alternative<parser::CompilerDirective::NoUnroll>(x.u) ||
- std::holds_alternative<parser::CompilerDirective::NoUnrollAndJam>(x.u)) {
+ std::holds_alternative<parser::CompilerDirective::NoUnrollAndJam>(x.u) ||
+ std::holds_alternative<parser::CompilerDirective::ForceInline>(x.u) ||
+ std::holds_alternative<parser::CompilerDirective::Inline>(x.u) ||
+ std::holds_alternative<parser::CompilerDirective::NoInline>(x.u)) {
return;
}
if (const auto *tkr{
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index 5182950..59af58d 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -1998,6 +1998,18 @@ implicit none
! TMA Operations
+ interface barrier_arrive
+ attributes(device) function barrier_arrive(barrier) result(token)
+ integer(8), shared :: barrier
+ integer(8) :: token
+ end function
+ attributes(device) function barrier_arrive_cnt(barrier, count) result(token)
+ integer(8), shared :: barrier
+ integer(4), value :: count
+ integer(8) :: token
+ end function
+ end interface
+
interface
attributes(device) subroutine barrier_init(barrier, count)
integer(8), shared :: barrier
@@ -2005,15 +2017,18 @@ implicit none
end subroutine
end interface
- interface barrier_arrive
- attributes(device) function barrier_arrive(barrier) result(token)
+ interface
+ attributes(device) integer function barrier_try_wait(barrier, token)
integer(8), shared :: barrier
- integer(8) :: token
+ integer(8), value :: token
end function
- attributes(device) function barrier_arrive_cnt(barrier, count) result(token)
+ end interface
+
+ interface
+ attributes(device) integer function barrier_try_wait_sleep(barrier, token, ns)
integer(8), shared :: barrier
- integer(4), value :: count
- integer(8) :: token
+ integer(8), value :: token
+ integer(4), value :: ns
end function
end interface
@@ -2032,7 +2047,13 @@ implicit none
end subroutine
end interface
+ ! --------------------
+ ! Bulk load functions
+ ! --------------------
+
! Generic load, count is in bytes
+ ! -------------------------------
+
interface
attributes(device) subroutine tma_bulk_g2s(barrier, src, dst, nbytes)
!dir$ ignore_tkr src, dst
@@ -2043,6 +2064,74 @@ implicit none
end subroutine
end interface
+ ! Load specific types, count is in elements
+ ! -----------------------------------------
+
+ interface tma_bulk_load
+ attributes(device) subroutine tma_bulk_ldc4(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ complex(4), device :: src(*)
+ complex(4), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldc8(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ complex(8), device :: src(*)
+ complex(8), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldi4(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ integer(4), device :: src(*)
+ integer(4), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldi8(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ integer(8), device :: src(*)
+ integer(8), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldr2(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ real(2), device :: src(*)
+ real(2), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldr4(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ real(4), device :: src(*)
+ real(4), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_ldr8(barrier, src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: barrier
+ real(8), device :: src(*)
+ real(8), shared :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+ end interface
+
+ ! --------------------
+ ! Bulk Store functions
+ ! --------------------
+
+ ! Generic store, count is in bytes
+ ! --------------------------------
+
interface
attributes(device) subroutine tma_bulk_s2g(src, dst, nbytes)
!dir$ ignore_tkr src, dst
@@ -2052,6 +2141,60 @@ implicit none
end subroutine
end interface
+ ! Load specific types, count is in elements
+ ! -----------------------------------------
+
+ interface tma_bulk_store
+ attributes(device) subroutine tma_bulk_store_c4(src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ complex(4), shared :: src(*)
+ complex(4), device :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_store_c8(src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ complex(8), shared :: src(*)
+ complex(8), device :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_store_i4(src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(4), shared :: src(*)
+ integer(4), device :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_store_i8(src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ integer(8), shared :: src(*)
+ integer(8), device :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_store_r2(src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ real(2), shared :: src(*)
+ real(2), device :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_store_r4(src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ real(4), shared :: src(*)
+ real(4), device :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+
+ attributes(device) subroutine tma_bulk_store_r8(src, dst, nelems)
+ !dir$ ignore_tkr (r) src, (r) dst
+ real(8), shared :: src(*)
+ real(8), device :: dst(*)
+ integer(4), value :: nelems
+ end subroutine
+ end interface
+
contains
attributes(device) subroutine syncthreads()
diff --git a/flang/test/Fir/CUDA/cuda-target-rewrite.mlir b/flang/test/Fir/CUDA/cuda-target-rewrite.mlir
index 48fee10..5562e00 100644
--- a/flang/test/Fir/CUDA/cuda-target-rewrite.mlir
+++ b/flang/test/Fir/CUDA/cuda-target-rewrite.mlir
@@ -108,3 +108,23 @@ module attributes {gpu.container_module, fir.defaultkind = "a1c4d8i4l4r4", fir.k
}
}
+// -----
+
+module attributes {gpu.container_module, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+ gpu.module @testmod {
+ gpu.func @_QPtest(%arg0: complex<f32>) -> () kernel {
+ gpu.return
+ }
+ }
+ func.func @main(%arg0: complex<f32>) {
+ %0 = llvm.mlir.constant(0 : i64) : i64
+ %1 = llvm.mlir.constant(0 : i32) : i32
+ %2 = fir.alloca i64
+ %3 = cuf.stream_cast %2 : !fir.ref<i64>
+ %4 = gpu.launch_func async [%3] @testmod::@_QPtest blocks in (%0, %0, %0) threads in (%0, %0, %0) : i64 dynamic_shared_memory_size %1 args(%arg0 : complex<f32>) {cuf.proc_attr = #cuf.cuda_proc<global>}
+ return
+ }
+}
+
+// CHECK-LABEL: func.func @main
+// CHECK: %{{.*}} = gpu.launch_func async [%{{.*}}] @testmod::@_QPtest blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) : i64 dynamic_shared_memory_size %{{.*}} args(%{{.*}} : !fir.vector<2:f32>) {cuf.proc_attr = #cuf.cuda_proc<global>}
diff --git a/flang/test/Integration/inline_directive.f90 b/flang/test/Integration/inline_directive.f90
new file mode 100644
index 0000000..1f05384
--- /dev/null
+++ b/flang/test/Integration/inline_directive.f90
@@ -0,0 +1,69 @@
+! This directory can be used to add Integration tests involving multiple stages of the compiler (for eg. from Fortran to LLVM IR).
+! It should not contain executable tests. We should only add tests here sparingly and only if there is no other way to test.
+! RUN: %flang_fc1 -emit-llvm -o - %s | FileCheck %s
+
+! CHECK-LABEL: test_inline
+subroutine test_inline()
+ integer :: x, y
+!CHECK: %[[VAL_1:.*]] = alloca i32, i64 1, align 4
+!CHECK: %[[VAL_2:.*]] = alloca i32, i64 1, align 4
+!CHECK: %[[VAL_3:.*]] = alloca i32, i64 1, align 4
+!CHECK: %[[VAL_4:.*]] = alloca i32, i64 1, align 4
+
+ !dir$ forceinline
+ y = g(x)
+ !dir$ forceinline
+ call f(x, y)
+!CHECK: %[[VAL_5:.*]] = load i32, ptr %[[VAL_3]], align 4
+!CHECK: %[[VAL_6:.*]] = mul i32 %[[VAL_5]], 2
+!CHECK: store i32 %6, ptr %[[VAL_1]], align 4
+!CHECK: %[[VAL_7:.*]] = load i32, ptr %[[VAL_1]], align 4
+!CHECK: store i32 %7, ptr %[[VAL_2]], align 4
+!CHECK: %[[VAL_8:.]] = load i32, ptr %[[VAL_3]], align 4
+!CHECK: %[[VAL_9:.]] = mul i32 %[[VAL_8]], 2
+!CHECK: store i32 %9, ptr %[[VAL_2]], align 4
+
+ !dir$ inline
+ y = g(x)
+ !dir$ inline
+ call f(x, y)
+!CHECK: %[[VAL_10:.*]] = call i32 @_QFtest_inlinePg(ptr %[[VAL_3]]) #[[INLINE:.*]]
+!CHECK: store i32 %[[VAL_10]], ptr %[[VAL_2]], align 4
+!CHECK: call void @_QFtest_inlinePf(ptr %[[VAL_3]], ptr %[[VAL_2]]) #[[INLINE]]
+
+ !dir$ inline
+ do i = 1, 100
+ call f(x, y)
+ !CHECK: br i1 %[[VAL_14:.*]], label %[[VAL_15:.*]], label %[[VAL_19:.*]]
+ !CHECK: call void @_QFtest_inlinePf(ptr %[[VAL_3]], ptr %[[VAL_2]]) #[[INLINE]]
+ enddo
+
+ !dir$ noinline
+ y = g(x)
+ !dir$ noinline
+ call f(x, y)
+!CHECK: %[[VAL_10:.*]] = call i32 @_QFtest_inlinePg(ptr %[[VAL_3]]) #[[NOINLINE:.*]]
+!CHECK: store i32 %[[VAL_10]], ptr %[[VAL_2]], align 4
+!CHECK: call void @_QFtest_inlinePf(ptr %[[VAL_3]], ptr %[[VAL_2]]) #[[NOINLINE]]
+
+ !dir$ noinline
+ do i = 1, 100
+ call f(x, y)
+ !CHECK: br i1 %[[VAL_14:.*]], label %[[VAL_15:.*]], label %[[VAL_19:.*]]
+ !CHECK: call void @_QFtest_inlinePf(ptr %[[VAL_3]], ptr %[[VAL_2]]) #[[NOINLINE]]
+ enddo
+
+ contains
+ subroutine f(x, y)
+ integer, intent(in) :: x
+ integer, intent(out) :: y
+ y = x*2
+ end subroutine f
+ integer function g(x)
+ integer :: x
+ g = x*2
+ end function g
+end subroutine test_inline
+
+!CHECK: attributes #[[INLINE]] = { inlinehint }
+!CHECK: attributes #[[NOINLINE]] = { noinline }
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 5c4c3c6..8f35521 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -479,6 +479,8 @@ end subroutine
! CHECK-LABEL: func.func @_QPtest_bulk_s2g
! CHECL: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
+! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group"
+! CHECK: nvvm.cp.async.bulk.wait_group 0
attributes(device) subroutine testAtomicCasLoop(aa, n)
integer :: a
@@ -492,3 +494,250 @@ end subroutine
! CHECK: %[[CASTED_CMP_XCHG_EV:.*]] = fir.convert %[[CMP_XCHG_EV]] : (i1) -> i32
! CHECK: %{{.*}} = arith.constant 1 : i32
! CHECK: %19 = arith.cmpi eq, %[[CASTED_CMP_XCHG_EV]], %{{.*}} : i32
+
+attributes(global) subroutine test_barrier_try_wait()
+ integer :: istat
+ integer(8), shared :: barrier1
+ integer(8) :: token
+ istat = barrier_try_wait(barrier1, token)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_barrier_try_wait()
+! CHECK: scf.while
+! CHECK: %{{.*}} = nvvm.inline_ptx ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}}; selp.b32 %{{.*}}, 1, 0, p;" ro(%{{.*}}, %{{.*}}, %c1000000{{.*}} : !llvm.ptr, i64, i32) -> i32
+
+attributes(global) subroutine test_barrier_try_wait_sleep()
+ integer :: istat
+ integer(8), shared :: barrier1
+ integer(8) :: token
+ integer(4) :: sleep_time
+ istat = barrier_try_wait_sleep(barrier1, token, sleep_time)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_barrier_try_wait_sleep()
+! CHECK: %{{.*}} = nvvm.inline_ptx ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}}; selp.b32 %0, 1, 0, p;" ro(%{{.*}}, %{{.*}}, %{{.*}} : !llvm.ptr, i64, i32) -> i32
+
+attributes(global) subroutine test_tma_bulk_load_c4(a, n)
+ integer(8), shared :: barrier1
+ integer, value :: n
+ complex(4), device :: r8(n)
+ complex(4), shared :: tmp(1024)
+ integer(4) :: j, elem_count
+ call tma_bulk_load(barrier1, r8(j), tmp, elem_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_c4
+! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_c4Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_c4Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32>
+! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32
+! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32
+! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr
+! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !fir.ref<!fir.array<1024xcomplex<f32>>>, !fir.ref<complex<f32>>, i32, !llvm.ptr)
+! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32)
+
+attributes(global) subroutine test_tma_bulk_load_c8(a, n)
+ integer(8), shared :: barrier1
+ integer, value :: n
+ complex(8), device :: r8(n)
+ complex(8), shared :: tmp(1024)
+ integer(4) :: j, elem_count
+ call tma_bulk_load(barrier1, r8(j), tmp, elem_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_c8
+! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_c8Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_c8Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32>
+! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 16 : i32
+! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32
+! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr
+! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !fir.ref<!fir.array<1024xcomplex<f64>>>, !fir.ref<complex<f64>>, i32, !llvm.ptr)
+! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32)
+
+attributes(global) subroutine test_tma_bulk_load_i4(a, n)
+ integer(8), shared :: barrier1
+ integer, value :: n
+ integer(4), device :: r8(n)
+ integer(4), shared :: tmp(1024)
+ integer(4) :: j, elem_count
+ call tma_bulk_load(barrier1, r8(j), tmp, elem_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_i4
+! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_i4Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_i4Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32>
+! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 4 : i32
+! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32
+! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr
+! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !fir.ref<!fir.array<1024xi32>>, !fir.ref<i32>, i32, !llvm.ptr)
+! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32)
+
+attributes(global) subroutine test_tma_bulk_load_i8(a, n)
+ integer(8), shared :: barrier1
+ integer, value :: n
+ integer(8), device :: r8(n)
+ integer(8), shared :: tmp(1024)
+ integer(4) :: j, elem_count
+ call tma_bulk_load(barrier1, r8(j), tmp, elem_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_i8
+! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_i8Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_i8Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32>
+! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32
+! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32
+! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr
+! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !fir.ref<!fir.array<1024xi64>>, !fir.ref<i64>, i32, !llvm.ptr)
+! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32)
+
+attributes(global) subroutine test_tma_bulk_load_r2(a, n)
+ integer(8), shared :: barrier1
+ integer, value :: n
+ real(2), device :: r8(n)
+ real(2), shared :: tmp(1024)
+ integer(4) :: j, elem_count
+ call tma_bulk_load(barrier1, r8(j), tmp, elem_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_r2
+! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_r2Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_r2Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32>
+! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 2 : i32
+! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32
+! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr
+! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !fir.ref<!fir.array<1024xf16>>, !fir.ref<f16>, i32, !llvm.ptr)
+! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32)
+
+attributes(global) subroutine test_tma_bulk_load_r4(a, n)
+ integer(8), shared :: barrier1
+ integer, value :: n
+ real(4), device :: r8(n)
+ real(4), shared :: tmp(1024)
+ integer(4) :: j, elem_count
+ call tma_bulk_load(barrier1, r8(j), tmp, elem_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_r4
+! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_r4Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_r4Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32>
+! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 4 : i32
+! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32
+! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr
+! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !fir.ref<!fir.array<1024xf32>>, !fir.ref<f32>, i32, !llvm.ptr)
+! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32)
+
+attributes(global) subroutine test_tma_bulk_load_r8(a, n)
+ integer(8), shared :: barrier1
+ integer, value :: n
+ real(8), device :: r8(n)
+ real(8), shared :: tmp(1024)
+ integer(4) :: j, elem_count
+ call tma_bulk_load(barrier1, r8(j), tmp, elem_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_r8
+! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_r8Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>)
+! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_r8Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32>
+! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32
+! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32
+! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr
+! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !fir.ref<!fir.array<1024xf64>>, !fir.ref<f64>, i32, !llvm.ptr)
+! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32)
+
+attributes(global) subroutine test_tma_bulk_store_c4(c, n)
+ integer, value :: n
+ complex(4), device :: c(n)
+ complex(4), shared :: tmpa(1024)
+ integer(4) :: j, elem_count
+ call tma_bulk_store(tmpa, c(j), elem_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_c4
+! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
+! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group"
+! CHECK: nvvm.cp.async.bulk.wait_group 0
+
+attributes(global) subroutine test_tma_bulk_store_c8(c, n)
+ integer, value :: n
+ complex(8), device :: c(n)
+ complex(8), shared :: tmpa(1024)
+ integer(4) :: j, elem_count
+ call tma_bulk_store(tmpa, c(j), elem_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_c8
+! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
+! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group"
+! CHECK: nvvm.cp.async.bulk.wait_group 0
+
+attributes(global) subroutine test_tma_bulk_store_i4(c, n)
+ integer, value :: n
+ integer(4), device :: c(n)
+ integer(4), shared :: tmpa(1024)
+ integer(4) :: j, elem_count
+ call tma_bulk_store(tmpa, c(j), elem_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_i4
+! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
+! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group"
+! CHECK: nvvm.cp.async.bulk.wait_group 0
+
+attributes(global) subroutine test_tma_bulk_store_i8(c, n)
+ integer, value :: n
+ integer(8), device :: c(n)
+ integer(8), shared :: tmpa(1024)
+ integer(4) :: j, elem_count
+ call tma_bulk_store(tmpa, c(j), elem_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_i8
+! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
+! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group"
+! CHECK: nvvm.cp.async.bulk.wait_group 0
+
+
+attributes(global) subroutine test_tma_bulk_store_r2(c, n)
+ integer, value :: n
+ real(2), device :: c(n)
+ real(2), shared :: tmpa(1024)
+ integer(4) :: j, elem_count
+ call tma_bulk_store(tmpa, c(j), elem_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r2
+! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
+! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group"
+! CHECK: nvvm.cp.async.bulk.wait_group 0
+
+attributes(global) subroutine test_tma_bulk_store_r4(c, n)
+ integer, value :: n
+ real(4), device :: c(n)
+ real(4), shared :: tmpa(1024)
+ integer(4) :: j, elem_count
+ call tma_bulk_store(tmpa, c(j), elem_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r4
+! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
+! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group"
+! CHECK: nvvm.cp.async.bulk.wait_group 0
+
+attributes(global) subroutine test_tma_bulk_store_r8(c, n)
+ integer, value :: n
+ real(8), device :: c(n)
+ real(8), shared :: tmpa(1024)
+ integer(4) :: j, elem_count
+ call tma_bulk_store(tmpa, c(j), elem_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r8
+! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
+! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group"
+! CHECK: nvvm.cp.async.bulk.wait_group 0
diff --git a/flang/test/Lower/inline_directive.f90 b/flang/test/Lower/inline_directive.f90
new file mode 100644
index 0000000..347df85
--- /dev/null
+++ b/flang/test/Lower/inline_directive.f90
@@ -0,0 +1,61 @@
+! RUN: %flang_fc1 -emit-fir -o - %s | FileCheck %s
+
+subroutine test_inline()
+ integer :: x, y
+!CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtest_inlineEx"}
+!CHECK: %[[VAL_1:.*]] = fir.declare %[[VAL_0]] {uniq_name = "_QFtest_inlineEx"} : (!fir.ref<i32>) -> !fir.ref<i32>
+!CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFtest_inlineEy"}
+!CHECK: %[[VAL_3:.*]] = fir.declare %[[VAL_2]] {uniq_name = "_QFtest_inlineEy"} : (!fir.ref<i32>) -> !fir.ref<i32>
+
+ !dir$ forceinline
+ y = g(x)
+ !CHECK: %[[VAL_4:.*]] = fir.call @_QFtest_inlinePg(%[[VAL_1]]) fastmath<contract> {inline_attr = #fir.inline_attrs<always_inline>} : (!fir.ref<i32>) -> i32
+ !CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref<i32>
+
+ !dir$ forceinline
+ call f(x, y)
+ !CHECK: fir.call @_QFtest_inlinePf(%[[VAL_1]], %[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<always_inline>} : (!fir.ref<i32>, !fir.ref<i32>) -> ()
+
+ !dir$ noinline
+ y = g(x) + 7 * (8 + g(y))
+ !CHECK: %[[VAL_8:.*]] = fir.call @_QFtest_inlinePg(%[[VAL_1]]) fastmath<contract> {inline_attr = #fir.inline_attrs<no_inline>} : (!fir.ref<i32>) -> i32
+ !CHECK: %[[VAL_9:.*]] = fir.call @_QFtest_inlinePg(%[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<no_inline>} : (!fir.ref<i32>) -> i32
+ !CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[C8:.*]] : i32
+ !CHECK: %[[VAL_11:.*]] = fir.no_reassoc %[[VAL_10]] : i32
+ !CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_11]], %[[C7:.*]] : i32
+ !CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_12]] : i32
+ !CHECK: fir.store %[[VAL_13]] to %[[VAL_3]] : !fir.ref<i32>
+
+ !dir$ noinline
+ call f(x, y)
+ !CHECK: fir.call @_QFtest_inlinePf(%[[VAL_1]], %[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<no_inline>} : (!fir.ref<i32>, !fir.ref<i32>) -> ()
+
+ !dir$ inline
+ call f(x, y)
+ !CHECK: fir.call @_QFtest_inlinePf(%[[VAL_1]], %[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<inline_hint>} : (!fir.ref<i32>, !fir.ref<i32>) -> ()
+
+ !dir$ forceinline
+ do i = 1, 100
+ !CHECK: fir.do_loop %[[ARG_0:.*]] = %[[FROM:.*]] to %[[TO:.*]] step %[[C1:.*]] iter_args(%[[ARG_1:.*]] = {{.*}}) -> (i32) {
+ !CHECK: fir.call @_QFtest_inlinePf(%[[VAL_1]], %[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<always_inline>} : (!fir.ref<i32>, !fir.ref<i32>) -> ()
+ call f(x, y)
+ enddo
+
+ !dir$ inline
+ do i = 1, 100
+ !CHECK: fir.do_loop %[[ARG_0:.*]] = %[[FROM:.*]] to %[[TO:.*]] step %[[C1:.*]] iter_args(%[[ARG_1:.*]] = {{.*}}) -> (i32) {
+ !CHECK: fir.call @_QFtest_inlinePf(%[[VAL_1]], %[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<inline_hint>} : (!fir.ref<i32>, !fir.ref<i32>) -> ()
+ call f(x, y)
+ enddo
+!CHECK: return
+ contains
+ subroutine f(x, y)
+ integer, intent(in) :: x
+ integer, intent(out) :: y
+ y = x*2
+ end subroutine f
+ integer function g(x)
+ integer :: x
+ g = x*2
+ end function g
+end subroutine test_inline
diff --git a/flang/test/Parser/OpenMP/declare-reduction-multi.f90 b/flang/test/Parser/OpenMP/declare-reduction-multi.f90
index a682958..8856661 100644
--- a/flang/test/Parser/OpenMP/declare-reduction-multi.f90
+++ b/flang/test/Parser/OpenMP/declare-reduction-multi.f90
@@ -26,7 +26,8 @@ program omp_examples
type(tt) :: values(n), sum, prod, big, small
!$omp declare reduction(+:tt:omp_out%r = omp_out%r + omp_in%r) initializer(omp_priv%r = 0)
-!CHECK: !$OMP DECLARE REDUCTION(+:tt: omp_out%r = omp_out%r+omp_in%r) INITIALIZER(omp_priv%r = 0_4)
+!CHECK: !$OMP DECLARE REDUCTION(+:tt: omp_out%r = omp_out%r + omp_in%r) INITIALIZER(om&
+!CHECK-NEXT: !$OMP&p_priv%r = 0)
!PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification
!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction
@@ -34,11 +35,39 @@ program omp_examples
!PARSE-TREE: | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add
!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec
!PARSE-TREE: | | | Name = 'tt'
-!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out%r=omp_out%r+omp_in%r'
-!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv%r=0._4'
+!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out%r=omp_out%r+omp_in%r'
+!PARSE-TREE: | | | | Variable = 'omp_out%r'
+!PARSE-TREE: | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | Name = 'r'
+!PARSE-TREE: | | | | Expr = 'omp_out%r+omp_in%r'
+!PARSE-TREE: | | | | | Add
+!PARSE-TREE: | | | | | | Expr = 'omp_out%r'
+!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | | | Name = 'r'
+!PARSE-TREE: | | | | | | Expr = 'omp_in%r'
+!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_in'
+!PARSE-TREE: | | | | | | | | Name = 'r'
+!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | Instance -> AssignmentStmt = 'omp_priv%r=0._4'
+!PARSE-TREE: | | | Variable = 'omp_priv%r'
+!PARSE-TREE: | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | DataRef -> Name = 'omp_priv'
+!PARSE-TREE: | | | | | Name = 'r'
+!PARSE-TREE: | | | Expr = '0_4'
+!PARSE-TREE: | | | | LiteralConstant -> IntLiteralConstant = '0'
+!PARSE-TREE: | Flags = None
!$omp declare reduction(*:tt:omp_out%r = omp_out%r * omp_in%r) initializer(omp_priv%r = 1)
-!CHECK-NEXT: !$OMP DECLARE REDUCTION(*:tt: omp_out%r = omp_out%r*omp_in%r) INITIALIZER(omp_priv%r = 1_4)
+!CHECK-NEXT: !$OMP DECLARE REDUCTION(*:tt: omp_out%r = omp_out%r * omp_in%r) INITIALIZER(om&
+!CHECK-NEXT: !$OMP&p_priv%r = 1)
!PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification
!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction
@@ -46,11 +75,39 @@ program omp_examples
!PARSE-TREE: | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Multiply
!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec
!PARSE-TREE: | | | Name = 'tt'
-!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out%r=omp_out%r*omp_in%r'
-!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv%r=1._4'
+!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out%r=omp_out%r*omp_in%r'
+!PARSE-TREE: | | | | Variable = 'omp_out%r'
+!PARSE-TREE: | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | Name = 'r'
+!PARSE-TREE: | | | | Expr = 'omp_out%r*omp_in%r'
+!PARSE-TREE: | | | | | Multiply
+!PARSE-TREE: | | | | | | Expr = 'omp_out%r'
+!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | | | Name = 'r'
+!PARSE-TREE: | | | | | | Expr = 'omp_in%r'
+!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_in'
+!PARSE-TREE: | | | | | | | | Name = 'r'
+!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | Instance -> AssignmentStmt = 'omp_priv%r=1._4'
+!PARSE-TREE: | | | Variable = 'omp_priv%r'
+!PARSE-TREE: | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | DataRef -> Name = 'omp_priv'
+!PARSE-TREE: | | | | | Name = 'r'
+!PARSE-TREE: | | | Expr = '1_4'
+!PARSE-TREE: | | | | LiteralConstant -> IntLiteralConstant = '1'
+!PARSE-TREE: | Flags = None
!$omp declare reduction(max:tt:omp_out = mymax(omp_out, omp_in)) initializer(omp_priv%r = 0)
-!CHECK-NEXT: !$OMP DECLARE REDUCTION(max:tt: omp_out = mymax(omp_out,omp_in)) INITIALIZER(omp_priv%r = 0_4)
+!CHECK-NEXT: !$OMP DECLARE REDUCTION(max:tt: omp_out = mymax(omp_out, omp_in)) INITIALIZER(&
+!CHECK-NEXT: !$OMP&omp_priv%r = 0)
!PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification
!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction
@@ -58,11 +115,36 @@ program omp_examples
!PARSE-TREE: | | OmpReductionIdentifier -> ProcedureDesignator -> Name = 'max'
!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec
!PARSE-TREE: | | | Name = 'tt'
-!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out=mymax(omp_out,omp_in)'
-!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv%r=0._4'
+!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out=mymax(omp_out,omp_in)'
+!PARSE-TREE: | | | | Variable = 'omp_out'
+!PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | Expr = 'mymax(omp_out,omp_in)'
+!PARSE-TREE: | | | | | FunctionReference -> Call
+!PARSE-TREE: | | | | | | ProcedureDesignator -> Name = 'mymax'
+!PARSE-TREE: | | | | | | ActualArgSpec
+!PARSE-TREE: | | | | | | | ActualArg -> Expr = 'omp_out'
+!PARSE-TREE: | | | | | | | | Designator -> DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | ActualArgSpec
+!PARSE-TREE: | | | | | | | ActualArg -> Expr = 'omp_in'
+!PARSE-TREE: | | | | | | | | Designator -> DataRef -> Name = 'omp_in'
+!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | Instance -> AssignmentStmt = 'omp_priv%r=0._4'
+!PARSE-TREE: | | | Variable = 'omp_priv%r'
+!PARSE-TREE: | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | DataRef -> Name = 'omp_priv'
+!PARSE-TREE: | | | | | Name = 'r'
+!PARSE-TREE: | | | Expr = '0_4'
+!PARSE-TREE: | | | | LiteralConstant -> IntLiteralConstant = '0'
+!PARSE-TREE: | Flags = None
!$omp declare reduction(min:tt:omp_out%r = min(omp_out%r, omp_in%r)) initializer(omp_priv%r = 1)
-!CHECK-NEXT: !$OMP DECLARE REDUCTION(min:tt: omp_out%r = min(omp_out%r,omp_in%r)) INITIALIZER(omp_priv%r = 1_4)
+!CHECK-NEXT: !$OMP DECLARE REDUCTION(min:tt: omp_out%r = min(omp_out%r, omp_in%r)) INITIALI&
+!CHECK-NEXT: !$OMP&ZER(omp_priv%r = 1)
!PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification
!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction
@@ -70,8 +152,38 @@ program omp_examples
!PARSE-TREE: | | OmpReductionIdentifier -> ProcedureDesignator -> Name = 'min'
!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec
!PARSE-TREE: | | | Name = 'tt'
-!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out%r=min(omp_out%r,omp_in%r)'
-!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv%r=1._4'
+!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out%r=min(omp_out%r,omp_in%r)'
+!PARSE-TREE: | | | | Variable = 'omp_out%r'
+!PARSE-TREE: | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | Name = 'r'
+!PARSE-TREE: | | | | Expr = 'min(omp_out%r,omp_in%r)'
+!PARSE-TREE: | | | | | FunctionReference -> Call
+!PARSE-TREE: | | | | | | ProcedureDesignator -> Name = 'min'
+!PARSE-TREE: | | | | | | ActualArgSpec
+!PARSE-TREE: | | | | | | | ActualArg -> Expr = 'omp_out%r'
+!PARSE-TREE: | | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | | | | Name = 'r'
+!PARSE-TREE: | | | | | | ActualArgSpec
+!PARSE-TREE: | | | | | | | ActualArg -> Expr = 'omp_in%r'
+!PARSE-TREE: | | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | | DataRef -> Name = 'omp_in'
+!PARSE-TREE: | | | | | | | | | Name = 'r'
+!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | Instance -> AssignmentStmt = 'omp_priv%r=1._4'
+!PARSE-TREE: | | | Variable = 'omp_priv%r'
+!PARSE-TREE: | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | DataRef -> Name = 'omp_priv'
+!PARSE-TREE: | | | | | Name = 'r'
+!PARSE-TREE: | | | Expr = '1_4'
+!PARSE-TREE: | | | | LiteralConstant -> IntLiteralConstant = '1'
+!PARSE-TREE: | Flags = None
call random_number(values%r)
diff --git a/flang/test/Parser/OpenMP/declare-reduction-operator.f90 b/flang/test/Parser/OpenMP/declare-reduction-operator.f90
index e4d07c8..0d337c1 100644
--- a/flang/test/Parser/OpenMP/declare-reduction-operator.f90
+++ b/flang/test/Parser/OpenMP/declare-reduction-operator.f90
@@ -16,7 +16,8 @@ subroutine reduce_1 ( n, tts )
type(tt) :: tts(n)
type(tt2) :: tts2(n)
-!CHECK: !$OMP DECLARE REDUCTION(+:tt: omp_out = tt(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)) INITIALIZER(omp_priv = tt(x=0_4,y=0_4))
+!CHECK: !$OMP DECLARE REDUCTION(+:tt: omp_out = tt(omp_out%x - omp_in%x , omp_out%y - &
+!CHECK: !$OMP&omp_in%y)) INITIALIZER(omp_priv = tt(0,0))
!PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification
!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction
@@ -24,13 +25,60 @@ subroutine reduce_1 ( n, tts )
!PARSE-TREE: | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add
!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec
!PARSE-TREE: | | | Name = 'tt'
-!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out=tt(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)'
-!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv=tt(x=0_4,y=0_4)'
-
+!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out=tt(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)'
+!PARSE-TREE: | | | | Variable = 'omp_out'
+!PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | Expr = 'tt(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)'
+!PARSE-TREE: | | | | | StructureConstructor
+!PARSE-TREE: | | | | | | DerivedTypeSpec
+!PARSE-TREE: | | | | | | | Name = 'tt'
+!PARSE-TREE: | | | | | | ComponentSpec
+!PARSE-TREE: | | | | | | | ComponentDataSource -> Expr = 'omp_out%x-omp_in%x'
+!PARSE-TREE: | | | | | | | | Subtract
+!PARSE-TREE: | | | | | | | | | Expr = 'omp_out%x'
+!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | | | | | | Name = 'x'
+!PARSE-TREE: | | | | | | | | | Expr = 'omp_in%x'
+!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_in'
+!PARSE-TREE: | | | | | | | | | | | Name = 'x'
+!PARSE-TREE: | | | | | | ComponentSpec
+!PARSE-TREE: | | | | | | | ComponentDataSource -> Expr = 'omp_out%y-omp_in%y'
+!PARSE-TREE: | | | | | | | | Subtract
+!PARSE-TREE: | | | | | | | | | Expr = 'omp_out%y'
+!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | | | | | | Name = 'y'
+!PARSE-TREE: | | | | | | | | | Expr = 'omp_in%y'
+!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_in'
+!PARSE-TREE: | | | | | | | | | | | Name = 'y'
+!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | Instance -> AssignmentStmt = 'omp_priv=tt(x=0_4,y=0_4)'
+!PARSE-TREE: | | | Variable = 'omp_priv'
+!PARSE-TREE: | | | | Designator -> DataRef -> Name = 'omp_priv'
+!PARSE-TREE: | | | Expr = 'tt(x=0_4,y=0_4)'
+!PARSE-TREE: | | | | StructureConstructor
+!PARSE-TREE: | | | | | DerivedTypeSpec
+!PARSE-TREE: | | | | | | Name = 'tt'
+!PARSE-TREE: | | | | | ComponentSpec
+!PARSE-TREE: | | | | | | ComponentDataSource -> Expr = '0_4'
+!PARSE-TREE: | | | | | | | LiteralConstant -> IntLiteralConstant = '0'
+!PARSE-TREE: | | | | | ComponentSpec
+!PARSE-TREE: | | | | | | ComponentDataSource -> Expr = '0_4'
+!PARSE-TREE: | | | | | | | LiteralConstant -> IntLiteralConstant = '0'
+!PARSE-TREE: | Flags = None
!$omp declare reduction(+ : tt : omp_out = tt(omp_out%x - omp_in%x , omp_out%y - omp_in%y)) initializer(omp_priv = tt(0,0))
-!CHECK: !$OMP DECLARE REDUCTION(+:tt2: omp_out = tt2(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)) INITIALIZER(omp_priv = tt2(x=0._8,y=0._8)
+!CHECK: !$OMP DECLARE REDUCTION(+:tt2: omp_out = tt2(omp_out%x - omp_in%x , omp_out%y &
+!CHECK: !$OMP&- omp_in%y)) INITIALIZER(omp_priv = tt2(0,0))
!PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification
!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction
@@ -38,9 +86,55 @@ subroutine reduce_1 ( n, tts )
!PARSE-TREE: | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add
!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec
!PARSE-TREE: | | | Name = 'tt2'
-!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out=tt2(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)'
-!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv=tt2(x=0._8,y=0._8)'
-
+!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out=tt2(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)'
+!PARSE-TREE: | | | | Variable = 'omp_out'
+!PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | Expr = 'tt2(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)'
+!PARSE-TREE: | | | | | StructureConstructor
+!PARSE-TREE: | | | | | | DerivedTypeSpec
+!PARSE-TREE: | | | | | | | Name = 'tt2'
+!PARSE-TREE: | | | | | | ComponentSpec
+!PARSE-TREE: | | | | | | | ComponentDataSource -> Expr = 'omp_out%x-omp_in%x'
+!PARSE-TREE: | | | | | | | | Subtract
+!PARSE-TREE: | | | | | | | | | Expr = 'omp_out%x'
+!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | | | | | | Name = 'x'
+!PARSE-TREE: | | | | | | | | | Expr = 'omp_in%x'
+!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_in'
+!PARSE-TREE: | | | | | | | | | | | Name = 'x'
+!PARSE-TREE: | | | | | | ComponentSpec
+!PARSE-TREE: | | | | | | | ComponentDataSource -> Expr = 'omp_out%y-omp_in%y'
+!PARSE-TREE: | | | | | | | | Subtract
+!PARSE-TREE: | | | | | | | | | Expr = 'omp_out%y'
+!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | | | | | | Name = 'y'
+!PARSE-TREE: | | | | | | | | | Expr = 'omp_in%y'
+!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_in'
+!PARSE-TREE: | | | | | | | | | | | Name = 'y'
+!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | Instance -> AssignmentStmt = 'omp_priv=tt2(x=0._8,y=0._8)'
+!PARSE-TREE: | | | Variable = 'omp_priv'
+!PARSE-TREE: | | | | Designator -> DataRef -> Name = 'omp_priv'
+!PARSE-TREE: | | | Expr = 'tt2(x=0._8,y=0._8)'
+!PARSE-TREE: | | | | StructureConstructor
+!PARSE-TREE: | | | | | DerivedTypeSpec
+!PARSE-TREE: | | | | | | Name = 'tt2'
+!PARSE-TREE: | | | | | ComponentSpec
+!PARSE-TREE: | | | | | | ComponentDataSource -> Expr = '0_4'
+!PARSE-TREE: | | | | | | | LiteralConstant -> IntLiteralConstant = '0'
+!PARSE-TREE: | | | | | ComponentSpec
+!PARSE-TREE: | | | | | | ComponentDataSource -> Expr = '0_4'
+!PARSE-TREE: | | | | | | | LiteralConstant -> IntLiteralConstant = '0'
+!PARSE-TREE: | Flags = None
!$omp declare reduction(+ :tt2 : omp_out = tt2(omp_out%x - omp_in%x , omp_out%y - omp_in%y)) initializer(omp_priv = tt2(0,0))
type(tt) :: diffp = tt( 0, 0 )
diff --git a/flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f90 b/flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f90
index 455fc17..f026f15 100644
--- a/flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f90
+++ b/flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f90
@@ -8,6 +8,6 @@ end
!CHECK: !DEF: /f00 (Subroutine) Subprogram
!CHECK: subroutine f00
-!CHECK: !$omp declare reduction(fred:integer,real: omp_out = omp_in+omp_out)
+!CHECK: !$omp declare reduction(fred:integer, real: omp_out = omp_in + omp_out)
!CHECK: end subroutine
diff --git a/flang/test/Parser/OpenMP/declare-reduction-unparse.f90 b/flang/test/Parser/OpenMP/declare-reduction-unparse.f90
index 73d7ccf..7897eb0 100644
--- a/flang/test/Parser/OpenMP/declare-reduction-unparse.f90
+++ b/flang/test/Parser/OpenMP/declare-reduction-unparse.f90
@@ -19,7 +19,8 @@ function func(x, n, init)
end subroutine initme
end interface
!$omp declare reduction(red_add:integer(4):omp_out=omp_out+omp_in) initializer(initme(omp_priv,0))
-!CHECK: !$OMP DECLARE REDUCTION(red_add:INTEGER(KIND=4_4): omp_out = omp_out+omp_in) INITIALIZER(initme(omp_priv, 0_4))
+!CHECK: !$OMP DECLARE REDUCTION(red_add:INTEGER(KIND=4_4): omp_out=omp_out+omp_in) INITIA&
+!CHECKL !$OMP&LIZER(initme(omp_priv,0))
!PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification
!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction
@@ -27,9 +28,31 @@ function func(x, n, init)
!PARSE-TREE: | | OmpReductionIdentifier -> ProcedureDesignator -> Name = 'red_add'
!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> KindSelector -> Scalar -> Integer -> Constant -> Expr = '4_4'
!PARSE-TREE: | | | LiteralConstant -> IntLiteralConstant = '4'
-!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out=omp_out+omp_in'
-!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerProc
-!PARSE-TREE: | | ProcedureDesignator -> Name = 'initme'
+!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out=omp_out+omp_in'
+!PARSE-TREE: | | | | Variable = 'omp_out'
+!PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | Expr = 'omp_out+omp_in'
+!PARSE-TREE: | | | | | Add
+!PARSE-TREE: | | | | | | Expr = 'omp_out'
+!PARSE-TREE: | | | | | | | Designator -> DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | Expr = 'omp_in'
+!PARSE-TREE: | | | | | | | Designator -> DataRef -> Name = 'omp_in'
+!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | Instance -> CallStmt = 'CALL initme(omp_priv,0_4)'
+!PARSE-TREE: | | | Call
+!PARSE-TREE: | | | | ProcedureDesignator -> Name = 'initme'
+!PARSE-TREE: | | | | ActualArgSpec
+!PARSE-TREE: | | | | | ActualArg -> Expr = 'omp_priv'
+!PARSE-TREE: | | | | | | Designator -> DataRef -> Name = 'omp_priv'
+!PARSE-TREE: | | | | ActualArgSpec
+!PARSE-TREE: | | | | | ActualArg -> Expr = '0_4'
+!PARSE-TREE: | | | | | | LiteralConstant -> IntLiteralConstant = '0'
+!PARSE-TREE: | Flags = None
res=init
!$omp simd reduction(red_add:res)
@@ -59,7 +82,8 @@ end function func
!CHECK-LABEL: program main
program main
integer :: my_var
-!CHECK: !$OMP DECLARE REDUCTION(my_add_red:INTEGER: omp_out = omp_out+omp_in) INITIALIZER(omp_priv = 0_4)
+!CHECK: !$OMP DECLARE REDUCTION(my_add_red:INTEGER: omp_out = omp_out + omp_in) INITIA&
+!CHECK: !$OMP&LIZER(omp_priv=0)
!$omp declare reduction (my_add_red : integer : omp_out = omp_out + omp_in) initializer (omp_priv=0)
my_var = 0
@@ -74,5 +98,24 @@ end program main
!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpReductionSpecifier
!PARSE-TREE: | | OmpReductionIdentifier -> ProcedureDesignator -> Name = 'my_add_red'
!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec ->
-!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out=omp_out+omp_in'
-!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv=0_4'
+!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out=omp_out+omp_in'
+!PARSE-TREE: | | | | Variable = 'omp_out'
+!PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | Expr = 'omp_out+omp_in'
+!PARSE-TREE: | | | | | Add
+!PARSE-TREE: | | | | | | Expr = 'omp_out'
+!PARSE-TREE: | | | | | | | Designator -> DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | Expr = 'omp_in'
+!PARSE-TREE: | | | | | | | Designator -> DataRef -> Name = 'omp_in'
+!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | OmpStylizedDeclaration
+!PARSE-TREE: | | Instance -> AssignmentStmt = 'omp_priv=0_4'
+!PARSE-TREE: | | | Variable = 'omp_priv'
+!PARSE-TREE: | | | | Designator -> DataRef -> Name = 'omp_priv'
+!PARSE-TREE: | | | Expr = '0_4'
+!PARSE-TREE: | | | | LiteralConstant -> IntLiteralConstant = '0'
+!PARSE-TREE: | Flags = None
diff --git a/flang/test/Parser/OpenMP/metadirective-dirspec.f90 b/flang/test/Parser/OpenMP/metadirective-dirspec.f90
index c373001..b64ceb1 100644
--- a/flang/test/Parser/OpenMP/metadirective-dirspec.f90
+++ b/flang/test/Parser/OpenMP/metadirective-dirspec.f90
@@ -105,8 +105,8 @@ end
!UNPARSE: TYPE :: tt2
!UNPARSE: REAL :: x
!UNPARSE: END TYPE
-!UNPARSE: !$OMP METADIRECTIVE WHEN(USER={CONDITION(.true._4)}: DECLARE REDUCTION(+:tt1,tt2: omp_out%x = omp_in%x+omp_out%x)&
-!UNPARSE: !$OMP&)
+!UNPARSE: !$OMP METADIRECTIVE WHEN(USER={CONDITION(.true._4)}: DECLARE REDUCTION(+:tt1, tt2: omp&
+!UNPARSE: !$OMP&_out%x = omp_in%x + omp_out%x))
!UNPARSE: END SUBROUTINE
!PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpMetadirectiveDirective
@@ -127,21 +127,44 @@ end
!PARSE-TREE: | | | | | Name = 'tt1'
!PARSE-TREE: | | | | OmpTypeName -> TypeSpec -> DerivedTypeSpec
!PARSE-TREE: | | | | | Name = 'tt2'
-!PARSE-TREE: | | | | OmpCombinerExpression -> AssignmentStmt = 'omp_out%x=omp_in%x+omp_out%x'
-!PARSE-TREE: | | | | | | Designator -> DataRef -> StructureComponent
-!PARSE-TREE: | | | | | | | DataRef -> Name = 'omp_out'
-!PARSE-TREE: | | | | | | | Name = 'x'
-!PARSE-TREE: | | | | | Expr = 'omp_in%x+omp_out%x'
-!PARSE-TREE: | | | | | | Add
-!PARSE-TREE: | | | | | | | Expr = 'omp_in%x'
-!PARSE-TREE: | | | | | | | | Designator -> DataRef -> StructureComponent
-!PARSE-TREE: | | | | | | | | | DataRef -> Name = 'omp_in'
-!PARSE-TREE: | | | | | | | | | Name = 'x'
-!PARSE-TREE: | | | | | | | Expr = 'omp_out%x'
-!PARSE-TREE: | | | | | | | | Designator -> DataRef -> StructureComponent
-!PARSE-TREE: | | | | | | | | | DataRef -> Name = 'omp_out'
-!PARSE-TREE: | | | | | | | | | Name = 'x'
+!PARSE-TREE: | | | | OmpCombinerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | | | Instance -> AssignmentStmt = 'omp_out%x=omp_in%x+omp_out%x'
+!PARSE-TREE: | | | | | | Variable = 'omp_out%x'
+!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | | | Name = 'x'
+!PARSE-TREE: | | | | | | Expr = 'omp_in%x+omp_out%x'
+!PARSE-TREE: | | | | | | | Add
+!PARSE-TREE: | | | | | | | | Expr = 'omp_in%x'
+!PARSE-TREE: | | | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | | | DataRef -> Name = 'omp_in'
+!PARSE-TREE: | | | | | | | | | | Name = 'x'
+!PARSE-TREE: | | | | | | | | Expr = 'omp_out%x'
+!PARSE-TREE: | | | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | | | | | Name = 'x'
+!PARSE-TREE: | | | | OmpStylizedInstance
+!PARSE-TREE: | | | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | | | Instance -> AssignmentStmt = 'omp_out%x=omp_in%x+omp_out%x'
+!PARSE-TREE: | | | | | | Variable = 'omp_out%x'
+!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | | | Name = 'x'
+!PARSE-TREE: | | | | | | Expr = 'omp_in%x+omp_out%x'
+!PARSE-TREE: | | | | | | | Add
+!PARSE-TREE: | | | | | | | | Expr = 'omp_in%x'
+!PARSE-TREE: | | | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | | | DataRef -> Name = 'omp_in'
+!PARSE-TREE: | | | | | | | | | | Name = 'x'
+!PARSE-TREE: | | | | | | | | Expr = 'omp_out%x'
+!PARSE-TREE: | | | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | | | | | Name = 'x'
!PARSE-TREE: | | | OmpClauseList ->
+!PARSE-TREE: | | | Flags = None
subroutine f04
!$omp metadirective when(user={condition(.true.)}: &
diff --git a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90
index 39e8f05..50a38c6 100644
--- a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90
+++ b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90
@@ -79,7 +79,7 @@ end
!UNPARSE: TYPE :: t
!UNPARSE: INTEGER :: x
!UNPARSE: END TYPE
-!UNPARSE: !$OMP DECLARE_REDUCTION(+:t: omp_out%x = omp_out%x+omp_in%x)
+!UNPARSE: !$OMP DECLARE_REDUCTION(+:t: omp_out%x = omp_out%x + omp_in%x)
!UNPARSE: END SUBROUTINE
!PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification
@@ -88,21 +88,24 @@ end
!PARSE-TREE: | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add
!PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec
!PARSE-TREE: | | | Name = 't'
-!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out%x=omp_out%x+omp_in%x'
-!PARSE-TREE: | | | Variable = 'omp_out%x'
-!PARSE-TREE: | | | | Designator -> DataRef -> StructureComponent
-!PARSE-TREE: | | | | | DataRef -> Name = 'omp_out'
-!PARSE-TREE: | | | | | Name = 'x'
-!PARSE-TREE: | | | Expr = 'omp_out%x+omp_in%x'
-!PARSE-TREE: | | | | Add
-!PARSE-TREE: | | | | | Expr = 'omp_out%x'
-!PARSE-TREE: | | | | | | Designator -> DataRef -> StructureComponent
-!PARSE-TREE: | | | | | | | DataRef -> Name = 'omp_out'
-!PARSE-TREE: | | | | | | | Name = 'x'
-!PARSE-TREE: | | | | | Expr = 'omp_in%x'
-!PARSE-TREE: | | | | | | Designator -> DataRef -> StructureComponent
-!PARSE-TREE: | | | | | | | DataRef -> Name = 'omp_in'
-!PARSE-TREE: | | | | | | | Name = 'x'
+!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | OmpStylizedDeclaration
+!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out%x=omp_out%x+omp_in%x'
+!PARSE-TREE: | | | | Variable = 'omp_out%x'
+!PARSE-TREE: | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | Name = 'x'
+!PARSE-TREE: | | | | Expr = 'omp_out%x+omp_in%x'
+!PARSE-TREE: | | | | | Add
+!PARSE-TREE: | | | | | | Expr = 'omp_out%x'
+!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_out'
+!PARSE-TREE: | | | | | | | | Name = 'x'
+!PARSE-TREE: | | | | | | Expr = 'omp_in%x'
+!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent
+!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_in'
+!PARSE-TREE: | | | | | | | | Name = 'x'
!PARSE-TREE: | OmpClauseList ->
!PARSE-TREE: | Flags = None
diff --git a/flang/test/Parser/compiler-directives.f90 b/flang/test/Parser/compiler-directives.f90
index 04d22ff..b2fe4663 100644
--- a/flang/test/Parser/compiler-directives.f90
+++ b/flang/test/Parser/compiler-directives.f90
@@ -72,3 +72,27 @@ subroutine no_vector
do i=1,10
enddo
end subroutine
+
+subroutine inline
+ integer :: a
+ !dir$ forceinline
+ ! CHECK: !DIR$ FORCEINLINE
+ a = f(2)
+
+ !dir$ inline
+ ! CHECK: !DIR$ INLINE
+ call g()
+
+ !dir$ noinline
+ ! CHECK: !DIR$ NOINLINE
+ call g()
+
+ contains
+ function f(x)
+ integer :: x
+ f = x**2
+ end function
+
+ subroutine g()
+ end subroutine
+end subroutine
diff --git a/flang/test/Semantics/OpenMP/declare-reduction-error.f90 b/flang/test/Semantics/OpenMP/declare-reduction-error.f90
deleted file mode 100644
index 21f5cc1..0000000
--- a/flang/test/Semantics/OpenMP/declare-reduction-error.f90
+++ /dev/null
@@ -1,11 +0,0 @@
-! RUN: not %flang_fc1 -emit-obj -fopenmp -fopenmp-version=50 %s 2>&1 | FileCheck %s
-
-subroutine initme(x,n)
- integer x,n
- x=n
-end subroutine initme
-
-subroutine subr
- !$omp declare reduction(red_add:integer(4):omp_out=omp_out+omp_in) initializer(initme(omp_priv,0))
- !CHECK: error: Implicit subroutine declaration 'initme' in DECLARE REDUCTION
-end subroutine subr
diff --git a/flang/test/Semantics/OpenMP/declare-reduction-functions.f90 b/flang/test/Semantics/OpenMP/declare-reduction-functions.f90
index 000d323..89e0771 100644
--- a/flang/test/Semantics/OpenMP/declare-reduction-functions.f90
+++ b/flang/test/Semantics/OpenMP/declare-reduction-functions.f90
@@ -57,9 +57,10 @@ contains
!CHECK: adder: UserReductionDetails TYPE(two)
!CHECK OtherConstruct scope
!CHECK: omp_in size=8 offset=0: ObjectEntity type: TYPE(two)
-!CHECK: omp_orig size=8 offset=8: ObjectEntity type: TYPE(two)
-!CHECK: omp_out size=8 offset=16: ObjectEntity type: TYPE(two)
-!CHECK: omp_priv size=8 offset=24: ObjectEntity type: TYPE(two)
+!CHECK: omp_out size=8 offset=8: ObjectEntity type: TYPE(two)
+!CHECK OtherConstruct scope
+!CHECK: omp_orig size=8 offset=0: ObjectEntity type: TYPE(two)
+!CHECK: omp_priv size=8 offset=8: ObjectEntity type: TYPE(two)
!$omp simd reduction(adder:res)
@@ -101,14 +102,16 @@ contains
!CHECK: adder: UserReductionDetails TYPE(two) TYPE(three)
!CHECK OtherConstruct scope
!CHECK: omp_in size=8 offset=0: ObjectEntity type: TYPE(two)
-!CHECK: omp_orig size=8 offset=8: ObjectEntity type: TYPE(two)
-!CHECK: omp_out size=8 offset=16: ObjectEntity type: TYPE(two)
-!CHECK: omp_priv size=8 offset=24: ObjectEntity type: TYPE(two)
+!CHECK: omp_out size=8 offset=8: ObjectEntity type: TYPE(two)
+!CHECK OtherConstruct scope
+!CHECK: omp_orig size=8 offset=0: ObjectEntity type: TYPE(two)
+!CHECK: omp_priv size=8 offset=8: ObjectEntity type: TYPE(two)
!CHECK OtherConstruct scope
!CHECK: omp_in size=24 offset=0: ObjectEntity type: TYPE(three)
-!CHECK: omp_orig size=24 offset=24: ObjectEntity type: TYPE(three)
-!CHECK: omp_out size=24 offset=48: ObjectEntity type: TYPE(three)
-!CHECK: omp_priv size=24 offset=72: ObjectEntity type: TYPE(three)
+!CHECK: omp_out size=24 offset=24: ObjectEntity type: TYPE(three)
+!CHECK OtherConstruct scope
+!CHECK: omp_orig size=24 offset=0: ObjectEntity type: TYPE(three)
+!CHECK: omp_priv size=24 offset=24: ObjectEntity type: TYPE(three)
!$omp simd reduction(adder:res3)
do i=1,n
@@ -135,9 +138,10 @@ contains
!CHECK: op.+: UserReductionDetails TYPE(two)
!CHECK OtherConstruct scope
!CHECK: omp_in size=8 offset=0: ObjectEntity type: TYPE(two)
-!CHECK: omp_orig size=8 offset=8: ObjectEntity type: TYPE(two)
-!CHECK: omp_out size=8 offset=16: ObjectEntity type: TYPE(two)
-!CHECK: omp_priv size=8 offset=24: ObjectEntity type: TYPE(two)
+!CHECK: omp_out size=8 offset=8: ObjectEntity type: TYPE(two)
+!CHECK OtherConstruct scope
+!CHECK: omp_orig size=8 offset=0: ObjectEntity type: TYPE(two)
+!CHECK: omp_priv size=8 offset=8: ObjectEntity type: TYPE(two)
!$omp simd reduction(+:res)
@@ -163,14 +167,16 @@ contains
!CHECK: op.+: UserReductionDetails TYPE(two) TYPE(three)
!CHECK OtherConstruct scope
!CHECK: omp_in size=8 offset=0: ObjectEntity type: TYPE(two)
-!CHECK: omp_orig size=8 offset=8: ObjectEntity type: TYPE(two)
-!CHECK: omp_out size=8 offset=16: ObjectEntity type: TYPE(two)
-!CHECK: omp_priv size=8 offset=24: ObjectEntity type: TYPE(two)
+!CHECK: omp_out size=8 offset=8: ObjectEntity type: TYPE(two)
+!CHECK OtherConstruct scope
+!CHECK: omp_orig size=8 offset=0: ObjectEntity type: TYPE(two)
+!CHECK: omp_priv size=8 offset=8: ObjectEntity type: TYPE(two)
!CHECK: OtherConstruct scope
!CHECK: omp_in size=24 offset=0: ObjectEntity type: TYPE(three)
-!CHECK: omp_orig size=24 offset=24: ObjectEntity type: TYPE(three)
-!CHECK: omp_out size=24 offset=48: ObjectEntity type: TYPE(three)
-!CHECK: omp_priv size=24 offset=72: ObjectEntity type: TYPE(three)
+!CHECK: omp_out size=24 offset=24: ObjectEntity type: TYPE(three)
+!CHECK OtherConstruct scope
+!CHECK: omp_orig size=24 offset=0: ObjectEntity type: TYPE(three)
+!CHECK: omp_priv size=24 offset=24: ObjectEntity type: TYPE(three)
!$omp simd reduction(+:res3)
do i=1,n
@@ -183,6 +189,7 @@ contains
enddo
res%t2 = res2
res%t3 = res3
+ funcBtwothree = res
end function funcBtwothree
!! This is checking a special case, where a reduction is declared inside a
@@ -191,11 +198,12 @@ contains
pure logical function reduction()
!CHECK: reduction size=4 offset=0: ObjectEntity funcResult type: LOGICAL(4)
!CHECK: rr: UserReductionDetails INTEGER(4)
-!CHECK: OtherConstruct scope: size=16 alignment=4 sourceRange=0 bytes
+!CHECK: OtherConstruct scope: size=8 alignment=4 sourceRange=0 bytes
!CHECK: omp_in size=4 offset=0: ObjectEntity type: INTEGER(4)
-!CHECK: omp_orig size=4 offset=4: ObjectEntity type: INTEGER(4)
-!CHECK: omp_out size=4 offset=8: ObjectEntity type: INTEGER(4)
-!CHECK: omp_priv size=4 offset=12: ObjectEntity type: INTEGER(4)
+!CHECK: omp_out size=4 offset=4: ObjectEntity type: INTEGER(4)
+!CHECK: OtherConstruct scope: size=8 alignment=4 sourceRange=0 bytes
+!CHECK: omp_orig size=4 offset=0: ObjectEntity type: INTEGER(4)
+!CHECK: omp_priv size=4 offset=4: ObjectEntity type: INTEGER(4)
!$omp declare reduction (rr : integer : omp_out = omp_out + omp_in) initializer (omp_priv = 0)
reduction = .false.
end function reduction
diff --git a/flang/test/Semantics/OpenMP/declare-reduction-logical.f90 b/flang/test/Semantics/OpenMP/declare-reduction-logical.f90
index 7ab7cad..87fcecd 100644
--- a/flang/test/Semantics/OpenMP/declare-reduction-logical.f90
+++ b/flang/test/Semantics/OpenMP/declare-reduction-logical.f90
@@ -18,9 +18,10 @@ contains
!CHECK: op.AND: UserReductionDetails TYPE(logicalwrapper)
!CHECK OtherConstruct scope
!CHECK: omp_in size=4 offset=0: ObjectEntity type: TYPE(logicalwrapper)
-!CHECK: omp_orig size=4 offset=4: ObjectEntity type: TYPE(logicalwrapper)
-!CHECK: omp_out size=4 offset=8: ObjectEntity type: TYPE(logicalwrapper)
-!CHECK: omp_priv size=4 offset=12: ObjectEntity type: TYPE(logicalwrapper)
+!CHECK: omp_out size=4 offset=4: ObjectEntity type: TYPE(logicalwrapper)
+!CHECK OtherConstruct scope
+!CHECK: omp_orig size=4 offset=0: ObjectEntity type: TYPE(logicalwrapper)
+!CHECK: omp_priv size=4 offset=4: ObjectEntity type: TYPE(logicalwrapper)
!$omp simd reduction(.AND.:res)
do i=1,n
diff --git a/flang/test/Semantics/OpenMP/declare-reduction-modfile.f90 b/flang/test/Semantics/OpenMP/declare-reduction-modfile.f90
index 0882de8..763179c 100644
--- a/flang/test/Semantics/OpenMP/declare-reduction-modfile.f90
+++ b/flang/test/Semantics/OpenMP/declare-reduction-modfile.f90
@@ -6,13 +6,13 @@
!type::t1
!integer(4)::val
!endtype
-!!$OMP DECLARE REDUCTION(*:t1:omp_out=omp_out*omp_in)INITIALIZER(omp_priv=&
-!!$OMP&t1(1))
+!!$OMP DECLARE REDUCTION(*:t1: omp_out=omp_out*omp_in) INITIALIZER(omp_priv=t1(&
+!!$OMP&1))
!!$OMP METADIRECTIVE OTHERWISE(DECLARE REDUCTION(+:INTEGER))
-!!$OMP DECLARE REDUCTION(.fluffy.:t1:omp_out=omp_out.fluffy.omp_in)INITIALI&
-!!$OMP&ZER(omp_priv=t1(0))
-!!$OMP DECLARE REDUCTION(.mul.:t1:omp_out=omp_out.mul.omp_in)INITIALIZER(om&
-!!$OMP&p_priv=t1(1))
+!!$OMP DECLARE REDUCTION(.fluffy.:t1: omp_out=omp_out.fluffy.omp_in) INITIALIZE&
+!!$OMP&R(omp_priv=t1(0))
+!!$OMP DECLARE REDUCTION(.mul.:t1: omp_out=omp_out.mul.omp_in) INITIALIZER(omp_&
+!!$OMP&priv=t1(1))
!interface operator(.mul.)
!procedure::mul
!end interface
diff --git a/flang/test/Semantics/OpenMP/declare-reduction-operator.f90 b/flang/test/Semantics/OpenMP/declare-reduction-operator.f90
index dc12332..5fc4205 100644
--- a/flang/test/Semantics/OpenMP/declare-reduction-operator.f90
+++ b/flang/test/Semantics/OpenMP/declare-reduction-operator.f90
@@ -11,11 +11,9 @@ module m1
!$omp declare reduction(.fluffy.:t1:omp_out=omp_out.fluffy.omp_in)
!CHECK: op.fluffy., PUBLIC: UserReductionDetails TYPE(t1)
!CHECK: t1, PUBLIC: DerivedType components: val
-!CHECK: OtherConstruct scope: size=16 alignment=4 sourceRange=0 bytes
+!CHECK: OtherConstruct scope: size=8 alignment=4 sourceRange=0 bytes
!CHECK: omp_in size=4 offset=0: ObjectEntity type: TYPE(t1)
-!CHECK: omp_orig size=4 offset=4: ObjectEntity type: TYPE(t1)
-!CHECK: omp_out size=4 offset=8: ObjectEntity type: TYPE(t1)
-!CHECK: omp_priv size=4 offset=12: ObjectEntity type: TYPE(t1)
+!CHECK: omp_out size=4 offset=4: ObjectEntity type: TYPE(t1)
contains
function my_mul(x, y)
type (t1), intent (in) :: x, y
diff --git a/flang/test/Semantics/OpenMP/declare-reduction-operators.f90 b/flang/test/Semantics/OpenMP/declare-reduction-operators.f90
index 84dbe1a..e0006bf 100644
--- a/flang/test/Semantics/OpenMP/declare-reduction-operators.f90
+++ b/flang/test/Semantics/OpenMP/declare-reduction-operators.f90
@@ -64,9 +64,10 @@ program test_vector
!CHECK: OtherConstruct scope:
!CHECK: omp_in size=12 offset=0: ObjectEntity type: TYPE(vector)
-!CHECK: omp_orig size=12 offset=12: ObjectEntity type: TYPE(vector)
-!CHECK: omp_out size=12 offset=24: ObjectEntity type: TYPE(vector)
-!CHECK: omp_priv size=12 offset=36: ObjectEntity type: TYPE(vector)
+!CHECK: omp_out size=12 offset=12: ObjectEntity type: TYPE(vector)
+!CHECK: OtherConstruct scope:
+!CHECK: omp_orig size=12 offset=0: ObjectEntity type: TYPE(vector)
+!CHECK: omp_priv size=12 offset=12: ObjectEntity type: TYPE(vector)
v2 = Vector(0.0, 0.0, 0.0)
v1 = Vector(1.0, 2.0, 3.0)
diff --git a/flang/test/Semantics/OpenMP/declare-reduction-renamedop.f90 b/flang/test/Semantics/OpenMP/declare-reduction-renamedop.f90
index 9cd638d..115fe51 100644
--- a/flang/test/Semantics/OpenMP/declare-reduction-renamedop.f90
+++ b/flang/test/Semantics/OpenMP/declare-reduction-renamedop.f90
@@ -33,11 +33,12 @@ program test_omp_reduction
!$omp declare reduction (.modmul. : t1 : omp_out = omp_out .modmul. omp_in) initializer(omp_priv = t1(1.0))
!CHECK: op.modmul.: UserReductionDetails TYPE(t1)
!CHECK: t1: Use from t1 in module1
-!CHECK: OtherConstruct scope: size=16 alignment=4 sourceRange=0 bytes
+!CHECK: OtherConstruct scope: size=8 alignment=4 sourceRange=0 bytes
!CHECK: omp_in size=4 offset=0: ObjectEntity type: TYPE(t1)
-!CHECK: omp_orig size=4 offset=4: ObjectEntity type: TYPE(t1)
-!CHECK: omp_out size=4 offset=8: ObjectEntity type: TYPE(t1)
-!CHECK: omp_priv size=4 offset=12: ObjectEntity type: TYPE(t1)
+!CHECK: omp_out size=4 offset=4: ObjectEntity type: TYPE(t1)
+!CHECK: OtherConstruct scope: size=8 alignment=4 sourceRange=0 bytes
+!CHECK: omp_orig size=4 offset=0: ObjectEntity type: TYPE(t1)
+!CHECK: omp_priv size=4 offset=4: ObjectEntity type: TYPE(t1)
result = t1(1.0)
!$omp parallel do reduction(.modmul.:result)
do i = 1, 10
diff --git a/flang/test/Semantics/OpenMP/declare-reduction.f90 b/flang/test/Semantics/OpenMP/declare-reduction.f90
index 1f39c57..c8dee5e 100644
--- a/flang/test/Semantics/OpenMP/declare-reduction.f90
+++ b/flang/test/Semantics/OpenMP/declare-reduction.f90
@@ -19,10 +19,12 @@ function func(x, n, init)
!$omp declare reduction(red_add:integer(4):omp_out=omp_out+omp_in) initializer(initme(omp_priv,0))
!CHECK: red_add: UserReductionDetails
!CHECK: Subprogram scope: initme
+!CHECK: OtherConstruct scope:
!CHECK: omp_in size=4 offset=0: ObjectEntity type: INTEGER(4)
-!CHECK: omp_orig size=4 offset=4: ObjectEntity type: INTEGER(4)
-!CHECK: omp_out size=4 offset=8: ObjectEntity type: INTEGER(4)
-!CHECK: omp_priv size=4 offset=12: ObjectEntity type: INTEGER(4)
+!CHECK: omp_out size=4 offset=4: ObjectEntity type: INTEGER(4)
+!CHECK: OtherConstruct scope:
+!CHECK: omp_orig size=4 offset=0: ObjectEntity type: INTEGER(4)
+!CHECK: omp_priv size=4 offset=4: ObjectEntity type: INTEGER(4)
!$omp simd reduction(red_add:res)
do i=1,n
res=res+x(i)
@@ -36,9 +38,11 @@ program main
!$omp declare reduction (my_add_red : integer : omp_out = omp_out + omp_in) initializer (omp_priv=0)
!CHECK: my_add_red: UserReductionDetails
+!CHECK: OtherConstruct scope:
!CHECK: omp_in size=4 offset=0: ObjectEntity type: INTEGER(4)
-!CHECK: omp_orig size=4 offset=4: ObjectEntity type: INTEGER(4)
-!CHECK: omp_out size=4 offset=8: ObjectEntity type: INTEGER(4)
-!CHECK: omp_priv size=4 offset=12: ObjectEntity type: INTEGER(4)
+!CHECK: omp_out size=4 offset=4: ObjectEntity type: INTEGER(4)
+!CHECK: OtherConstruct scope:
+!CHECK: omp_orig size=4 offset=0: ObjectEntity type: INTEGER(4)
+!CHECK: omp_priv size=4 offset=4: ObjectEntity type: INTEGER(4)
end program main