diff options
Diffstat (limited to 'flang')
44 files changed, 1947 insertions, 256 deletions
diff --git a/flang/docs/Directives.md b/flang/docs/Directives.md index 91c27cb..3ebb08c 100644 --- a/flang/docs/Directives.md +++ b/flang/docs/Directives.md @@ -53,6 +53,14 @@ A list of non-standard directives supported by Flang * `!dir$ novector` disabling vectorization on the following loop. * `!dir$ nounroll` disabling unrolling on the following loop. * `!dir$ nounroll_and_jam` disabling unrolling and jamming on the following loop. +* `!dir$ inline` instructs the compiler to attempt to inline the called routines if the + directive is specified before a call statement or all call statements within the loop + body if specified before a DO LOOP or all function references if specified before an + assignment statement. +* `!dir$ forceinline` works in the same way as the `inline` directive, but it forces + inlining by the compiler on a function call statement. +* `!dir$ noinline` works in the same way as the `inline` directive, but prevents + any attempt of inlining by the compiler on a function call statement. # Directive Details diff --git a/flang/include/flang/Evaluate/call.h b/flang/include/flang/Evaluate/call.h index fea09d6..93e36f6 100644 --- a/flang/include/flang/Evaluate/call.h +++ b/flang/include/flang/Evaluate/call.h @@ -255,6 +255,13 @@ public: bool IsElemental() const { return proc_.IsElemental(); } bool hasAlternateReturns() const { return hasAlternateReturns_; } + bool hasNoInline() const { return noInline_; } + void setNoInline(bool ni) { noInline_ = ni; } + bool hasAlwaysInline() const { return alwaysInline_; } + void setAlwaysInline(bool ai) { alwaysInline_ = ai; } + bool hasInlineHint() const { return inlineHint_; } + void setInlineHint(bool ih) { inlineHint_ = ih; } + Expr<SomeType> *UnwrapArgExpr(int n) { if (static_cast<std::size_t>(n) < arguments_.size() && arguments_[n]) { return arguments_[n]->UnwrapExpr(); @@ -278,6 +285,9 @@ protected: ActualArguments arguments_; Chevrons chevrons_; bool hasAlternateReturns_; + bool noInline_{false}; + bool alwaysInline_{false}; + bool inlineHint_{false}; }; template <typename A> class FunctionRef : public ProcedureRef { diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index c3cd119b..3407dd0 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -211,6 +211,8 @@ struct IntrinsicLibrary { mlir::Value genBarrierArrive(mlir::Type, llvm::ArrayRef<mlir::Value>); mlir::Value genBarrierArriveCnt(mlir::Type, llvm::ArrayRef<mlir::Value>); void genBarrierInit(llvm::ArrayRef<fir::ExtendedValue>); + mlir::Value genBarrierTryWait(mlir::Type, llvm::ArrayRef<mlir::Value>); + mlir::Value genBarrierTryWaitSleep(mlir::Type, llvm::ArrayRef<mlir::Value>); fir::ExtendedValue genBesselJn(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>); fir::ExtendedValue genBesselYn(mlir::Type, @@ -459,7 +461,21 @@ struct IntrinsicLibrary { mlir::Value genTime(mlir::Type, llvm::ArrayRef<mlir::Value>); void genTMABulkCommitGroup(llvm::ArrayRef<fir::ExtendedValue>); void genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue>); + void genTMABulkLoadC4(llvm::ArrayRef<fir::ExtendedValue>); + void genTMABulkLoadC8(llvm::ArrayRef<fir::ExtendedValue>); + void genTMABulkLoadI4(llvm::ArrayRef<fir::ExtendedValue>); + void genTMABulkLoadI8(llvm::ArrayRef<fir::ExtendedValue>); + void genTMABulkLoadR2(llvm::ArrayRef<fir::ExtendedValue>); + void genTMABulkLoadR4(llvm::ArrayRef<fir::ExtendedValue>); + void genTMABulkLoadR8(llvm::ArrayRef<fir::ExtendedValue>); void genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue>); + void genTMABulkStoreI4(llvm::ArrayRef<fir::ExtendedValue>); + void genTMABulkStoreI8(llvm::ArrayRef<fir::ExtendedValue>); + void genTMABulkStoreR2(llvm::ArrayRef<fir::ExtendedValue>); + void genTMABulkStoreR4(llvm::ArrayRef<fir::ExtendedValue>); + void genTMABulkStoreR8(llvm::ArrayRef<fir::ExtendedValue>); + void genTMABulkStoreC4(llvm::ArrayRef<fir::ExtendedValue>); + void genTMABulkStoreC8(llvm::ArrayRef<fir::ExtendedValue>); void genTMABulkWaitGroup(llvm::ArrayRef<fir::ExtendedValue>); mlir::Value genTrailz(mlir::Type, llvm::ArrayRef<mlir::Value>); fir::ExtendedValue genTransfer(mlir::Type, diff --git a/flang/include/flang/Optimizer/Dialect/FIRAttr.td b/flang/include/flang/Optimizer/Dialect/FIRAttr.td index 7bd96ac3..8a8c60f 100644 --- a/flang/include/flang/Optimizer/Dialect/FIRAttr.td +++ b/flang/include/flang/Optimizer/Dialect/FIRAttr.td @@ -219,4 +219,24 @@ def LocalitySpecifierTypeAttr : EnumAttr<FIROpsDialect, LocalitySpecifierType, let assemblyFormat = "`{` `type` `=` $value `}`"; } +/// Fortran inline attribute +def FIRinlineNone : I32BitEnumAttrCaseNone<"none">; +def FIRinlineNo : I32BitEnumAttrCaseBit<"no_inline", 0>; +def FIRinlineAlways : I32BitEnumAttrCaseBit<"always_inline", 1>; +def FIRinlineHint : I32BitEnumAttrCaseBit<"inline_hint", 2>; + +def fir_FortranInlineEnum + : I32BitEnumAttr<"FortranInlineEnum", "Fortran inline attributes", + [FIRinlineNone, FIRinlineNo, FIRinlineAlways, + FIRinlineHint]> { + let separator = ", "; + let cppNamespace = "::fir"; + let genSpecializedAttr = 0; + let printBitEnumPrimaryGroups = 1; +} + +def fir_FortranInlineAttr + : EnumAttr<FIROpsDialect, fir_FortranInlineEnum, "inline_attrs"> { + let assemblyFormat = "`<` $value `>`"; +} #endif // FIR_DIALECT_FIR_ATTRS diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 86502c6..58a317c 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -2549,6 +2549,7 @@ def fir_CallOp : fir_Op<"call", OptionalAttr<DictArrayAttr>:$arg_attrs, OptionalAttr<DictArrayAttr>:$res_attrs, OptionalAttr<fir_FortranProcedureFlagsAttr>:$procedure_attrs, + OptionalAttr<fir_FortranInlineAttr>:$inline_attr, DefaultValuedAttr<Arith_FastMathAttr, "::mlir::arith::FastMathFlags::none">:$fastmath ); diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index af8152d..bb97069 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -206,8 +206,11 @@ public: NODE(parser, CompilerDirective) NODE(CompilerDirective, AssumeAligned) NODE(CompilerDirective, IgnoreTKR) + NODE(CompilerDirective, Inline) + NODE(CompilerDirective, ForceInline) NODE(CompilerDirective, LoopCount) NODE(CompilerDirective, NameValue) + NODE(CompilerDirective, NoInline) NODE(CompilerDirective, Unrecognized) NODE(CompilerDirective, VectorAlways) NODE(CompilerDirective, Unroll) @@ -596,7 +599,7 @@ public: NODE(parser, OmpInitClause) NODE(OmpInitClause, Modifier) NODE(parser, OmpInitializerClause) - NODE(parser, OmpInitializerProc) + NODE(parser, OmpInitializerExpression) NODE(parser, OmpInReductionClause) NODE(OmpInReductionClause, Modifier) NODE(parser, OmpInteropPreference) @@ -674,6 +677,10 @@ public: NODE_ENUM(OmpSeverityClause, Severity) NODE(parser, OmpStepComplexModifier) NODE(parser, OmpStepSimpleModifier) + NODE(parser, OmpStylizedDeclaration) + NODE(parser, OmpStylizedExpression) + NODE(parser, OmpStylizedInstance) + NODE(OmpStylizedInstance, Instance) NODE(parser, OmpTaskDependenceType) NODE_ENUM(OmpTaskDependenceType, Value) NODE(parser, OmpTaskReductionClause) diff --git a/flang/include/flang/Parser/openmp-utils.h b/flang/include/flang/Parser/openmp-utils.h index f761332..49db091 100644 --- a/flang/include/flang/Parser/openmp-utils.h +++ b/flang/include/flang/Parser/openmp-utils.h @@ -25,6 +25,13 @@ namespace Fortran::parser::omp { +template <typename T> constexpr auto addr_if(std::optional<T> &x) { + return x ? &*x : nullptr; +} +template <typename T> constexpr auto addr_if(const std::optional<T> &x) { + return x ? &*x : nullptr; +} + namespace detail { using D = llvm::omp::Directive; @@ -133,9 +140,24 @@ template <typename T> OmpDirectiveName GetOmpDirectiveName(const T &x) { } const OmpObjectList *GetOmpObjectList(const OmpClause &clause); + +template <typename T> +const T *GetFirstArgument(const OmpDirectiveSpecification &spec) { + for (const OmpArgument &arg : spec.Arguments().v) { + if (auto *t{std::get_if<T>(&arg.u)}) { + return t; + } + } + return nullptr; +} + const BlockConstruct *GetFortranBlockConstruct( const ExecutionPartConstruct &epc); +const OmpCombinerExpression *GetCombinerExpr( + const OmpReductionSpecifier &rspec); +const OmpInitializerExpression *GetInitializerExpr(const OmpClause &init); + } // namespace Fortran::parser::omp #endif // FORTRAN_PARSER_OPENMP_UTILS_H diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index bb47f31..c3a8c2e 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -24,7 +24,9 @@ #include "provenance.h" #include "flang/Common/idioms.h" #include "flang/Common/indirection.h" +#include "flang/Common/reference.h" #include "flang/Support/Fortran.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Frontend/OpenACC/ACC.h.inc" #include "llvm/Frontend/OpenMP/OMP.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" @@ -3356,6 +3358,9 @@ struct StmtFunctionStmt { // !DIR$ NOVECTOR // !DIR$ NOUNROLL // !DIR$ NOUNROLL_AND_JAM +// !DIR$ FORCEINLINE +// !DIR$ INLINE +// !DIR$ NOINLINE // !DIR$ <anything else> struct CompilerDirective { UNION_CLASS_BOILERPLATE(CompilerDirective); @@ -3384,11 +3389,14 @@ struct CompilerDirective { EMPTY_CLASS(NoVector); EMPTY_CLASS(NoUnroll); EMPTY_CLASS(NoUnrollAndJam); + EMPTY_CLASS(ForceInline); + EMPTY_CLASS(Inline); + EMPTY_CLASS(NoInline); EMPTY_CLASS(Unrecognized); CharBlock source; std::variant<std::list<IgnoreTKR>, LoopCount, std::list<AssumeAligned>, VectorAlways, std::list<NameValue>, Unroll, UnrollAndJam, Unrecognized, - NoVector, NoUnroll, NoUnrollAndJam> + NoVector, NoUnroll, NoUnrollAndJam, ForceInline, Inline, NoInline> u; }; @@ -3504,6 +3512,8 @@ struct OmpDirectiveName { // type-name list item struct OmpTypeName { + CharBlock source; + mutable const semantics::DeclTypeSpec *declTypeSpec{nullptr}; UNION_CLASS_BOILERPLATE(OmpTypeName); std::variant<TypeSpec, DeclarationTypeSpec> u; }; @@ -3532,6 +3542,39 @@ struct OmpObjectList { WRAPPER_CLASS_BOILERPLATE(OmpObjectList, std::list<OmpObject>); }; +struct OmpStylizedDeclaration { + COPY_AND_ASSIGN_BOILERPLATE(OmpStylizedDeclaration); + // Since "Reference" isn't handled by parse-tree-visitor, add EmptyTrait, + // and visit the members by hand when needed. + using EmptyTrait = std::true_type; + common::Reference<const OmpTypeName> type; + EntityDecl var; +}; + +struct OmpStylizedInstance { + struct Instance { + UNION_CLASS_BOILERPLATE(Instance); + std::variant<AssignmentStmt, CallStmt, common::Indirection<Expr>> u; + }; + TUPLE_CLASS_BOILERPLATE(OmpStylizedInstance); + std::tuple<std::list<OmpStylizedDeclaration>, Instance> t; +}; + +class ParseState; + +// Ref: [5.2:76], [6.0:185] +// +struct OmpStylizedExpression { + CharBlock source; + // Pointer to a temporary copy of the ParseState that is used to create + // additional parse subtrees for the stylized expression. This is only + // used internally during parsing and conveys no information to the + // consumers of the AST. + const ParseState *state{nullptr}; + WRAPPER_CLASS_BOILERPLATE( + OmpStylizedExpression, std::list<OmpStylizedInstance>); +}; + // Ref: [4.5:201-207], [5.0:293-299], [5.1:325-331], [5.2:124] // // reduction-identifier -> @@ -3549,9 +3592,22 @@ struct OmpReductionIdentifier { // combiner-expression -> // since 4.5 // assignment-statement | // function-reference -struct OmpCombinerExpression { - UNION_CLASS_BOILERPLATE(OmpCombinerExpression); - std::variant<AssignmentStmt, FunctionReference> u; +struct OmpCombinerExpression : public OmpStylizedExpression { + INHERITED_WRAPPER_CLASS_BOILERPLATE( + OmpCombinerExpression, OmpStylizedExpression); + static llvm::ArrayRef<CharBlock> Variables(); +}; + +// Ref: [4.5:222:7-8], [5.0:305:28-29], [5.1:337:20-21], [5.2:127:6-8], +// [6.0:242:3-5] +// +// initializer-expression -> // since 4.5 +// OMP_PRIV = expression | +// subroutine-name(argument-list) +struct OmpInitializerExpression : public OmpStylizedExpression { + INHERITED_WRAPPER_CLASS_BOILERPLATE( + OmpInitializerExpression, OmpStylizedExpression); + static llvm::ArrayRef<CharBlock> Variables(); }; inline namespace arguments { @@ -4552,16 +4608,9 @@ struct OmpInReductionClause { std::tuple<MODIFIERS(), OmpObjectList> t; }; -// declare-reduction -> DECLARE REDUCTION (reduction-identifier : type-list -// : combiner) [initializer-clause] -struct OmpInitializerProc { - TUPLE_CLASS_BOILERPLATE(OmpInitializerProc); - std::tuple<ProcedureDesignator, std::list<ActualArgSpec>> t; -}; // Initialization for declare reduction construct struct OmpInitializerClause { - UNION_CLASS_BOILERPLATE(OmpInitializerClause); - std::variant<OmpInitializerProc, AssignmentStmt> u; + WRAPPER_CLASS_BOILERPLATE(OmpInitializerClause, OmpInitializerExpression); }; // Ref: [4.5:199-201], [5.0:288-290], [5.1:321-322], [5.2:115-117] diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index 04a0639..cb27d544 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -830,6 +830,8 @@ public: OmpUseDevicePtr, OmpUseDeviceAddr, OmpIsDevicePtr, OmpHasDeviceAddr, // OpenMP data-copying attribute OmpCopyIn, OmpCopyPrivate, + // OpenMP special variables + OmpInVar, OmpOrigVar, OmpOutVar, OmpPrivVar, // OpenMP miscellaneous flags OmpCommonBlock, OmpReduction, OmpInReduction, OmpAligned, OmpNontemporal, OmpAllocate, OmpDeclarativeAllocateDirective, diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index a516a44..6e72987 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -1884,6 +1884,26 @@ private: setCurrentPosition(stmt.source); assert(stmt.typedCall && "Call was not analyzed"); mlir::Value res{}; + + // Set 'no_inline', 'inline_hint' or 'always_inline' to true on the + // ProcedureRef. The NoInline and AlwaysInline attribute will be set in + // genProcedureRef later. + for (const auto *dir : eval.dirs) { + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::parser::CompilerDirective::ForceInline &) { + stmt.typedCall->setAlwaysInline(true); + }, + [&](const Fortran::parser::CompilerDirective::Inline &) { + stmt.typedCall->setInlineHint(true); + }, + [&](const Fortran::parser::CompilerDirective::NoInline &) { + stmt.typedCall->setNoInline(true); + }, + [&](const auto &) {}}, + dir->u); + } + if (lowerToHighLevelFIR()) { std::optional<mlir::Type> resultType; if (stmt.typedCall->hasAlternateReturns()) @@ -2200,6 +2220,50 @@ private: // so no clean-up needs to be generated for these entities. } + void attachInlineAttributes( + mlir::Operation &op, + const llvm::ArrayRef<const Fortran::parser::CompilerDirective *> &dirs) { + if (dirs.empty()) + return; + + for (mlir::Value operand : op.getOperands()) { + if (operand.getDefiningOp()) + attachInlineAttributes(*operand.getDefiningOp(), dirs); + } + + if (fir::CallOp callOp = mlir::dyn_cast<fir::CallOp>(op)) { + for (const auto *dir : dirs) { + Fortran::common::visit( + Fortran::common::visitors{ + [&](const Fortran::parser::CompilerDirective::NoInline &) { + callOp.setInlineAttr(fir::FortranInlineEnum::no_inline); + }, + [&](const Fortran::parser::CompilerDirective::Inline &) { + callOp.setInlineAttr(fir::FortranInlineEnum::inline_hint); + }, + [&](const Fortran::parser::CompilerDirective::ForceInline &) { + callOp.setInlineAttr(fir::FortranInlineEnum::always_inline); + }, + [&](const auto &) {}}, + dir->u); + } + } + } + + void attachAttributesToDoLoopOperations( + fir::DoLoopOp &doLoop, + llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) { + if (!doLoop.getOperation() || dirs.empty()) + return; + + for (mlir::Block &block : doLoop.getRegion()) { + for (mlir::Operation &op : block.getOperations()) { + if (!dirs.empty()) + attachInlineAttributes(op, dirs); + } + } + } + /// Generate FIR for a DO construct. There are six variants: /// - unstructured infinite and while loops /// - structured and unstructured increment loops @@ -2351,6 +2415,11 @@ private: if (!incrementLoopNestInfo.empty() && incrementLoopNestInfo.back().isConcurrent) localSymbols.popScope(); + + // Add attribute(s) on operations in fir::DoLoopOp if necessary + for (IncrementLoopInfo &info : incrementLoopNestInfo) + if (auto loopOp = mlir::dyn_cast_if_present<fir::DoLoopOp>(info.loopOp)) + attachAttributesToDoLoopOperations(loopOp, doStmtEval.dirs); } /// Generate FIR to evaluate loop control values (lower, upper and step). @@ -3154,6 +3223,26 @@ private: e->dirs.push_back(&dir); } + void + attachInliningDirectiveToStmt(const Fortran::parser::CompilerDirective &dir, + Fortran::lower::pft::Evaluation *e) { + while (e->isDirective()) + e = e->lexicalSuccessor; + + // If the successor is a statement or a do loop, the compiler + // will perform inlining. + if (e->isA<Fortran::parser::CallStmt>() || + e->isA<Fortran::parser::NonLabelDoStmt>() || + e->isA<Fortran::parser::AssignmentStmt>()) { + e->dirs.push_back(&dir); + } else { + mlir::Location loc = toLocation(); + mlir::emitWarning(loc, + "Inlining directive not in front of loops, function" + "call or assignment.\n"); + } + } + void genFIR(const Fortran::parser::CompilerDirective &dir) { Fortran::lower::pft::Evaluation &eval = getEval(); @@ -3177,6 +3266,15 @@ private: [&](const Fortran::parser::CompilerDirective::NoUnrollAndJam &) { attachDirectiveToLoop(dir, &eval); }, + [&](const Fortran::parser::CompilerDirective::ForceInline &) { + attachInliningDirectiveToStmt(dir, &eval); + }, + [&](const Fortran::parser::CompilerDirective::Inline &) { + attachInliningDirectiveToStmt(dir, &eval); + }, + [&](const Fortran::parser::CompilerDirective::NoInline &) { + attachInliningDirectiveToStmt(dir, &eval); + }, [&](const auto &) {}}, dir.u); } @@ -5086,7 +5184,9 @@ private: void genDataAssignment( const Fortran::evaluate::Assignment &assign, - const Fortran::evaluate::ProcedureRef *userDefinedAssignment) { + const Fortran::evaluate::ProcedureRef *userDefinedAssignment, + const llvm::ArrayRef<const Fortran::parser::CompilerDirective *> &dirs = + {}) { mlir::Location loc = getCurrentLocation(); fir::FirOpBuilder &builder = getFirOpBuilder(); @@ -5166,10 +5266,20 @@ private: genCUDADataTransfer(builder, loc, assign, lhs, rhs, isWholeAllocatableAssignment, keepLhsLengthInAllocatableAssignment); - else + else { + // If RHS or LHS have a CallOp in their expression, this operation will + // have the 'no_inline' or 'always_inline' attribute if there is a + // directive just before the assignement. + if (!dirs.empty()) { + if (rhs.getDefiningOp()) + attachInlineAttributes(*rhs.getDefiningOp(), dirs); + if (lhs.getDefiningOp()) + attachInlineAttributes(*lhs.getDefiningOp(), dirs); + } hlfir::AssignOp::create(builder, loc, rhs, lhs, isWholeAllocatableAssignment, keepLhsLengthInAllocatableAssignment); + } if (hasCUDAImplicitTransfer && !isInDeviceContext) { localSymbols.popScope(); for (mlir::Value temp : implicitTemps) @@ -5237,16 +5347,21 @@ private: } /// Shared for both assignments and pointer assignments. - void genAssignment(const Fortran::evaluate::Assignment &assign) { + void + genAssignment(const Fortran::evaluate::Assignment &assign, + const llvm::ArrayRef<const Fortran::parser::CompilerDirective *> + &dirs = {}) { mlir::Location loc = toLocation(); if (lowerToHighLevelFIR()) { Fortran::common::visit( Fortran::common::visitors{ [&](const Fortran::evaluate::Assignment::Intrinsic &) { - genDataAssignment(assign, /*userDefinedAssignment=*/nullptr); + genDataAssignment(assign, /*userDefinedAssignment=*/nullptr, + dirs); }, [&](const Fortran::evaluate::ProcedureRef &procRef) { - genDataAssignment(assign, /*userDefinedAssignment=*/&procRef); + genDataAssignment(assign, /*userDefinedAssignment=*/&procRef, + dirs); }, [&](const Fortran::evaluate::Assignment::BoundsSpec &lbExprs) { if (isInsideHlfirForallOrWhere()) @@ -5651,7 +5766,8 @@ private: } void genFIR(const Fortran::parser::AssignmentStmt &stmt) { - genAssignment(*stmt.typedAssignment->v); + Fortran::lower::pft::Evaluation &eval = getEval(); + genAssignment(*stmt.typedAssignment->v, eval.dirs); } void genFIR(const Fortran::parser::SyncAllStmt &stmt) { diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index fb72040..9bf994e 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -700,9 +700,20 @@ Fortran::lower::genCallOpAndResult( callResult = dispatch.getResult(0); } else { // Standard procedure call with fir.call. + fir::FortranInlineEnumAttr inlineAttr; + + if (caller.getCallDescription().hasNoInline()) + inlineAttr = fir::FortranInlineEnumAttr::get( + builder.getContext(), fir::FortranInlineEnum::no_inline); + else if (caller.getCallDescription().hasInlineHint()) + inlineAttr = fir::FortranInlineEnumAttr::get( + builder.getContext(), fir::FortranInlineEnum::inline_hint); + else if (caller.getCallDescription().hasAlwaysInline()) + inlineAttr = fir::FortranInlineEnumAttr::get( + builder.getContext(), fir::FortranInlineEnum::always_inline); auto call = fir::CallOp::create( builder, loc, funcType.getResults(), funcSymbolAttr, operands, - /*arg_attrs=*/nullptr, /*res_attrs=*/nullptr, procAttrs); + /*arg_attrs=*/nullptr, /*res_attrs=*/nullptr, procAttrs, inlineAttr); callNumResults = call.getNumResults(); if (callNumResults != 0) diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 39bac81..53fe9c0 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -50,6 +50,7 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -358,6 +359,14 @@ static constexpr IntrinsicHandler handlers[]{ &I::genBarrierInit, {{{"barrier", asAddr}, {"count", asValue}}}, /*isElemental=*/false}, + {"barrier_try_wait", + &I::genBarrierTryWait, + {{{"barrier", asAddr}, {"token", asValue}}}, + /*isElemental=*/false}, + {"barrier_try_wait_sleep", + &I::genBarrierTryWaitSleep, + {{{"barrier", asAddr}, {"token", asValue}, {"ns", asValue}}}, + /*isElemental=*/false}, {"bessel_jn", &I::genBesselJn, {{{"n1", asValue}, {"n2", asValue}, {"x", asValue}}}, @@ -1036,10 +1045,87 @@ static constexpr IntrinsicHandler handlers[]{ {"dst", asAddr}, {"nbytes", asValue}}}, /*isElemental=*/false}, + {"tma_bulk_ldc4", + &I::genTMABulkLoadC4, + {{{"barrier", asAddr}, + {"src", asAddr}, + {"dst", asAddr}, + {"nelems", asValue}}}, + /*isElemental=*/false}, + {"tma_bulk_ldc8", + &I::genTMABulkLoadC8, + {{{"barrier", asAddr}, + {"src", asAddr}, + {"dst", asAddr}, + {"nelems", asValue}}}, + /*isElemental=*/false}, + {"tma_bulk_ldi4", + &I::genTMABulkLoadI4, + {{{"barrier", asAddr}, + {"src", asAddr}, + {"dst", asAddr}, + {"nelems", asValue}}}, + /*isElemental=*/false}, + {"tma_bulk_ldi8", + &I::genTMABulkLoadI8, + {{{"barrier", asAddr}, + {"src", asAddr}, + {"dst", asAddr}, + {"nelems", asValue}}}, + /*isElemental=*/false}, + {"tma_bulk_ldr2", + &I::genTMABulkLoadR2, + {{{"barrier", asAddr}, + {"src", asAddr}, + {"dst", asAddr}, + {"nelems", asValue}}}, + /*isElemental=*/false}, + {"tma_bulk_ldr4", + &I::genTMABulkLoadR4, + {{{"barrier", asAddr}, + {"src", asAddr}, + {"dst", asAddr}, + {"nelems", asValue}}}, + /*isElemental=*/false}, + {"tma_bulk_ldr8", + &I::genTMABulkLoadR8, + {{{"barrier", asAddr}, + {"src", asAddr}, + {"dst", asAddr}, + {"nelems", asValue}}}, + /*isElemental=*/false}, {"tma_bulk_s2g", &I::genTMABulkS2G, {{{"src", asAddr}, {"dst", asAddr}, {"nbytes", asValue}}}, /*isElemental=*/false}, + {"tma_bulk_store_c4", + &I::genTMABulkStoreC4, + {{{"src", asAddr}, {"dst", asAddr}, {"count", asValue}}}, + /*isElemental=*/false}, + {"tma_bulk_store_c8", + &I::genTMABulkStoreC8, + {{{"src", asAddr}, {"dst", asAddr}, {"count", asValue}}}, + /*isElemental=*/false}, + {"tma_bulk_store_i4", + &I::genTMABulkStoreI4, + {{{"src", asAddr}, {"dst", asAddr}, {"count", asValue}}}, + /*isElemental=*/false}, + {"tma_bulk_store_i8", + &I::genTMABulkStoreI8, + {{{"src", asAddr}, {"dst", asAddr}, {"count", asValue}}}, + /*isElemental=*/false}, + {"tma_bulk_store_r2", + &I::genTMABulkStoreR2, + {{{"src", asAddr}, {"dst", asAddr}, {"count", asValue}}}, + /*isElemental=*/false}, + {"tma_bulk_store_r4", + &I::genTMABulkStoreR4, + {{{"src", asAddr}, {"dst", asAddr}, {"count", asValue}}}, + /*isElemental=*/false}, + {"tma_bulk_store_r8", + &I::genTMABulkStoreR8, + {{{"src", asAddr}, {"dst", asAddr}, {"count", asValue}}}, + /*isElemental=*/false}, {"tma_bulk_wait_group", &I::genTMABulkWaitGroup, {{}}, @@ -3282,6 +3368,57 @@ void IntrinsicLibrary::genBarrierInit(llvm::ArrayRef<fir::ExtendedValue> args) { mlir::NVVM::FenceProxyOp::create(builder, loc, kind, space); } +// BARRIER_TRY_WAIT (CUDA) +mlir::Value +IntrinsicLibrary::genBarrierTryWait(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + assert(args.size() == 2); + mlir::Value res = fir::AllocaOp::create(builder, loc, resultType); + mlir::Value zero = builder.createIntegerConstant(loc, resultType, 0); + fir::StoreOp::create(builder, loc, zero, res); + mlir::Value ns = + builder.createIntegerConstant(loc, builder.getI32Type(), 1000000); + mlir::Value load = fir::LoadOp::create(builder, loc, res); + auto whileOp = mlir::scf::WhileOp::create( + builder, loc, mlir::TypeRange{resultType}, mlir::ValueRange{load}); + mlir::Block *beforeBlock = builder.createBlock(&whileOp.getBefore()); + mlir::Value beforeArg = beforeBlock->addArgument(resultType, loc); + builder.setInsertionPointToStart(beforeBlock); + mlir::Value condition = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::ne, beforeArg, zero); + mlir::scf::ConditionOp::create(builder, loc, condition, beforeArg); + mlir::Block *afterBlock = builder.createBlock(&whileOp.getAfter()); + afterBlock->addArgument(resultType, loc); + builder.setInsertionPointToStart(afterBlock); + auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext()); + auto barrier = builder.createConvert(loc, llvmPtrTy, args[0]); + mlir::Value ret = + mlir::NVVM::InlinePtxOp::create( + builder, loc, {resultType}, {barrier, args[1], ns}, {}, + ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%1], %2, %3; " + "selp.b32 %0, 1, 0, p;", + {}) + .getResult(0); + mlir::scf::YieldOp::create(builder, loc, ret); + builder.setInsertionPointAfter(whileOp); + return whileOp.getResult(0); +} + +// BARRIER_TRY_WAIT_SLEEP (CUDA) +mlir::Value +IntrinsicLibrary::genBarrierTryWaitSleep(mlir::Type resultType, + llvm::ArrayRef<mlir::Value> args) { + assert(args.size() == 3); + auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext()); + auto barrier = builder.createConvert(loc, llvmPtrTy, args[0]); + return mlir::NVVM::InlinePtxOp::create( + builder, loc, {resultType}, {barrier, args[1], args[2]}, {}, + ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%1], %2, %3; " + "selp.b32 %0, 1, 0, p;", + {}) + .getResult(0); +} + // BESSEL_JN fir::ExtendedValue IntrinsicLibrary::genBesselJn(mlir::Type resultType, @@ -9218,6 +9355,93 @@ void IntrinsicLibrary::genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue> args) { builder, loc, dst, src, barrier, fir::getBase(args[3]), {}, {}); } +static void genTMABulkLoad(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value barrier, mlir::Value src, + mlir::Value dst, mlir::Value nelem, + mlir::Value eleSize) { + mlir::Value size = mlir::arith::MulIOp::create(builder, loc, nelem, eleSize); + auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext()); + barrier = builder.createConvert(loc, llvmPtrTy, barrier); + mlir::NVVM::InlinePtxOp::create( + builder, loc, mlir::TypeRange{}, {dst, src, size, barrier}, {}, + "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], " + "[%1], %2, [%3];", + {}); + mlir::NVVM::InlinePtxOp::create( + builder, loc, mlir::TypeRange{}, {barrier, size}, {}, + "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;", {}); +} + +// TMA_BULK_LOADC4 +void IntrinsicLibrary::genTMABulkLoadC4( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 4); + mlir::Value eleSize = + builder.createIntegerConstant(loc, builder.getI32Type(), 8); + genTMABulkLoad(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]), + fir::getBase(args[2]), fir::getBase(args[3]), eleSize); +} + +// TMA_BULK_LOADC8 +void IntrinsicLibrary::genTMABulkLoadC8( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 4); + mlir::Value eleSize = + builder.createIntegerConstant(loc, builder.getI32Type(), 16); + genTMABulkLoad(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]), + fir::getBase(args[2]), fir::getBase(args[3]), eleSize); +} + +// TMA_BULK_LOADI4 +void IntrinsicLibrary::genTMABulkLoadI4( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 4); + mlir::Value eleSize = + builder.createIntegerConstant(loc, builder.getI32Type(), 4); + genTMABulkLoad(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]), + fir::getBase(args[2]), fir::getBase(args[3]), eleSize); +} + +// TMA_BULK_LOADI8 +void IntrinsicLibrary::genTMABulkLoadI8( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 4); + mlir::Value eleSize = + builder.createIntegerConstant(loc, builder.getI32Type(), 8); + genTMABulkLoad(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]), + fir::getBase(args[2]), fir::getBase(args[3]), eleSize); +} + +// TMA_BULK_LOADR2 +void IntrinsicLibrary::genTMABulkLoadR2( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 4); + mlir::Value eleSize = + builder.createIntegerConstant(loc, builder.getI32Type(), 2); + genTMABulkLoad(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]), + fir::getBase(args[2]), fir::getBase(args[3]), eleSize); +} + +// TMA_BULK_LOADR4 +void IntrinsicLibrary::genTMABulkLoadR4( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 4); + mlir::Value eleSize = + builder.createIntegerConstant(loc, builder.getI32Type(), 4); + genTMABulkLoad(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]), + fir::getBase(args[2]), fir::getBase(args[3]), eleSize); +} + +// TMA_BULK_LOADR8 +void IntrinsicLibrary::genTMABulkLoadR8( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 4); + mlir::Value eleSize = + builder.createIntegerConstant(loc, builder.getI32Type(), 8); + genTMABulkLoad(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]), + fir::getBase(args[2]), fir::getBase(args[3]), eleSize); +} + // TMA_BULK_S2G (CUDA) void IntrinsicLibrary::genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue> args) { assert(args.size() == 3); @@ -9227,6 +9451,97 @@ void IntrinsicLibrary::genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue> args) { mlir::NVVM::NVVMMemorySpace::Global); mlir::NVVM::CpAsyncBulkSharedCTAToGlobalOp::create( builder, loc, dst, src, fir::getBase(args[2]), {}, {}); + + mlir::NVVM::InlinePtxOp::create(builder, loc, mlir::TypeRange{}, {}, {}, + "cp.async.bulk.commit_group", {}); + mlir::NVVM::CpAsyncBulkWaitGroupOp::create(builder, loc, + builder.getI32IntegerAttr(0), {}); +} + +static void genTMABulkStore(fir::FirOpBuilder &builder, mlir::Location loc, + mlir::Value src, mlir::Value dst, mlir::Value count, + mlir::Value eleSize) { + mlir::Value size = mlir::arith::MulIOp::create(builder, loc, eleSize, count); + src = convertPtrToNVVMSpace(builder, loc, src, + mlir::NVVM::NVVMMemorySpace::Shared); + dst = convertPtrToNVVMSpace(builder, loc, dst, + mlir::NVVM::NVVMMemorySpace::Global); + mlir::NVVM::CpAsyncBulkSharedCTAToGlobalOp::create(builder, loc, dst, src, + size, {}, {}); + mlir::NVVM::InlinePtxOp::create(builder, loc, mlir::TypeRange{}, {}, {}, + "cp.async.bulk.commit_group", {}); + mlir::NVVM::CpAsyncBulkWaitGroupOp::create(builder, loc, + builder.getI32IntegerAttr(0), {}); +} + +// TMA_BULK_STORE_C4 (CUDA) +void IntrinsicLibrary::genTMABulkStoreC4( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 3); + mlir::Value eleSize = + builder.createIntegerConstant(loc, builder.getI32Type(), 8); + genTMABulkStore(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]), + fir::getBase(args[2]), eleSize); +} + +// TMA_BULK_STORE_C8 (CUDA) +void IntrinsicLibrary::genTMABulkStoreC8( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 3); + mlir::Value eleSize = + builder.createIntegerConstant(loc, builder.getI32Type(), 16); + genTMABulkStore(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]), + fir::getBase(args[2]), eleSize); +} + +// TMA_BULK_STORE_I4 (CUDA) +void IntrinsicLibrary::genTMABulkStoreI4( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 3); + mlir::Value eleSize = + builder.createIntegerConstant(loc, builder.getI32Type(), 4); + genTMABulkStore(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]), + fir::getBase(args[2]), eleSize); +} + +// TMA_BULK_STORE_I8 (CUDA) +void IntrinsicLibrary::genTMABulkStoreI8( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 3); + mlir::Value eleSize = + builder.createIntegerConstant(loc, builder.getI32Type(), 8); + genTMABulkStore(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]), + fir::getBase(args[2]), eleSize); +} + +// TMA_BULK_STORE_R2 (CUDA) +void IntrinsicLibrary::genTMABulkStoreR2( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 3); + mlir::Value eleSize = + builder.createIntegerConstant(loc, builder.getI32Type(), 2); + genTMABulkStore(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]), + fir::getBase(args[2]), eleSize); +} + +// TMA_BULK_STORE_R4 (CUDA) +void IntrinsicLibrary::genTMABulkStoreR4( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 3); + mlir::Value eleSize = + builder.createIntegerConstant(loc, builder.getI32Type(), 4); + genTMABulkStore(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]), + fir::getBase(args[2]), eleSize); +} + +// TMA_BULK_STORE_R8 (CUDA) +void IntrinsicLibrary::genTMABulkStoreR8( + llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 3); + mlir::Value eleSize = + builder.createIntegerConstant(loc, builder.getI32Type(), 8); + genTMABulkStore(builder, loc, fir::getBase(args[0]), fir::getBase(args[1]), + fir::getBase(args[2]), eleSize); } // TMA_BULK_WAIT_GROUP (CUDA) diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 478ab15..ca4aefb 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -680,6 +680,18 @@ struct CallOpConversion : public fir::FIROpConversion<fir::CallOp> { if (mlir::ArrayAttr resAttrs = call.getResAttrsAttr()) llvmCall.setResAttrsAttr(resAttrs); + if (auto inlineAttr = call.getInlineAttrAttr()) { + llvmCall->removeAttr("inline_attr"); + if (inlineAttr.getValue() == fir::FortranInlineEnum::no_inline) { + llvmCall.setNoInlineAttr(rewriter.getUnitAttr()); + } else if (inlineAttr.getValue() == fir::FortranInlineEnum::inline_hint) { + llvmCall.setInlineHintAttr(rewriter.getUnitAttr()); + } else if (inlineAttr.getValue() == + fir::FortranInlineEnum::always_inline) { + llvmCall.setAlwaysInlineAttr(rewriter.getUnitAttr()); + } + } + if (memAttr) llvmCall.setMemoryEffectsAttr( mlir::cast<mlir::LLVM::MemoryEffectsAttr>(memAttr)); diff --git a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp index 0776346..8ca2869 100644 --- a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp +++ b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp @@ -143,7 +143,8 @@ public: llvm::SmallVector<mlir::Type> operandsTypes; for (auto arg : gpuLaunchFunc.getKernelOperands()) operandsTypes.push_back(arg.getType()); - auto fctTy = mlir::FunctionType::get(&context, operandsTypes, {}); + auto fctTy = mlir::FunctionType::get(&context, operandsTypes, + gpuLaunchFunc.getResultTypes()); if (!hasPortableSignature(fctTy, op)) convertCallOp(gpuLaunchFunc, fctTy); } else if (auto addr = mlir::dyn_cast<fir::AddrOfOp>(op)) { @@ -520,10 +521,14 @@ public: llvm::SmallVector<mlir::Value, 1> newCallResults; // TODO propagate/update call argument and result attributes. if constexpr (std::is_same_v<std::decay_t<A>, mlir::gpu::LaunchFuncOp>) { + mlir::Value asyncToken = callOp.getAsyncToken(); auto newCall = A::create(*rewriter, loc, callOp.getKernel(), callOp.getGridSizeOperandValues(), callOp.getBlockSizeOperandValues(), - callOp.getDynamicSharedMemorySize(), newOpers); + callOp.getDynamicSharedMemorySize(), newOpers, + asyncToken ? asyncToken.getType() : nullptr, + callOp.getAsyncDependencies(), + /*clusterSize=*/std::nullopt); if (callOp.getClusterSizeX()) newCall.getClusterSizeXMutable().assign(callOp.getClusterSizeX()); if (callOp.getClusterSizeY()) diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index d0164f3..4f97aca 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -4484,7 +4484,7 @@ void fir::IfOp::getSuccessorRegions( llvm::SmallVectorImpl<mlir::RegionSuccessor> ®ions) { // The `then` and the `else` region branch back to the parent operation. if (!point.isParent()) { - regions.push_back(mlir::RegionSuccessor(getResults())); + regions.push_back(mlir::RegionSuccessor(getOperation(), getResults())); return; } @@ -4494,7 +4494,8 @@ void fir::IfOp::getSuccessorRegions( // Don't consider the else region if it is empty. mlir::Region *elseRegion = &this->getElseRegion(); if (elseRegion->empty()) - regions.push_back(mlir::RegionSuccessor()); + regions.push_back( + mlir::RegionSuccessor(getOperation(), getOperation()->getResults())); else regions.push_back(mlir::RegionSuccessor(elseRegion)); } @@ -4513,7 +4514,7 @@ void fir::IfOp::getEntrySuccessorRegions( if (!getElseRegion().empty()) regions.emplace_back(&getElseRegion()); else - regions.emplace_back(getResults()); + regions.emplace_back(getOperation(), getOperation()->getResults()); } } diff --git a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp index 25a8f7a..8c0acc5 100644 --- a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp @@ -246,7 +246,8 @@ struct DispatchOpConv : public OpConversionPattern<fir::DispatchOp> { args.append(dispatch.getArgs().begin(), dispatch.getArgs().end()); rewriter.replaceOpWithNewOp<fir::CallOp>( dispatch, resTypes, nullptr, args, dispatch.getArgAttrsAttr(), - dispatch.getResAttrsAttr(), dispatch.getProcedureAttrsAttr()); + dispatch.getResAttrsAttr(), dispatch.getProcedureAttrsAttr(), + /*inline_attr*/ fir::FortranInlineEnumAttr{}); return mlir::success(); } diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp index d33a18f..59fe7d8 100644 --- a/flang/lib/Parser/Fortran-parsers.cpp +++ b/flang/lib/Parser/Fortran-parsers.cpp @@ -1314,6 +1314,11 @@ constexpr auto novector{"NOVECTOR" >> construct<CompilerDirective::NoVector>()}; constexpr auto nounroll{"NOUNROLL" >> construct<CompilerDirective::NoUnroll>()}; constexpr auto nounrollAndJam{ "NOUNROLL_AND_JAM" >> construct<CompilerDirective::NoUnrollAndJam>()}; +constexpr auto forceinlineDir{ + "FORCEINLINE" >> construct<CompilerDirective::ForceInline>()}; +constexpr auto noinlineDir{ + "NOINLINE" >> construct<CompilerDirective::NoInline>()}; +constexpr auto inlineDir{"INLINE" >> construct<CompilerDirective::Inline>()}; TYPE_PARSER(beginDirective >> "DIR$ "_tok >> sourced((construct<CompilerDirective>(ignore_tkr) || construct<CompilerDirective>(loopCount) || @@ -1324,6 +1329,9 @@ TYPE_PARSER(beginDirective >> "DIR$ "_tok >> construct<CompilerDirective>(novector) || construct<CompilerDirective>(nounrollAndJam) || construct<CompilerDirective>(nounroll) || + construct<CompilerDirective>(noinlineDir) || + construct<CompilerDirective>(forceinlineDir) || + construct<CompilerDirective>(inlineDir) || construct<CompilerDirective>( many(construct<CompilerDirective::NameValue>( name, maybe(("="_tok || ":"_tok) >> digitString64))))) / diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index d1e081c..4159d2e 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -275,6 +275,13 @@ struct SpecificModifierParser { // --- Iterator helpers ----------------------------------------------- +static EntityDecl MakeEntityDecl(ObjectName &&name) { + return EntityDecl( + /*ObjectName=*/std::move(name), std::optional<ArraySpec>{}, + std::optional<CoarraySpec>{}, std::optional<CharLength>{}, + std::optional<Initialization>{}); +} + // [5.0:47:17-18] In an iterator-specifier, if the iterator-type is not // specified then the type of that iterator is default integer. // [5.0:49:14] The iterator-type must be an integer type. @@ -282,11 +289,7 @@ static std::list<EntityDecl> makeEntityList(std::list<ObjectName> &&names) { std::list<EntityDecl> entities; for (auto iter = names.begin(), end = names.end(); iter != end; ++iter) { - EntityDecl entityDecl( - /*ObjectName=*/std::move(*iter), std::optional<ArraySpec>{}, - std::optional<CoarraySpec>{}, std::optional<CharLength>{}, - std::optional<Initialization>{}); - entities.push_back(std::move(entityDecl)); + entities.push_back(MakeEntityDecl(std::move(*iter))); } return entities; } @@ -306,6 +309,217 @@ static TypeDeclarationStmt makeIterSpecDecl(std::list<ObjectName> &&names) { makeEntityList(std::move(names))); } +// --- Stylized expression handling ----------------------------------- + +// OpenMP has a concept of am "OpenMP stylized expression". Syntactially +// it looks like a typical Fortran expression (or statement), except: +// - the only variables allowed in it are OpenMP special variables, the +// exact set of these variables depends on the specific case of the +// stylized expression +// - the special OpenMP variables present may assume one or more types, +// and the expression should be semantically valid for each type. +// +// The stylized expression can be thought of as a template, which will be +// instantiated for each type provided somewhere in the context in which +// the stylized expression appears. +// +// AST nodes: +// - OmpStylizedExpression: contains the source string for the expression, +// plus the list of instances (OmpStylizedInstance). +// - OmpStylizedInstance: corresponds to the instantiation of the stylized +// expression for a specific type. The way that the type is specified is +// by creating declarations (OmpStylizedDeclaration) for the special +// variables. Together with the AST tree corresponding to the stylized +// expression the instantiation has enough information for semantic +// analysis. Each instance has its own scope, and the special variables +// have their own Symbol's (local to the scope). +// - OmpStylizedDeclaration: encapsulates the information that the visitors +// in resolve-names can use to "emulate" a declaration for a special +// variable and allow name resolution in the instantiation AST to work. +// +// Implementation specifics: +// The semantic analysis stores "evaluate::Expr" in each AST node rooted +// in parser::Expr (in the typedExpr member). The evaluate::Expr is specific +// to a given type, and so to allow different types for a given expression, +// for each type a separate copy of the parser::Expr subtree is created. +// Normally, AST nodes are non-copyable (copy-ctor is deleted), so to create +// several copies of a subtree, the same source string is parsed several +// times. The ParseState member in OmpStylizedExpression is the parser state +// immediately before the stylized expression. +// +// Initially, when OmpStylizedExpression is first created, the expression is +// parsed as if it was an actual code, but this parsing is only done to +// establish where the stylized expression ends (in the source). The source +// and the initial parser state are stored in the object, and the instance +// list is empty. +// Once the parsing of the containing OmpDirectiveSpecification completes, +// a post-processing "parser" (OmpStylizedInstanceCreator) executes. This +// post-processor examines the directive specification to see if it expects +// any stylized expressions to be contained in it, and then instantiates +// them for each such directive. + +template <typename A> struct NeverParser { + using resultType = A; + std::optional<resultType> Parse(ParseState &state) const { + // Always fail, but without any messages. + return std::nullopt; + } +}; + +template <typename A> constexpr auto never() { return NeverParser<A>{}; } + +// Parser for optional<T> which always succeeds and returns std::nullptr. +// It's only needed to produce "std::optional<CallStmt::Chevrons>" in +// CallStmt. +template <typename A, typename B = void> struct NullParser; +template <typename B> struct NullParser<std::optional<B>> { + using resultType = std::optional<B>; + std::optional<resultType> Parse(ParseState &) const { + return resultType{std::nullopt}; + } +}; + +template <typename A> constexpr auto null() { return NullParser<A>{}; } + +// OmpStylizedDeclaration and OmpStylizedInstance are helper classes, and +// don't correspond to anything in the source. Their parsers should still +// exist, but they should never be executed. +TYPE_PARSER(construct<OmpStylizedDeclaration>(never<OmpStylizedDeclaration>())) +TYPE_PARSER(construct<OmpStylizedInstance>(never<OmpStylizedInstance>())) + +TYPE_PARSER( // + construct<OmpStylizedInstance::Instance>(Parser<AssignmentStmt>{}) || + construct<OmpStylizedInstance::Instance>( + sourced(construct<CallStmt>(Parser<ProcedureDesignator>{}, + null<std::optional<CallStmt::Chevrons>>(), + parenthesized(optionalList(actualArgSpec))))) || + construct<OmpStylizedInstance::Instance>(indirect(expr))) + +struct OmpStylizedExpressionParser { + using resultType = OmpStylizedExpression; + + std::optional<resultType> Parse(ParseState &state) const { + auto *saved{new ParseState(state)}; + auto getSource{verbatim(Parser<OmpStylizedInstance::Instance>{} >> ok)}; + if (auto &&ok{getSource.Parse(state)}) { + OmpStylizedExpression result{std::list<OmpStylizedInstance>{}}; + result.source = ok->source; + result.state = saved; + // result.v remains empty + return std::move(result); + } + delete saved; + return std::nullopt; + } +}; + +static void Instantiate(OmpStylizedExpression &ose, + llvm::ArrayRef<const OmpTypeName *> types, llvm::ArrayRef<CharBlock> vars) { + // 1. For each var in the vars list, declare it with the corresponding + // type from types. + // 2. Run the parser to get the AST for the stylized expression. + // 3. Create OmpStylizedInstance and append it to the list in ose. + assert(types.size() == vars.size() && "List size mismatch"); + // A ParseState object is irreversibly modified during parsing (in + // particular, it cannot be rewound to an earlier position in the source). + // Because of that we need to create a local copy for each instantiation. + // If rewinding was possible, we could just use the current one, and we + // wouldn't need to save it in the AST node. + ParseState state{DEREF(ose.state)}; + + std::list<OmpStylizedDeclaration> decls; + for (auto [type, var] : llvm::zip_equal(types, vars)) { + decls.emplace_back(OmpStylizedDeclaration{ + common::Reference(*type), MakeEntityDecl(Name{var})}); + } + + if (auto &&instance{Parser<OmpStylizedInstance::Instance>{}.Parse(state)}) { + ose.v.emplace_back( + OmpStylizedInstance{std::move(decls), std::move(*instance)}); + } +} + +static void InstantiateForTypes(OmpStylizedExpression &ose, + const OmpTypeNameList &typeNames, llvm::ArrayRef<CharBlock> vars) { + // For each type in the type list, declare all variables in vars with + // that type, and complete the instantiation. + for (const OmpTypeName &t : typeNames.v) { + std::vector<const OmpTypeName *> types(vars.size(), &t); + Instantiate(ose, types, vars); + } +} + +static void InstantiateDeclareReduction(OmpDirectiveSpecification &spec) { + // There can be arguments/clauses that don't make sense, that analysis + // is left until semantic checks. Tolerate any unexpected stuff. + auto *rspec{GetFirstArgument<OmpReductionSpecifier>(spec)}; + if (!rspec) { + return; + } + + const OmpTypeNameList *typeNames{nullptr}; + + if (auto *cexpr{ + const_cast<OmpCombinerExpression *>(GetCombinerExpr(*rspec))}) { + typeNames = &std::get<OmpTypeNameList>(rspec->t); + + InstantiateForTypes(*cexpr, *typeNames, OmpCombinerExpression::Variables()); + delete cexpr->state; + cexpr->state = nullptr; + } else { + // If there are no types, there is nothing else to do. + return; + } + + for (const OmpClause &clause : spec.Clauses().v) { + llvm::omp::Clause id{clause.Id()}; + if (id == llvm::omp::Clause::OMPC_initializer) { + if (auto *iexpr{const_cast<OmpInitializerExpression *>( + GetInitializerExpr(clause))}) { + InstantiateForTypes( + *iexpr, *typeNames, OmpInitializerExpression::Variables()); + delete iexpr->state; + iexpr->state = nullptr; + } + } + } +} + +static void InstantiateStylizedDirective(OmpDirectiveSpecification &spec) { + const OmpDirectiveName &dirName{spec.DirName()}; + if (dirName.v == llvm::omp::Directive::OMPD_declare_reduction) { + InstantiateDeclareReduction(spec); + } +} + +template <typename P, + typename = std::enable_if_t< + std::is_same_v<typename P::resultType, OmpDirectiveSpecification>>> +struct OmpStylizedInstanceCreator { + using resultType = OmpDirectiveSpecification; + constexpr OmpStylizedInstanceCreator(P p) : parser_(p) {} + + std::optional<resultType> Parse(ParseState &state) const { + if (auto &&spec{parser_.Parse(state)}) { + InstantiateStylizedDirective(*spec); + return std::move(spec); + } + return std::nullopt; + } + +private: + const P parser_; +}; + +template <typename P> +OmpStylizedInstanceCreator(P) -> OmpStylizedInstanceCreator<P>; + +// --- Parsers for types ---------------------------------------------- + +TYPE_PARSER( // + sourced(construct<OmpTypeName>(Parser<DeclarationTypeSpec>{})) || + sourced(construct<OmpTypeName>(Parser<TypeSpec>{}))) + // --- Parsers for arguments ------------------------------------------ // At the moment these are only directive arguments. This is needed for @@ -366,10 +580,6 @@ struct OmpArgumentListParser { } }; -TYPE_PARSER( // - construct<OmpTypeName>(Parser<DeclarationTypeSpec>{}) || - construct<OmpTypeName>(Parser<TypeSpec>{})) - // 2.15.3.6 REDUCTION (reduction-identifier: variable-name-list) TYPE_PARSER(construct<OmpReductionIdentifier>(Parser<DefinedOperator>{}) || construct<OmpReductionIdentifier>(Parser<ProcedureDesignator>{})) @@ -1065,7 +1275,8 @@ TYPE_PARSER(construct<OmpOtherwiseClause>( TYPE_PARSER(construct<OmpWhenClause>( maybe(nonemptyList(Parser<OmpWhenClause::Modifier>{}) / ":"), - maybe(indirect(Parser<OmpDirectiveSpecification>{})))) + maybe(indirect( + OmpStylizedInstanceCreator(Parser<OmpDirectiveSpecification>{}))))) // OMP 5.2 12.6.1 grainsize([ prescriptiveness :] scalar-integer-expression) TYPE_PARSER(construct<OmpGrainsizeClause>( @@ -1777,12 +1988,7 @@ TYPE_PARSER( Parser<OpenMPInteropConstruct>{})) / endOfLine) -TYPE_PARSER(construct<OmpInitializerProc>(Parser<ProcedureDesignator>{}, - parenthesized(many(maybe(","_tok) >> Parser<ActualArgSpec>{})))) - -TYPE_PARSER(construct<OmpInitializerClause>( - construct<OmpInitializerClause>(assignmentStmt) || - construct<OmpInitializerClause>(Parser<OmpInitializerProc>{}))) +TYPE_PARSER(construct<OmpInitializerClause>(Parser<OmpInitializerExpression>{})) // OpenMP 5.2: 7.5.4 Declare Variant directive TYPE_PARSER(sourced(construct<OmpDeclareVariantDirective>( @@ -1794,7 +2000,7 @@ TYPE_PARSER(sourced(construct<OmpDeclareVariantDirective>( TYPE_PARSER(sourced(construct<OpenMPDeclareReductionConstruct>( predicated(Parser<OmpDirectiveName>{}, IsDirective(llvm::omp::Directive::OMPD_declare_reduction)) >= - Parser<OmpDirectiveSpecification>{}))) + OmpStylizedInstanceCreator(Parser<OmpDirectiveSpecification>{})))) // 2.10.6 Declare Target Construct TYPE_PARSER(sourced(construct<OpenMPDeclareTargetConstruct>( @@ -1832,8 +2038,8 @@ TYPE_PARSER(sourced(construct<OpenMPDeclareMapperConstruct>( IsDirective(llvm::omp::Directive::OMPD_declare_mapper)) >= Parser<OmpDirectiveSpecification>{}))) -TYPE_PARSER(construct<OmpCombinerExpression>(Parser<AssignmentStmt>{}) || - construct<OmpCombinerExpression>(Parser<FunctionReference>{})) +TYPE_PARSER(construct<OmpCombinerExpression>(OmpStylizedExpressionParser{})) +TYPE_PARSER(construct<OmpInitializerExpression>(OmpStylizedExpressionParser{})) TYPE_PARSER(sourced(construct<OpenMPCriticalConstruct>( OmpBlockConstructParser{llvm::omp::Directive::OMPD_critical}))) diff --git a/flang/lib/Parser/openmp-utils.cpp b/flang/lib/Parser/openmp-utils.cpp index 937a17f..95ad3f6 100644 --- a/flang/lib/Parser/openmp-utils.cpp +++ b/flang/lib/Parser/openmp-utils.cpp @@ -74,4 +74,16 @@ const BlockConstruct *GetFortranBlockConstruct( return nullptr; } +const OmpCombinerExpression *GetCombinerExpr( + const OmpReductionSpecifier &rspec) { + return addr_if(std::get<std::optional<OmpCombinerExpression>>(rspec.t)); +} + +const OmpInitializerExpression *GetInitializerExpr(const OmpClause &init) { + if (auto *wrapped{std::get_if<OmpClause::Initializer>(&init.u)}) { + return &wrapped->v.v; + } + return nullptr; +} + } // namespace Fortran::parser::omp diff --git a/flang/lib/Parser/parse-tree.cpp b/flang/lib/Parser/parse-tree.cpp index 8cbaa39..ad0016e 100644 --- a/flang/lib/Parser/parse-tree.cpp +++ b/flang/lib/Parser/parse-tree.cpp @@ -11,6 +11,7 @@ #include "flang/Common/indirection.h" #include "flang/Parser/tools.h" #include "flang/Parser/user-state.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Frontend/OpenMP/OMP.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -430,4 +431,30 @@ const OmpClauseList &OmpDirectiveSpecification::Clauses() const { } return empty; } + +static bool InitCharBlocksFromStrings(llvm::MutableArrayRef<CharBlock> blocks, + llvm::ArrayRef<std::string> strings) { + for (auto [i, n] : llvm::enumerate(strings)) { + blocks[i] = CharBlock(n); + } + return true; +} + +// The names should have static storage duration. Keep these names +// in a sigle place. +llvm::ArrayRef<CharBlock> OmpCombinerExpression::Variables() { + static std::string names[]{"omp_in", "omp_out"}; + static CharBlock vars[std::size(names)]; + + [[maybe_unused]] static bool init = InitCharBlocksFromStrings(vars, names); + return vars; +} + +llvm::ArrayRef<CharBlock> OmpInitializerExpression::Variables() { + static std::string names[]{"omp_orig", "omp_priv"}; + static CharBlock vars[std::size(names)]; + + [[maybe_unused]] static bool init = InitCharBlocksFromStrings(vars, names); + return vars; +} } // namespace Fortran::parser diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 2f86c76..9b38cfc 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -1867,6 +1867,13 @@ public: [&](const CompilerDirective::NoUnrollAndJam &) { Word("!DIR$ NOUNROLL_AND_JAM"); }, + [&](const CompilerDirective::ForceInline &) { + Word("!DIR$ FORCEINLINE"); + }, + [&](const CompilerDirective::Inline &) { Word("!DIR$ INLINE"); }, + [&](const CompilerDirective::NoInline &) { + Word("!DIR$ NOINLINE"); + }, [&](const CompilerDirective::Unrecognized &) { Word("!DIR$ "); Word(x.source.ToString()); @@ -2088,15 +2095,13 @@ public: // OpenMP Clauses & Directives void Unparse(const OmpArgumentList &x) { Walk(x.v, ", "); } + void Unparse(const OmpTypeNameList &x) { Walk(x.v, ", "); } void Unparse(const OmpBaseVariantNames &x) { Walk(std::get<0>(x.t)); // OmpObject Put(":"); Walk(std::get<1>(x.t)); // OmpObject } - void Unparse(const OmpTypeNameList &x) { // - Walk(x.v, ","); - } void Unparse(const OmpMapperSpecifier &x) { const auto &mapperName{std::get<std::string>(x.t)}; if (mapperName.find(llvm::omp::OmpDefaultMapperName) == std::string::npos) { @@ -2195,6 +2200,15 @@ public: unsigned ompVersion{langOpts_.OpenMPVersion}; Word(llvm::omp::getOpenMPDirectiveName(x.v, ompVersion)); } + void Unparse(const OmpStylizedDeclaration &x) { + // empty + } + void Unparse(const OmpStylizedExpression &x) { // + Put(x.source.ToString()); + } + void Unparse(const OmpStylizedInstance &x) { + // empty + } void Unparse(const OmpIteratorSpecifier &x) { Walk(std::get<TypeDeclarationStmt>(x.t)); Put(" = "); @@ -2504,29 +2518,11 @@ public: void Unparse(const OpenMPCriticalConstruct &x) { Unparse(static_cast<const OmpBlockConstruct &>(x)); } - void Unparse(const OmpInitializerProc &x) { - Walk(std::get<ProcedureDesignator>(x.t)); - Put("("); - Walk(std::get<std::list<ActualArgSpec>>(x.t)); - Put(")"); - } - void Unparse(const OmpInitializerClause &x) { - // Don't let the visitor go to the normal AssignmentStmt Unparse function, - // it adds an extra newline that we don't want. - if (const auto *assignment{std::get_if<AssignmentStmt>(&x.u)}) { - Walk(assignment->t, " = "); - } else { - Walk(x.u); - } + void Unparse(const OmpInitializerExpression &x) { + Unparse(static_cast<const OmpStylizedExpression &>(x)); } void Unparse(const OmpCombinerExpression &x) { - // Don't let the visitor go to the normal AssignmentStmt Unparse function, - // it adds an extra newline that we don't want. - if (const auto *assignment{std::get_if<AssignmentStmt>(&x.u)}) { - Walk(assignment->t, " = "); - } else { - Walk(x.u); - } + Unparse(static_cast<const OmpStylizedExpression &>(x)); } void Unparse(const OpenMPDeclareReductionConstruct &x) { BeginOpenMP(); diff --git a/flang/lib/Semantics/canonicalize-directives.cpp b/flang/lib/Semantics/canonicalize-directives.cpp index 104df25..a651a87 100644 --- a/flang/lib/Semantics/canonicalize-directives.cpp +++ b/flang/lib/Semantics/canonicalize-directives.cpp @@ -60,7 +60,11 @@ static bool IsExecutionDirective(const parser::CompilerDirective &dir) { std::holds_alternative<parser::CompilerDirective::UnrollAndJam>(dir.u) || std::holds_alternative<parser::CompilerDirective::NoVector>(dir.u) || std::holds_alternative<parser::CompilerDirective::NoUnroll>(dir.u) || - std::holds_alternative<parser::CompilerDirective::NoUnrollAndJam>(dir.u); + std::holds_alternative<parser::CompilerDirective::NoUnrollAndJam>( + dir.u) || + std::holds_alternative<parser::CompilerDirective::ForceInline>(dir.u) || + std::holds_alternative<parser::CompilerDirective::Inline>(dir.u) || + std::holds_alternative<parser::CompilerDirective::NoInline>(dir.u); } void CanonicalizationOfDirectives::Post(parser::SpecificationPart &spec) { diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 196755e..628068f 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -26,6 +26,8 @@ #include "flang/Semantics/symbol.h" #include "flang/Semantics/tools.h" #include "flang/Support/Flags.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Frontend/OpenMP/OMP.h.inc" #include "llvm/Support/Debug.h" #include <list> @@ -453,6 +455,21 @@ public: return true; } + bool Pre(const parser::OmpStylizedDeclaration &x) { + static llvm::StringMap<Symbol::Flag> map{ + {"omp_in", Symbol::Flag::OmpInVar}, + {"omp_orig", Symbol::Flag::OmpOrigVar}, + {"omp_out", Symbol::Flag::OmpOutVar}, + {"omp_priv", Symbol::Flag::OmpPrivVar}, + }; + if (auto &name{std::get<parser::ObjectName>(x.var.t)}; name.symbol) { + if (auto found{map.find(name.ToString())}; found != map.end()) { + ResolveOmp(name, found->second, + const_cast<Scope &>(DEREF(name.symbol).owner())); + } + } + return false; + } bool Pre(const parser::OmpMetadirectiveDirective &x) { PushContext(x.v.source, llvm::omp::Directive::OMPD_metadirective); return true; diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 561ebd2..0e6d4c7 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -1605,6 +1605,12 @@ public: Post(static_cast<const parser::OmpDirectiveSpecification &>(x)); } + void Post(const parser::OmpTypeName &); + bool Pre(const parser::OmpStylizedDeclaration &); + void Post(const parser::OmpStylizedDeclaration &); + bool Pre(const parser::OmpStylizedInstance &); + void Post(const parser::OmpStylizedInstance &); + bool Pre(const parser::OpenMPDeclareMapperConstruct &x) { AddOmpSourceRange(x.source); return true; @@ -1615,18 +1621,6 @@ public: return true; } - bool Pre(const parser::OmpInitializerProc &x) { - auto &procDes = std::get<parser::ProcedureDesignator>(x.t); - auto &name = std::get<parser::Name>(procDes.u); - auto *symbol{FindSymbol(NonDerivedTypeScope(), name)}; - if (!symbol) { - context().Say(name.source, - "Implicit subroutine declaration '%s' in DECLARE REDUCTION"_err_en_US, - name.source); - } - return true; - } - bool Pre(const parser::OmpDeclareVariantDirective &x) { AddOmpSourceRange(x.source); return true; @@ -1772,14 +1766,6 @@ public: messageHandler().set_currStmtSource(std::nullopt); } - bool Pre(const parser::OmpTypeName &x) { - BeginDeclTypeSpec(); - return true; - } - void Post(const parser::OmpTypeName &x) { // - EndDeclTypeSpec(); - } - bool Pre(const parser::OpenMPConstruct &x) { // Indicate that the current directive is not a declarative one. declaratives_.push_back(nullptr); @@ -1835,6 +1821,30 @@ void OmpVisitor::Post(const parser::OmpBlockConstruct &x) { } } +void OmpVisitor::Post(const parser::OmpTypeName &x) { + x.declTypeSpec = GetDeclTypeSpec(); +} + +bool OmpVisitor::Pre(const parser::OmpStylizedDeclaration &x) { + BeginDecl(); + Walk(x.type.get()); + Walk(x.var); + return true; +} + +void OmpVisitor::Post(const parser::OmpStylizedDeclaration &x) { // + EndDecl(); +} + +bool OmpVisitor::Pre(const parser::OmpStylizedInstance &x) { + PushScope(Scope::Kind::OtherConstruct, nullptr); + return true; +} + +void OmpVisitor::Post(const parser::OmpStylizedInstance &x) { // + PopScope(); +} + bool OmpVisitor::Pre(const parser::OmpMapClause &x) { auto &mods{OmpGetModifiers(x)}; if (auto *mapper{OmpGetUniqueModifier<parser::OmpMapper>(mods)}) { @@ -1969,51 +1979,20 @@ void OmpVisitor::ProcessReductionSpecifier( } } - auto &typeList{std::get<parser::OmpTypeNameList>(spec.t)}; - - // Create a temporary variable declaration for the four variables - // used in the reduction specifier and initializer (omp_out, omp_in, - // omp_priv and omp_orig), with the type in the typeList. - // - // In theory it would be possible to create only variables that are - // actually used, but that requires walking the entire parse-tree of the - // expressions, and finding the relevant variables [there may well be other - // variables involved too]. - // - // This allows doing semantic analysis where the type is a derived type - // e.g omp_out%x = omp_out%x + omp_in%x. - // - // These need to be temporary (in their own scope). If they are created - // as variables in the outer scope, if there's more than one type in the - // typelist, duplicate symbols will be reported. - const parser::CharBlock ompVarNames[]{ - {"omp_in", 6}, {"omp_out", 7}, {"omp_priv", 8}, {"omp_orig", 8}}; - - for (auto &t : typeList.v) { - PushScope(Scope::Kind::OtherConstruct, nullptr); - BeginDeclTypeSpec(); - // We need to walk t.u because Walk(t) does it's own BeginDeclTypeSpec. - Walk(t.u); + reductionDetails->AddDecl(declaratives_.back()); - // Only process types we can find. There will be an error later on when - // a type isn't found. - if (const DeclTypeSpec *typeSpec{GetDeclTypeSpec()}) { - reductionDetails->AddType(*typeSpec); + // Do not walk OmpTypeNameList. The types on the list will be visited + // during procesing of OmpCombinerExpression. + Walk(std::get<std::optional<parser::OmpCombinerExpression>>(spec.t)); + Walk(clauses); - for (auto &nm : ompVarNames) { - ObjectEntityDetails details{}; - details.set_type(*typeSpec); - MakeSymbol(nm, Attrs{}, std::move(details)); - } + for (auto &type : std::get<parser::OmpTypeNameList>(spec.t).v) { + // The declTypeSpec can be null if there is some semantic error. + if (type.declTypeSpec) { + reductionDetails->AddType(*type.declTypeSpec); } - EndDeclTypeSpec(); - Walk(std::get<std::optional<parser::OmpCombinerExpression>>(spec.t)); - Walk(clauses); - PopScope(); } - reductionDetails->AddDecl(declaratives_.back()); - if (!symbol) { symbol = &MakeSymbol(mangledName, Attrs{}, std::move(*reductionDetails)); } @@ -10078,7 +10057,10 @@ void ResolveNamesVisitor::Post(const parser::CompilerDirective &x) { std::holds_alternative<parser::CompilerDirective::UnrollAndJam>(x.u) || std::holds_alternative<parser::CompilerDirective::NoVector>(x.u) || std::holds_alternative<parser::CompilerDirective::NoUnroll>(x.u) || - std::holds_alternative<parser::CompilerDirective::NoUnrollAndJam>(x.u)) { + std::holds_alternative<parser::CompilerDirective::NoUnrollAndJam>(x.u) || + std::holds_alternative<parser::CompilerDirective::ForceInline>(x.u) || + std::holds_alternative<parser::CompilerDirective::Inline>(x.u) || + std::holds_alternative<parser::CompilerDirective::NoInline>(x.u)) { return; } if (const auto *tkr{ diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 index 5182950..59af58d 100644 --- a/flang/module/cudadevice.f90 +++ b/flang/module/cudadevice.f90 @@ -1998,6 +1998,18 @@ implicit none ! TMA Operations + interface barrier_arrive + attributes(device) function barrier_arrive(barrier) result(token) + integer(8), shared :: barrier + integer(8) :: token + end function + attributes(device) function barrier_arrive_cnt(barrier, count) result(token) + integer(8), shared :: barrier + integer(4), value :: count + integer(8) :: token + end function + end interface + interface attributes(device) subroutine barrier_init(barrier, count) integer(8), shared :: barrier @@ -2005,15 +2017,18 @@ implicit none end subroutine end interface - interface barrier_arrive - attributes(device) function barrier_arrive(barrier) result(token) + interface + attributes(device) integer function barrier_try_wait(barrier, token) integer(8), shared :: barrier - integer(8) :: token + integer(8), value :: token end function - attributes(device) function barrier_arrive_cnt(barrier, count) result(token) + end interface + + interface + attributes(device) integer function barrier_try_wait_sleep(barrier, token, ns) integer(8), shared :: barrier - integer(4), value :: count - integer(8) :: token + integer(8), value :: token + integer(4), value :: ns end function end interface @@ -2032,7 +2047,13 @@ implicit none end subroutine end interface + ! -------------------- + ! Bulk load functions + ! -------------------- + ! Generic load, count is in bytes + ! ------------------------------- + interface attributes(device) subroutine tma_bulk_g2s(barrier, src, dst, nbytes) !dir$ ignore_tkr src, dst @@ -2043,6 +2064,74 @@ implicit none end subroutine end interface + ! Load specific types, count is in elements + ! ----------------------------------------- + + interface tma_bulk_load + attributes(device) subroutine tma_bulk_ldc4(barrier, src, dst, nelems) + !dir$ ignore_tkr (r) src, (r) dst + integer(8), shared :: barrier + complex(4), device :: src(*) + complex(4), shared :: dst(*) + integer(4), value :: nelems + end subroutine + + attributes(device) subroutine tma_bulk_ldc8(barrier, src, dst, nelems) + !dir$ ignore_tkr (r) src, (r) dst + integer(8), shared :: barrier + complex(8), device :: src(*) + complex(8), shared :: dst(*) + integer(4), value :: nelems + end subroutine + + attributes(device) subroutine tma_bulk_ldi4(barrier, src, dst, nelems) + !dir$ ignore_tkr (r) src, (r) dst + integer(8), shared :: barrier + integer(4), device :: src(*) + integer(4), shared :: dst(*) + integer(4), value :: nelems + end subroutine + + attributes(device) subroutine tma_bulk_ldi8(barrier, src, dst, nelems) + !dir$ ignore_tkr (r) src, (r) dst + integer(8), shared :: barrier + integer(8), device :: src(*) + integer(8), shared :: dst(*) + integer(4), value :: nelems + end subroutine + + attributes(device) subroutine tma_bulk_ldr2(barrier, src, dst, nelems) + !dir$ ignore_tkr (r) src, (r) dst + integer(8), shared :: barrier + real(2), device :: src(*) + real(2), shared :: dst(*) + integer(4), value :: nelems + end subroutine + + attributes(device) subroutine tma_bulk_ldr4(barrier, src, dst, nelems) + !dir$ ignore_tkr (r) src, (r) dst + integer(8), shared :: barrier + real(4), device :: src(*) + real(4), shared :: dst(*) + integer(4), value :: nelems + end subroutine + + attributes(device) subroutine tma_bulk_ldr8(barrier, src, dst, nelems) + !dir$ ignore_tkr (r) src, (r) dst + integer(8), shared :: barrier + real(8), device :: src(*) + real(8), shared :: dst(*) + integer(4), value :: nelems + end subroutine + end interface + + ! -------------------- + ! Bulk Store functions + ! -------------------- + + ! Generic store, count is in bytes + ! -------------------------------- + interface attributes(device) subroutine tma_bulk_s2g(src, dst, nbytes) !dir$ ignore_tkr src, dst @@ -2052,6 +2141,60 @@ implicit none end subroutine end interface + ! Load specific types, count is in elements + ! ----------------------------------------- + + interface tma_bulk_store + attributes(device) subroutine tma_bulk_store_c4(src, dst, nelems) + !dir$ ignore_tkr (r) src, (r) dst + complex(4), shared :: src(*) + complex(4), device :: dst(*) + integer(4), value :: nelems + end subroutine + + attributes(device) subroutine tma_bulk_store_c8(src, dst, nelems) + !dir$ ignore_tkr (r) src, (r) dst + complex(8), shared :: src(*) + complex(8), device :: dst(*) + integer(4), value :: nelems + end subroutine + + attributes(device) subroutine tma_bulk_store_i4(src, dst, nelems) + !dir$ ignore_tkr (r) src, (r) dst + integer(4), shared :: src(*) + integer(4), device :: dst(*) + integer(4), value :: nelems + end subroutine + + attributes(device) subroutine tma_bulk_store_i8(src, dst, nelems) + !dir$ ignore_tkr (r) src, (r) dst + integer(8), shared :: src(*) + integer(8), device :: dst(*) + integer(4), value :: nelems + end subroutine + + attributes(device) subroutine tma_bulk_store_r2(src, dst, nelems) + !dir$ ignore_tkr (r) src, (r) dst + real(2), shared :: src(*) + real(2), device :: dst(*) + integer(4), value :: nelems + end subroutine + + attributes(device) subroutine tma_bulk_store_r4(src, dst, nelems) + !dir$ ignore_tkr (r) src, (r) dst + real(4), shared :: src(*) + real(4), device :: dst(*) + integer(4), value :: nelems + end subroutine + + attributes(device) subroutine tma_bulk_store_r8(src, dst, nelems) + !dir$ ignore_tkr (r) src, (r) dst + real(8), shared :: src(*) + real(8), device :: dst(*) + integer(4), value :: nelems + end subroutine + end interface + contains attributes(device) subroutine syncthreads() diff --git a/flang/test/Fir/CUDA/cuda-target-rewrite.mlir b/flang/test/Fir/CUDA/cuda-target-rewrite.mlir index 48fee10..5562e00 100644 --- a/flang/test/Fir/CUDA/cuda-target-rewrite.mlir +++ b/flang/test/Fir/CUDA/cuda-target-rewrite.mlir @@ -108,3 +108,23 @@ module attributes {gpu.container_module, fir.defaultkind = "a1c4d8i4l4r4", fir.k } } +// ----- + +module attributes {gpu.container_module, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.target_triple = "x86_64-unknown-linux-gnu"} { + gpu.module @testmod { + gpu.func @_QPtest(%arg0: complex<f32>) -> () kernel { + gpu.return + } + } + func.func @main(%arg0: complex<f32>) { + %0 = llvm.mlir.constant(0 : i64) : i64 + %1 = llvm.mlir.constant(0 : i32) : i32 + %2 = fir.alloca i64 + %3 = cuf.stream_cast %2 : !fir.ref<i64> + %4 = gpu.launch_func async [%3] @testmod::@_QPtest blocks in (%0, %0, %0) threads in (%0, %0, %0) : i64 dynamic_shared_memory_size %1 args(%arg0 : complex<f32>) {cuf.proc_attr = #cuf.cuda_proc<global>} + return + } +} + +// CHECK-LABEL: func.func @main +// CHECK: %{{.*}} = gpu.launch_func async [%{{.*}}] @testmod::@_QPtest blocks in (%{{.*}}, %{{.*}}, %{{.*}}) threads in (%{{.*}}, %{{.*}}, %{{.*}}) : i64 dynamic_shared_memory_size %{{.*}} args(%{{.*}} : !fir.vector<2:f32>) {cuf.proc_attr = #cuf.cuda_proc<global>} diff --git a/flang/test/Integration/inline_directive.f90 b/flang/test/Integration/inline_directive.f90 new file mode 100644 index 0000000..1f05384 --- /dev/null +++ b/flang/test/Integration/inline_directive.f90 @@ -0,0 +1,69 @@ +! This directory can be used to add Integration tests involving multiple stages of the compiler (for eg. from Fortran to LLVM IR). +! It should not contain executable tests. We should only add tests here sparingly and only if there is no other way to test. +! RUN: %flang_fc1 -emit-llvm -o - %s | FileCheck %s + +! CHECK-LABEL: test_inline +subroutine test_inline() + integer :: x, y +!CHECK: %[[VAL_1:.*]] = alloca i32, i64 1, align 4 +!CHECK: %[[VAL_2:.*]] = alloca i32, i64 1, align 4 +!CHECK: %[[VAL_3:.*]] = alloca i32, i64 1, align 4 +!CHECK: %[[VAL_4:.*]] = alloca i32, i64 1, align 4 + + !dir$ forceinline + y = g(x) + !dir$ forceinline + call f(x, y) +!CHECK: %[[VAL_5:.*]] = load i32, ptr %[[VAL_3]], align 4 +!CHECK: %[[VAL_6:.*]] = mul i32 %[[VAL_5]], 2 +!CHECK: store i32 %6, ptr %[[VAL_1]], align 4 +!CHECK: %[[VAL_7:.*]] = load i32, ptr %[[VAL_1]], align 4 +!CHECK: store i32 %7, ptr %[[VAL_2]], align 4 +!CHECK: %[[VAL_8:.]] = load i32, ptr %[[VAL_3]], align 4 +!CHECK: %[[VAL_9:.]] = mul i32 %[[VAL_8]], 2 +!CHECK: store i32 %9, ptr %[[VAL_2]], align 4 + + !dir$ inline + y = g(x) + !dir$ inline + call f(x, y) +!CHECK: %[[VAL_10:.*]] = call i32 @_QFtest_inlinePg(ptr %[[VAL_3]]) #[[INLINE:.*]] +!CHECK: store i32 %[[VAL_10]], ptr %[[VAL_2]], align 4 +!CHECK: call void @_QFtest_inlinePf(ptr %[[VAL_3]], ptr %[[VAL_2]]) #[[INLINE]] + + !dir$ inline + do i = 1, 100 + call f(x, y) + !CHECK: br i1 %[[VAL_14:.*]], label %[[VAL_15:.*]], label %[[VAL_19:.*]] + !CHECK: call void @_QFtest_inlinePf(ptr %[[VAL_3]], ptr %[[VAL_2]]) #[[INLINE]] + enddo + + !dir$ noinline + y = g(x) + !dir$ noinline + call f(x, y) +!CHECK: %[[VAL_10:.*]] = call i32 @_QFtest_inlinePg(ptr %[[VAL_3]]) #[[NOINLINE:.*]] +!CHECK: store i32 %[[VAL_10]], ptr %[[VAL_2]], align 4 +!CHECK: call void @_QFtest_inlinePf(ptr %[[VAL_3]], ptr %[[VAL_2]]) #[[NOINLINE]] + + !dir$ noinline + do i = 1, 100 + call f(x, y) + !CHECK: br i1 %[[VAL_14:.*]], label %[[VAL_15:.*]], label %[[VAL_19:.*]] + !CHECK: call void @_QFtest_inlinePf(ptr %[[VAL_3]], ptr %[[VAL_2]]) #[[NOINLINE]] + enddo + + contains + subroutine f(x, y) + integer, intent(in) :: x + integer, intent(out) :: y + y = x*2 + end subroutine f + integer function g(x) + integer :: x + g = x*2 + end function g +end subroutine test_inline + +!CHECK: attributes #[[INLINE]] = { inlinehint } +!CHECK: attributes #[[NOINLINE]] = { noinline } diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 5c4c3c6..8f35521 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -479,6 +479,8 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_bulk_s2g ! CHECL: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 attributes(device) subroutine testAtomicCasLoop(aa, n) integer :: a @@ -492,3 +494,250 @@ end subroutine ! CHECK: %[[CASTED_CMP_XCHG_EV:.*]] = fir.convert %[[CMP_XCHG_EV]] : (i1) -> i32 ! CHECK: %{{.*}} = arith.constant 1 : i32 ! CHECK: %19 = arith.cmpi eq, %[[CASTED_CMP_XCHG_EV]], %{{.*}} : i32 + +attributes(global) subroutine test_barrier_try_wait() + integer :: istat + integer(8), shared :: barrier1 + integer(8) :: token + istat = barrier_try_wait(barrier1, token) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_barrier_try_wait() +! CHECK: scf.while +! CHECK: %{{.*}} = nvvm.inline_ptx ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}}; selp.b32 %{{.*}}, 1, 0, p;" ro(%{{.*}}, %{{.*}}, %c1000000{{.*}} : !llvm.ptr, i64, i32) -> i32 + +attributes(global) subroutine test_barrier_try_wait_sleep() + integer :: istat + integer(8), shared :: barrier1 + integer(8) :: token + integer(4) :: sleep_time + istat = barrier_try_wait_sleep(barrier1, token, sleep_time) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_barrier_try_wait_sleep() +! CHECK: %{{.*}} = nvvm.inline_ptx ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}}; selp.b32 %0, 1, 0, p;" ro(%{{.*}}, %{{.*}}, %{{.*}} : !llvm.ptr, i64, i32) -> i32 + +attributes(global) subroutine test_tma_bulk_load_c4(a, n) + integer(8), shared :: barrier1 + integer, value :: n + complex(4), device :: r8(n) + complex(4), shared :: tmp(1024) + integer(4) :: j, elem_count + call tma_bulk_load(barrier1, r8(j), tmp, elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_c4 +! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_c4Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_c4Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32> +! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32 +! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 +! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !fir.ref<!fir.array<1024xcomplex<f32>>>, !fir.ref<complex<f32>>, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) + +attributes(global) subroutine test_tma_bulk_load_c8(a, n) + integer(8), shared :: barrier1 + integer, value :: n + complex(8), device :: r8(n) + complex(8), shared :: tmp(1024) + integer(4) :: j, elem_count + call tma_bulk_load(barrier1, r8(j), tmp, elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_c8 +! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_c8Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_c8Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32> +! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 16 : i32 +! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 +! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !fir.ref<!fir.array<1024xcomplex<f64>>>, !fir.ref<complex<f64>>, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) + +attributes(global) subroutine test_tma_bulk_load_i4(a, n) + integer(8), shared :: barrier1 + integer, value :: n + integer(4), device :: r8(n) + integer(4), shared :: tmp(1024) + integer(4) :: j, elem_count + call tma_bulk_load(barrier1, r8(j), tmp, elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_i4 +! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_i4Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_i4Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32> +! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 4 : i32 +! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 +! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !fir.ref<!fir.array<1024xi32>>, !fir.ref<i32>, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) + +attributes(global) subroutine test_tma_bulk_load_i8(a, n) + integer(8), shared :: barrier1 + integer, value :: n + integer(8), device :: r8(n) + integer(8), shared :: tmp(1024) + integer(4) :: j, elem_count + call tma_bulk_load(barrier1, r8(j), tmp, elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_i8 +! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_i8Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_i8Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32> +! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32 +! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 +! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !fir.ref<!fir.array<1024xi64>>, !fir.ref<i64>, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) + +attributes(global) subroutine test_tma_bulk_load_r2(a, n) + integer(8), shared :: barrier1 + integer, value :: n + real(2), device :: r8(n) + real(2), shared :: tmp(1024) + integer(4) :: j, elem_count + call tma_bulk_load(barrier1, r8(j), tmp, elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_r2 +! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_r2Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_r2Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32> +! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 2 : i32 +! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 +! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !fir.ref<!fir.array<1024xf16>>, !fir.ref<f16>, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) + +attributes(global) subroutine test_tma_bulk_load_r4(a, n) + integer(8), shared :: barrier1 + integer, value :: n + real(4), device :: r8(n) + real(4), shared :: tmp(1024) + integer(4) :: j, elem_count + call tma_bulk_load(barrier1, r8(j), tmp, elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_r4 +! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_r4Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_r4Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32> +! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 4 : i32 +! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 +! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !fir.ref<!fir.array<1024xf32>>, !fir.ref<f32>, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) + +attributes(global) subroutine test_tma_bulk_load_r8(a, n) + integer(8), shared :: barrier1 + integer, value :: n + real(8), device :: r8(n) + real(8), shared :: tmp(1024) + integer(4) :: j, elem_count + call tma_bulk_load(barrier1, r8(j), tmp, elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_r8 +! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_r8Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_r8Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32> +! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32 +! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 +! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !fir.ref<!fir.array<1024xf64>>, !fir.ref<f64>, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) + +attributes(global) subroutine test_tma_bulk_store_c4(c, n) + integer, value :: n + complex(4), device :: c(n) + complex(4), shared :: tmpa(1024) + integer(4) :: j, elem_count + call tma_bulk_store(tmpa, c(j), elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_c4 +! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 + +attributes(global) subroutine test_tma_bulk_store_c8(c, n) + integer, value :: n + complex(8), device :: c(n) + complex(8), shared :: tmpa(1024) + integer(4) :: j, elem_count + call tma_bulk_store(tmpa, c(j), elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_c8 +! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 + +attributes(global) subroutine test_tma_bulk_store_i4(c, n) + integer, value :: n + integer(4), device :: c(n) + integer(4), shared :: tmpa(1024) + integer(4) :: j, elem_count + call tma_bulk_store(tmpa, c(j), elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_i4 +! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 + +attributes(global) subroutine test_tma_bulk_store_i8(c, n) + integer, value :: n + integer(8), device :: c(n) + integer(8), shared :: tmpa(1024) + integer(4) :: j, elem_count + call tma_bulk_store(tmpa, c(j), elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_i8 +! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 + + +attributes(global) subroutine test_tma_bulk_store_r2(c, n) + integer, value :: n + real(2), device :: c(n) + real(2), shared :: tmpa(1024) + integer(4) :: j, elem_count + call tma_bulk_store(tmpa, c(j), elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r2 +! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 + +attributes(global) subroutine test_tma_bulk_store_r4(c, n) + integer, value :: n + real(4), device :: c(n) + real(4), shared :: tmpa(1024) + integer(4) :: j, elem_count + call tma_bulk_store(tmpa, c(j), elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r4 +! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 + +attributes(global) subroutine test_tma_bulk_store_r8(c, n) + integer, value :: n + real(8), device :: c(n) + real(8), shared :: tmpa(1024) + integer(4) :: j, elem_count + call tma_bulk_store(tmpa, c(j), elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r8 +! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 diff --git a/flang/test/Lower/inline_directive.f90 b/flang/test/Lower/inline_directive.f90 new file mode 100644 index 0000000..347df85 --- /dev/null +++ b/flang/test/Lower/inline_directive.f90 @@ -0,0 +1,61 @@ +! RUN: %flang_fc1 -emit-fir -o - %s | FileCheck %s + +subroutine test_inline() + integer :: x, y +!CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtest_inlineEx"} +!CHECK: %[[VAL_1:.*]] = fir.declare %[[VAL_0]] {uniq_name = "_QFtest_inlineEx"} : (!fir.ref<i32>) -> !fir.ref<i32> +!CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFtest_inlineEy"} +!CHECK: %[[VAL_3:.*]] = fir.declare %[[VAL_2]] {uniq_name = "_QFtest_inlineEy"} : (!fir.ref<i32>) -> !fir.ref<i32> + + !dir$ forceinline + y = g(x) + !CHECK: %[[VAL_4:.*]] = fir.call @_QFtest_inlinePg(%[[VAL_1]]) fastmath<contract> {inline_attr = #fir.inline_attrs<always_inline>} : (!fir.ref<i32>) -> i32 + !CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref<i32> + + !dir$ forceinline + call f(x, y) + !CHECK: fir.call @_QFtest_inlinePf(%[[VAL_1]], %[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<always_inline>} : (!fir.ref<i32>, !fir.ref<i32>) -> () + + !dir$ noinline + y = g(x) + 7 * (8 + g(y)) + !CHECK: %[[VAL_8:.*]] = fir.call @_QFtest_inlinePg(%[[VAL_1]]) fastmath<contract> {inline_attr = #fir.inline_attrs<no_inline>} : (!fir.ref<i32>) -> i32 + !CHECK: %[[VAL_9:.*]] = fir.call @_QFtest_inlinePg(%[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<no_inline>} : (!fir.ref<i32>) -> i32 + !CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[C8:.*]] : i32 + !CHECK: %[[VAL_11:.*]] = fir.no_reassoc %[[VAL_10]] : i32 + !CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_11]], %[[C7:.*]] : i32 + !CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_12]] : i32 + !CHECK: fir.store %[[VAL_13]] to %[[VAL_3]] : !fir.ref<i32> + + !dir$ noinline + call f(x, y) + !CHECK: fir.call @_QFtest_inlinePf(%[[VAL_1]], %[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<no_inline>} : (!fir.ref<i32>, !fir.ref<i32>) -> () + + !dir$ inline + call f(x, y) + !CHECK: fir.call @_QFtest_inlinePf(%[[VAL_1]], %[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<inline_hint>} : (!fir.ref<i32>, !fir.ref<i32>) -> () + + !dir$ forceinline + do i = 1, 100 + !CHECK: fir.do_loop %[[ARG_0:.*]] = %[[FROM:.*]] to %[[TO:.*]] step %[[C1:.*]] iter_args(%[[ARG_1:.*]] = {{.*}}) -> (i32) { + !CHECK: fir.call @_QFtest_inlinePf(%[[VAL_1]], %[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<always_inline>} : (!fir.ref<i32>, !fir.ref<i32>) -> () + call f(x, y) + enddo + + !dir$ inline + do i = 1, 100 + !CHECK: fir.do_loop %[[ARG_0:.*]] = %[[FROM:.*]] to %[[TO:.*]] step %[[C1:.*]] iter_args(%[[ARG_1:.*]] = {{.*}}) -> (i32) { + !CHECK: fir.call @_QFtest_inlinePf(%[[VAL_1]], %[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<inline_hint>} : (!fir.ref<i32>, !fir.ref<i32>) -> () + call f(x, y) + enddo +!CHECK: return + contains + subroutine f(x, y) + integer, intent(in) :: x + integer, intent(out) :: y + y = x*2 + end subroutine f + integer function g(x) + integer :: x + g = x*2 + end function g +end subroutine test_inline diff --git a/flang/test/Parser/OpenMP/declare-reduction-multi.f90 b/flang/test/Parser/OpenMP/declare-reduction-multi.f90 index a682958..8856661 100644 --- a/flang/test/Parser/OpenMP/declare-reduction-multi.f90 +++ b/flang/test/Parser/OpenMP/declare-reduction-multi.f90 @@ -26,7 +26,8 @@ program omp_examples type(tt) :: values(n), sum, prod, big, small !$omp declare reduction(+:tt:omp_out%r = omp_out%r + omp_in%r) initializer(omp_priv%r = 0) -!CHECK: !$OMP DECLARE REDUCTION(+:tt: omp_out%r = omp_out%r+omp_in%r) INITIALIZER(omp_priv%r = 0_4) +!CHECK: !$OMP DECLARE REDUCTION(+:tt: omp_out%r = omp_out%r + omp_in%r) INITIALIZER(om& +!CHECK-NEXT: !$OMP&p_priv%r = 0) !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction @@ -34,11 +35,39 @@ program omp_examples !PARSE-TREE: | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add !PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | Name = 'tt' -!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out%r=omp_out%r+omp_in%r' -!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv%r=0._4' +!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out%r=omp_out%r+omp_in%r' +!PARSE-TREE: | | | | Variable = 'omp_out%r' +!PARSE-TREE: | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | Name = 'r' +!PARSE-TREE: | | | | Expr = 'omp_out%r+omp_in%r' +!PARSE-TREE: | | | | | Add +!PARSE-TREE: | | | | | | Expr = 'omp_out%r' +!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | | | Name = 'r' +!PARSE-TREE: | | | | | | Expr = 'omp_in%r' +!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_in' +!PARSE-TREE: | | | | | | | | Name = 'r' +!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | Instance -> AssignmentStmt = 'omp_priv%r=0._4' +!PARSE-TREE: | | | Variable = 'omp_priv%r' +!PARSE-TREE: | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | DataRef -> Name = 'omp_priv' +!PARSE-TREE: | | | | | Name = 'r' +!PARSE-TREE: | | | Expr = '0_4' +!PARSE-TREE: | | | | LiteralConstant -> IntLiteralConstant = '0' +!PARSE-TREE: | Flags = None !$omp declare reduction(*:tt:omp_out%r = omp_out%r * omp_in%r) initializer(omp_priv%r = 1) -!CHECK-NEXT: !$OMP DECLARE REDUCTION(*:tt: omp_out%r = omp_out%r*omp_in%r) INITIALIZER(omp_priv%r = 1_4) +!CHECK-NEXT: !$OMP DECLARE REDUCTION(*:tt: omp_out%r = omp_out%r * omp_in%r) INITIALIZER(om& +!CHECK-NEXT: !$OMP&p_priv%r = 1) !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction @@ -46,11 +75,39 @@ program omp_examples !PARSE-TREE: | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Multiply !PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | Name = 'tt' -!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out%r=omp_out%r*omp_in%r' -!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv%r=1._4' +!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out%r=omp_out%r*omp_in%r' +!PARSE-TREE: | | | | Variable = 'omp_out%r' +!PARSE-TREE: | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | Name = 'r' +!PARSE-TREE: | | | | Expr = 'omp_out%r*omp_in%r' +!PARSE-TREE: | | | | | Multiply +!PARSE-TREE: | | | | | | Expr = 'omp_out%r' +!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | | | Name = 'r' +!PARSE-TREE: | | | | | | Expr = 'omp_in%r' +!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_in' +!PARSE-TREE: | | | | | | | | Name = 'r' +!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | Instance -> AssignmentStmt = 'omp_priv%r=1._4' +!PARSE-TREE: | | | Variable = 'omp_priv%r' +!PARSE-TREE: | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | DataRef -> Name = 'omp_priv' +!PARSE-TREE: | | | | | Name = 'r' +!PARSE-TREE: | | | Expr = '1_4' +!PARSE-TREE: | | | | LiteralConstant -> IntLiteralConstant = '1' +!PARSE-TREE: | Flags = None !$omp declare reduction(max:tt:omp_out = mymax(omp_out, omp_in)) initializer(omp_priv%r = 0) -!CHECK-NEXT: !$OMP DECLARE REDUCTION(max:tt: omp_out = mymax(omp_out,omp_in)) INITIALIZER(omp_priv%r = 0_4) +!CHECK-NEXT: !$OMP DECLARE REDUCTION(max:tt: omp_out = mymax(omp_out, omp_in)) INITIALIZER(& +!CHECK-NEXT: !$OMP&omp_priv%r = 0) !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction @@ -58,11 +115,36 @@ program omp_examples !PARSE-TREE: | | OmpReductionIdentifier -> ProcedureDesignator -> Name = 'max' !PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | Name = 'tt' -!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out=mymax(omp_out,omp_in)' -!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv%r=0._4' +!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out=mymax(omp_out,omp_in)' +!PARSE-TREE: | | | | Variable = 'omp_out' +!PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | Expr = 'mymax(omp_out,omp_in)' +!PARSE-TREE: | | | | | FunctionReference -> Call +!PARSE-TREE: | | | | | | ProcedureDesignator -> Name = 'mymax' +!PARSE-TREE: | | | | | | ActualArgSpec +!PARSE-TREE: | | | | | | | ActualArg -> Expr = 'omp_out' +!PARSE-TREE: | | | | | | | | Designator -> DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | ActualArgSpec +!PARSE-TREE: | | | | | | | ActualArg -> Expr = 'omp_in' +!PARSE-TREE: | | | | | | | | Designator -> DataRef -> Name = 'omp_in' +!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | Instance -> AssignmentStmt = 'omp_priv%r=0._4' +!PARSE-TREE: | | | Variable = 'omp_priv%r' +!PARSE-TREE: | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | DataRef -> Name = 'omp_priv' +!PARSE-TREE: | | | | | Name = 'r' +!PARSE-TREE: | | | Expr = '0_4' +!PARSE-TREE: | | | | LiteralConstant -> IntLiteralConstant = '0' +!PARSE-TREE: | Flags = None !$omp declare reduction(min:tt:omp_out%r = min(omp_out%r, omp_in%r)) initializer(omp_priv%r = 1) -!CHECK-NEXT: !$OMP DECLARE REDUCTION(min:tt: omp_out%r = min(omp_out%r,omp_in%r)) INITIALIZER(omp_priv%r = 1_4) +!CHECK-NEXT: !$OMP DECLARE REDUCTION(min:tt: omp_out%r = min(omp_out%r, omp_in%r)) INITIALI& +!CHECK-NEXT: !$OMP&ZER(omp_priv%r = 1) !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction @@ -70,8 +152,38 @@ program omp_examples !PARSE-TREE: | | OmpReductionIdentifier -> ProcedureDesignator -> Name = 'min' !PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | Name = 'tt' -!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out%r=min(omp_out%r,omp_in%r)' -!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv%r=1._4' +!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out%r=min(omp_out%r,omp_in%r)' +!PARSE-TREE: | | | | Variable = 'omp_out%r' +!PARSE-TREE: | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | Name = 'r' +!PARSE-TREE: | | | | Expr = 'min(omp_out%r,omp_in%r)' +!PARSE-TREE: | | | | | FunctionReference -> Call +!PARSE-TREE: | | | | | | ProcedureDesignator -> Name = 'min' +!PARSE-TREE: | | | | | | ActualArgSpec +!PARSE-TREE: | | | | | | | ActualArg -> Expr = 'omp_out%r' +!PARSE-TREE: | | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | | | | Name = 'r' +!PARSE-TREE: | | | | | | ActualArgSpec +!PARSE-TREE: | | | | | | | ActualArg -> Expr = 'omp_in%r' +!PARSE-TREE: | | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | | DataRef -> Name = 'omp_in' +!PARSE-TREE: | | | | | | | | | Name = 'r' +!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | Instance -> AssignmentStmt = 'omp_priv%r=1._4' +!PARSE-TREE: | | | Variable = 'omp_priv%r' +!PARSE-TREE: | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | DataRef -> Name = 'omp_priv' +!PARSE-TREE: | | | | | Name = 'r' +!PARSE-TREE: | | | Expr = '1_4' +!PARSE-TREE: | | | | LiteralConstant -> IntLiteralConstant = '1' +!PARSE-TREE: | Flags = None call random_number(values%r) diff --git a/flang/test/Parser/OpenMP/declare-reduction-operator.f90 b/flang/test/Parser/OpenMP/declare-reduction-operator.f90 index e4d07c8..0d337c1 100644 --- a/flang/test/Parser/OpenMP/declare-reduction-operator.f90 +++ b/flang/test/Parser/OpenMP/declare-reduction-operator.f90 @@ -16,7 +16,8 @@ subroutine reduce_1 ( n, tts ) type(tt) :: tts(n) type(tt2) :: tts2(n) -!CHECK: !$OMP DECLARE REDUCTION(+:tt: omp_out = tt(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)) INITIALIZER(omp_priv = tt(x=0_4,y=0_4)) +!CHECK: !$OMP DECLARE REDUCTION(+:tt: omp_out = tt(omp_out%x - omp_in%x , omp_out%y - & +!CHECK: !$OMP&omp_in%y)) INITIALIZER(omp_priv = tt(0,0)) !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction @@ -24,13 +25,60 @@ subroutine reduce_1 ( n, tts ) !PARSE-TREE: | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add !PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | Name = 'tt' -!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out=tt(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)' -!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv=tt(x=0_4,y=0_4)' - +!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out=tt(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)' +!PARSE-TREE: | | | | Variable = 'omp_out' +!PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | Expr = 'tt(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)' +!PARSE-TREE: | | | | | StructureConstructor +!PARSE-TREE: | | | | | | DerivedTypeSpec +!PARSE-TREE: | | | | | | | Name = 'tt' +!PARSE-TREE: | | | | | | ComponentSpec +!PARSE-TREE: | | | | | | | ComponentDataSource -> Expr = 'omp_out%x-omp_in%x' +!PARSE-TREE: | | | | | | | | Subtract +!PARSE-TREE: | | | | | | | | | Expr = 'omp_out%x' +!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | | | | | | Name = 'x' +!PARSE-TREE: | | | | | | | | | Expr = 'omp_in%x' +!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_in' +!PARSE-TREE: | | | | | | | | | | | Name = 'x' +!PARSE-TREE: | | | | | | ComponentSpec +!PARSE-TREE: | | | | | | | ComponentDataSource -> Expr = 'omp_out%y-omp_in%y' +!PARSE-TREE: | | | | | | | | Subtract +!PARSE-TREE: | | | | | | | | | Expr = 'omp_out%y' +!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | | | | | | Name = 'y' +!PARSE-TREE: | | | | | | | | | Expr = 'omp_in%y' +!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_in' +!PARSE-TREE: | | | | | | | | | | | Name = 'y' +!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | Instance -> AssignmentStmt = 'omp_priv=tt(x=0_4,y=0_4)' +!PARSE-TREE: | | | Variable = 'omp_priv' +!PARSE-TREE: | | | | Designator -> DataRef -> Name = 'omp_priv' +!PARSE-TREE: | | | Expr = 'tt(x=0_4,y=0_4)' +!PARSE-TREE: | | | | StructureConstructor +!PARSE-TREE: | | | | | DerivedTypeSpec +!PARSE-TREE: | | | | | | Name = 'tt' +!PARSE-TREE: | | | | | ComponentSpec +!PARSE-TREE: | | | | | | ComponentDataSource -> Expr = '0_4' +!PARSE-TREE: | | | | | | | LiteralConstant -> IntLiteralConstant = '0' +!PARSE-TREE: | | | | | ComponentSpec +!PARSE-TREE: | | | | | | ComponentDataSource -> Expr = '0_4' +!PARSE-TREE: | | | | | | | LiteralConstant -> IntLiteralConstant = '0' +!PARSE-TREE: | Flags = None !$omp declare reduction(+ : tt : omp_out = tt(omp_out%x - omp_in%x , omp_out%y - omp_in%y)) initializer(omp_priv = tt(0,0)) -!CHECK: !$OMP DECLARE REDUCTION(+:tt2: omp_out = tt2(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)) INITIALIZER(omp_priv = tt2(x=0._8,y=0._8) +!CHECK: !$OMP DECLARE REDUCTION(+:tt2: omp_out = tt2(omp_out%x - omp_in%x , omp_out%y & +!CHECK: !$OMP&- omp_in%y)) INITIALIZER(omp_priv = tt2(0,0)) !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction @@ -38,9 +86,55 @@ subroutine reduce_1 ( n, tts ) !PARSE-TREE: | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add !PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | Name = 'tt2' -!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out=tt2(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)' -!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv=tt2(x=0._8,y=0._8)' - +!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out=tt2(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)' +!PARSE-TREE: | | | | Variable = 'omp_out' +!PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | Expr = 'tt2(x=omp_out%x-omp_in%x,y=omp_out%y-omp_in%y)' +!PARSE-TREE: | | | | | StructureConstructor +!PARSE-TREE: | | | | | | DerivedTypeSpec +!PARSE-TREE: | | | | | | | Name = 'tt2' +!PARSE-TREE: | | | | | | ComponentSpec +!PARSE-TREE: | | | | | | | ComponentDataSource -> Expr = 'omp_out%x-omp_in%x' +!PARSE-TREE: | | | | | | | | Subtract +!PARSE-TREE: | | | | | | | | | Expr = 'omp_out%x' +!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | | | | | | Name = 'x' +!PARSE-TREE: | | | | | | | | | Expr = 'omp_in%x' +!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_in' +!PARSE-TREE: | | | | | | | | | | | Name = 'x' +!PARSE-TREE: | | | | | | ComponentSpec +!PARSE-TREE: | | | | | | | ComponentDataSource -> Expr = 'omp_out%y-omp_in%y' +!PARSE-TREE: | | | | | | | | Subtract +!PARSE-TREE: | | | | | | | | | Expr = 'omp_out%y' +!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | | | | | | Name = 'y' +!PARSE-TREE: | | | | | | | | | Expr = 'omp_in%y' +!PARSE-TREE: | | | | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | | | | DataRef -> Name = 'omp_in' +!PARSE-TREE: | | | | | | | | | | | Name = 'y' +!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | Instance -> AssignmentStmt = 'omp_priv=tt2(x=0._8,y=0._8)' +!PARSE-TREE: | | | Variable = 'omp_priv' +!PARSE-TREE: | | | | Designator -> DataRef -> Name = 'omp_priv' +!PARSE-TREE: | | | Expr = 'tt2(x=0._8,y=0._8)' +!PARSE-TREE: | | | | StructureConstructor +!PARSE-TREE: | | | | | DerivedTypeSpec +!PARSE-TREE: | | | | | | Name = 'tt2' +!PARSE-TREE: | | | | | ComponentSpec +!PARSE-TREE: | | | | | | ComponentDataSource -> Expr = '0_4' +!PARSE-TREE: | | | | | | | LiteralConstant -> IntLiteralConstant = '0' +!PARSE-TREE: | | | | | ComponentSpec +!PARSE-TREE: | | | | | | ComponentDataSource -> Expr = '0_4' +!PARSE-TREE: | | | | | | | LiteralConstant -> IntLiteralConstant = '0' +!PARSE-TREE: | Flags = None !$omp declare reduction(+ :tt2 : omp_out = tt2(omp_out%x - omp_in%x , omp_out%y - omp_in%y)) initializer(omp_priv = tt2(0,0)) type(tt) :: diffp = tt( 0, 0 ) diff --git a/flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f90 b/flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f90 index 455fc17..f026f15 100644 --- a/flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f90 +++ b/flang/test/Parser/OpenMP/declare-reduction-unparse-with-symbols.f90 @@ -8,6 +8,6 @@ end !CHECK: !DEF: /f00 (Subroutine) Subprogram !CHECK: subroutine f00 -!CHECK: !$omp declare reduction(fred:integer,real: omp_out = omp_in+omp_out) +!CHECK: !$omp declare reduction(fred:integer, real: omp_out = omp_in + omp_out) !CHECK: end subroutine diff --git a/flang/test/Parser/OpenMP/declare-reduction-unparse.f90 b/flang/test/Parser/OpenMP/declare-reduction-unparse.f90 index 73d7ccf..7897eb0 100644 --- a/flang/test/Parser/OpenMP/declare-reduction-unparse.f90 +++ b/flang/test/Parser/OpenMP/declare-reduction-unparse.f90 @@ -19,7 +19,8 @@ function func(x, n, init) end subroutine initme end interface !$omp declare reduction(red_add:integer(4):omp_out=omp_out+omp_in) initializer(initme(omp_priv,0)) -!CHECK: !$OMP DECLARE REDUCTION(red_add:INTEGER(KIND=4_4): omp_out = omp_out+omp_in) INITIALIZER(initme(omp_priv, 0_4)) +!CHECK: !$OMP DECLARE REDUCTION(red_add:INTEGER(KIND=4_4): omp_out=omp_out+omp_in) INITIA& +!CHECKL !$OMP&LIZER(initme(omp_priv,0)) !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare reduction @@ -27,9 +28,31 @@ function func(x, n, init) !PARSE-TREE: | | OmpReductionIdentifier -> ProcedureDesignator -> Name = 'red_add' !PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> KindSelector -> Scalar -> Integer -> Constant -> Expr = '4_4' !PARSE-TREE: | | | LiteralConstant -> IntLiteralConstant = '4' -!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out=omp_out+omp_in' -!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerProc -!PARSE-TREE: | | ProcedureDesignator -> Name = 'initme' +!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out=omp_out+omp_in' +!PARSE-TREE: | | | | Variable = 'omp_out' +!PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | Expr = 'omp_out+omp_in' +!PARSE-TREE: | | | | | Add +!PARSE-TREE: | | | | | | Expr = 'omp_out' +!PARSE-TREE: | | | | | | | Designator -> DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | Expr = 'omp_in' +!PARSE-TREE: | | | | | | | Designator -> DataRef -> Name = 'omp_in' +!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | Instance -> CallStmt = 'CALL initme(omp_priv,0_4)' +!PARSE-TREE: | | | Call +!PARSE-TREE: | | | | ProcedureDesignator -> Name = 'initme' +!PARSE-TREE: | | | | ActualArgSpec +!PARSE-TREE: | | | | | ActualArg -> Expr = 'omp_priv' +!PARSE-TREE: | | | | | | Designator -> DataRef -> Name = 'omp_priv' +!PARSE-TREE: | | | | ActualArgSpec +!PARSE-TREE: | | | | | ActualArg -> Expr = '0_4' +!PARSE-TREE: | | | | | | LiteralConstant -> IntLiteralConstant = '0' +!PARSE-TREE: | Flags = None res=init !$omp simd reduction(red_add:res) @@ -59,7 +82,8 @@ end function func !CHECK-LABEL: program main program main integer :: my_var -!CHECK: !$OMP DECLARE REDUCTION(my_add_red:INTEGER: omp_out = omp_out+omp_in) INITIALIZER(omp_priv = 0_4) +!CHECK: !$OMP DECLARE REDUCTION(my_add_red:INTEGER: omp_out = omp_out + omp_in) INITIA& +!CHECK: !$OMP&LIZER(omp_priv=0) !$omp declare reduction (my_add_red : integer : omp_out = omp_out + omp_in) initializer (omp_priv=0) my_var = 0 @@ -74,5 +98,24 @@ end program main !PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpReductionSpecifier !PARSE-TREE: | | OmpReductionIdentifier -> ProcedureDesignator -> Name = 'my_add_red' !PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> -!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out=omp_out+omp_in' -!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> AssignmentStmt = 'omp_priv=0_4' +!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out=omp_out+omp_in' +!PARSE-TREE: | | | | Variable = 'omp_out' +!PARSE-TREE: | | | | | Designator -> DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | Expr = 'omp_out+omp_in' +!PARSE-TREE: | | | | | Add +!PARSE-TREE: | | | | | | Expr = 'omp_out' +!PARSE-TREE: | | | | | | | Designator -> DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | Expr = 'omp_in' +!PARSE-TREE: | | | | | | | Designator -> DataRef -> Name = 'omp_in' +!PARSE-TREE: | OmpClauseList -> OmpClause -> Initializer -> OmpInitializerClause -> OmpInitializerExpression -> OmpStylizedInstance +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | OmpStylizedDeclaration +!PARSE-TREE: | | Instance -> AssignmentStmt = 'omp_priv=0_4' +!PARSE-TREE: | | | Variable = 'omp_priv' +!PARSE-TREE: | | | | Designator -> DataRef -> Name = 'omp_priv' +!PARSE-TREE: | | | Expr = '0_4' +!PARSE-TREE: | | | | LiteralConstant -> IntLiteralConstant = '0' +!PARSE-TREE: | Flags = None diff --git a/flang/test/Parser/OpenMP/metadirective-dirspec.f90 b/flang/test/Parser/OpenMP/metadirective-dirspec.f90 index c373001..b64ceb1 100644 --- a/flang/test/Parser/OpenMP/metadirective-dirspec.f90 +++ b/flang/test/Parser/OpenMP/metadirective-dirspec.f90 @@ -105,8 +105,8 @@ end !UNPARSE: TYPE :: tt2 !UNPARSE: REAL :: x !UNPARSE: END TYPE -!UNPARSE: !$OMP METADIRECTIVE WHEN(USER={CONDITION(.true._4)}: DECLARE REDUCTION(+:tt1,tt2: omp_out%x = omp_in%x+omp_out%x)& -!UNPARSE: !$OMP&) +!UNPARSE: !$OMP METADIRECTIVE WHEN(USER={CONDITION(.true._4)}: DECLARE REDUCTION(+:tt1, tt2: omp& +!UNPARSE: !$OMP&_out%x = omp_in%x + omp_out%x)) !UNPARSE: END SUBROUTINE !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpMetadirectiveDirective @@ -127,21 +127,44 @@ end !PARSE-TREE: | | | | | Name = 'tt1' !PARSE-TREE: | | | | OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | | | Name = 'tt2' -!PARSE-TREE: | | | | OmpCombinerExpression -> AssignmentStmt = 'omp_out%x=omp_in%x+omp_out%x' -!PARSE-TREE: | | | | | | Designator -> DataRef -> StructureComponent -!PARSE-TREE: | | | | | | | DataRef -> Name = 'omp_out' -!PARSE-TREE: | | | | | | | Name = 'x' -!PARSE-TREE: | | | | | Expr = 'omp_in%x+omp_out%x' -!PARSE-TREE: | | | | | | Add -!PARSE-TREE: | | | | | | | Expr = 'omp_in%x' -!PARSE-TREE: | | | | | | | | Designator -> DataRef -> StructureComponent -!PARSE-TREE: | | | | | | | | | DataRef -> Name = 'omp_in' -!PARSE-TREE: | | | | | | | | | Name = 'x' -!PARSE-TREE: | | | | | | | Expr = 'omp_out%x' -!PARSE-TREE: | | | | | | | | Designator -> DataRef -> StructureComponent -!PARSE-TREE: | | | | | | | | | DataRef -> Name = 'omp_out' -!PARSE-TREE: | | | | | | | | | Name = 'x' +!PARSE-TREE: | | | | OmpCombinerExpression -> OmpStylizedInstance +!PARSE-TREE: | | | | | OmpStylizedDeclaration +!PARSE-TREE: | | | | | OmpStylizedDeclaration +!PARSE-TREE: | | | | | Instance -> AssignmentStmt = 'omp_out%x=omp_in%x+omp_out%x' +!PARSE-TREE: | | | | | | Variable = 'omp_out%x' +!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | | | Name = 'x' +!PARSE-TREE: | | | | | | Expr = 'omp_in%x+omp_out%x' +!PARSE-TREE: | | | | | | | Add +!PARSE-TREE: | | | | | | | | Expr = 'omp_in%x' +!PARSE-TREE: | | | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | | | DataRef -> Name = 'omp_in' +!PARSE-TREE: | | | | | | | | | | Name = 'x' +!PARSE-TREE: | | | | | | | | Expr = 'omp_out%x' +!PARSE-TREE: | | | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | | | | | Name = 'x' +!PARSE-TREE: | | | | OmpStylizedInstance +!PARSE-TREE: | | | | | OmpStylizedDeclaration +!PARSE-TREE: | | | | | OmpStylizedDeclaration +!PARSE-TREE: | | | | | Instance -> AssignmentStmt = 'omp_out%x=omp_in%x+omp_out%x' +!PARSE-TREE: | | | | | | Variable = 'omp_out%x' +!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | | | Name = 'x' +!PARSE-TREE: | | | | | | Expr = 'omp_in%x+omp_out%x' +!PARSE-TREE: | | | | | | | Add +!PARSE-TREE: | | | | | | | | Expr = 'omp_in%x' +!PARSE-TREE: | | | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | | | DataRef -> Name = 'omp_in' +!PARSE-TREE: | | | | | | | | | | Name = 'x' +!PARSE-TREE: | | | | | | | | Expr = 'omp_out%x' +!PARSE-TREE: | | | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | | | | | Name = 'x' !PARSE-TREE: | | | OmpClauseList -> +!PARSE-TREE: | | | Flags = None subroutine f04 !$omp metadirective when(user={condition(.true.)}: & diff --git a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 index 39e8f05..50a38c6 100644 --- a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 +++ b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 @@ -79,7 +79,7 @@ end !UNPARSE: TYPE :: t !UNPARSE: INTEGER :: x !UNPARSE: END TYPE -!UNPARSE: !$OMP DECLARE_REDUCTION(+:t: omp_out%x = omp_out%x+omp_in%x) +!UNPARSE: !$OMP DECLARE_REDUCTION(+:t: omp_out%x = omp_out%x + omp_in%x) !UNPARSE: END SUBROUTINE !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OpenMPDeclareReductionConstruct -> OmpDirectiveSpecification @@ -88,21 +88,24 @@ end !PARSE-TREE: | | OmpReductionIdentifier -> DefinedOperator -> IntrinsicOperator = Add !PARSE-TREE: | | OmpTypeNameList -> OmpTypeName -> TypeSpec -> DerivedTypeSpec !PARSE-TREE: | | | Name = 't' -!PARSE-TREE: | | OmpCombinerExpression -> AssignmentStmt = 'omp_out%x=omp_out%x+omp_in%x' -!PARSE-TREE: | | | Variable = 'omp_out%x' -!PARSE-TREE: | | | | Designator -> DataRef -> StructureComponent -!PARSE-TREE: | | | | | DataRef -> Name = 'omp_out' -!PARSE-TREE: | | | | | Name = 'x' -!PARSE-TREE: | | | Expr = 'omp_out%x+omp_in%x' -!PARSE-TREE: | | | | Add -!PARSE-TREE: | | | | | Expr = 'omp_out%x' -!PARSE-TREE: | | | | | | Designator -> DataRef -> StructureComponent -!PARSE-TREE: | | | | | | | DataRef -> Name = 'omp_out' -!PARSE-TREE: | | | | | | | Name = 'x' -!PARSE-TREE: | | | | | Expr = 'omp_in%x' -!PARSE-TREE: | | | | | | Designator -> DataRef -> StructureComponent -!PARSE-TREE: | | | | | | | DataRef -> Name = 'omp_in' -!PARSE-TREE: | | | | | | | Name = 'x' +!PARSE-TREE: | | OmpCombinerExpression -> OmpStylizedInstance +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | OmpStylizedDeclaration +!PARSE-TREE: | | | Instance -> AssignmentStmt = 'omp_out%x=omp_out%x+omp_in%x' +!PARSE-TREE: | | | | Variable = 'omp_out%x' +!PARSE-TREE: | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | Name = 'x' +!PARSE-TREE: | | | | Expr = 'omp_out%x+omp_in%x' +!PARSE-TREE: | | | | | Add +!PARSE-TREE: | | | | | | Expr = 'omp_out%x' +!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_out' +!PARSE-TREE: | | | | | | | | Name = 'x' +!PARSE-TREE: | | | | | | Expr = 'omp_in%x' +!PARSE-TREE: | | | | | | | Designator -> DataRef -> StructureComponent +!PARSE-TREE: | | | | | | | | DataRef -> Name = 'omp_in' +!PARSE-TREE: | | | | | | | | Name = 'x' !PARSE-TREE: | OmpClauseList -> !PARSE-TREE: | Flags = None diff --git a/flang/test/Parser/compiler-directives.f90 b/flang/test/Parser/compiler-directives.f90 index 04d22ff..b2fe4663 100644 --- a/flang/test/Parser/compiler-directives.f90 +++ b/flang/test/Parser/compiler-directives.f90 @@ -72,3 +72,27 @@ subroutine no_vector do i=1,10 enddo end subroutine + +subroutine inline + integer :: a + !dir$ forceinline + ! CHECK: !DIR$ FORCEINLINE + a = f(2) + + !dir$ inline + ! CHECK: !DIR$ INLINE + call g() + + !dir$ noinline + ! CHECK: !DIR$ NOINLINE + call g() + + contains + function f(x) + integer :: x + f = x**2 + end function + + subroutine g() + end subroutine +end subroutine diff --git a/flang/test/Semantics/OpenMP/declare-reduction-error.f90 b/flang/test/Semantics/OpenMP/declare-reduction-error.f90 deleted file mode 100644 index 21f5cc1..0000000 --- a/flang/test/Semantics/OpenMP/declare-reduction-error.f90 +++ /dev/null @@ -1,11 +0,0 @@ -! RUN: not %flang_fc1 -emit-obj -fopenmp -fopenmp-version=50 %s 2>&1 | FileCheck %s - -subroutine initme(x,n) - integer x,n - x=n -end subroutine initme - -subroutine subr - !$omp declare reduction(red_add:integer(4):omp_out=omp_out+omp_in) initializer(initme(omp_priv,0)) - !CHECK: error: Implicit subroutine declaration 'initme' in DECLARE REDUCTION -end subroutine subr diff --git a/flang/test/Semantics/OpenMP/declare-reduction-functions.f90 b/flang/test/Semantics/OpenMP/declare-reduction-functions.f90 index 000d323..89e0771 100644 --- a/flang/test/Semantics/OpenMP/declare-reduction-functions.f90 +++ b/flang/test/Semantics/OpenMP/declare-reduction-functions.f90 @@ -57,9 +57,10 @@ contains !CHECK: adder: UserReductionDetails TYPE(two) !CHECK OtherConstruct scope !CHECK: omp_in size=8 offset=0: ObjectEntity type: TYPE(two) -!CHECK: omp_orig size=8 offset=8: ObjectEntity type: TYPE(two) -!CHECK: omp_out size=8 offset=16: ObjectEntity type: TYPE(two) -!CHECK: omp_priv size=8 offset=24: ObjectEntity type: TYPE(two) +!CHECK: omp_out size=8 offset=8: ObjectEntity type: TYPE(two) +!CHECK OtherConstruct scope +!CHECK: omp_orig size=8 offset=0: ObjectEntity type: TYPE(two) +!CHECK: omp_priv size=8 offset=8: ObjectEntity type: TYPE(two) !$omp simd reduction(adder:res) @@ -101,14 +102,16 @@ contains !CHECK: adder: UserReductionDetails TYPE(two) TYPE(three) !CHECK OtherConstruct scope !CHECK: omp_in size=8 offset=0: ObjectEntity type: TYPE(two) -!CHECK: omp_orig size=8 offset=8: ObjectEntity type: TYPE(two) -!CHECK: omp_out size=8 offset=16: ObjectEntity type: TYPE(two) -!CHECK: omp_priv size=8 offset=24: ObjectEntity type: TYPE(two) +!CHECK: omp_out size=8 offset=8: ObjectEntity type: TYPE(two) +!CHECK OtherConstruct scope +!CHECK: omp_orig size=8 offset=0: ObjectEntity type: TYPE(two) +!CHECK: omp_priv size=8 offset=8: ObjectEntity type: TYPE(two) !CHECK OtherConstruct scope !CHECK: omp_in size=24 offset=0: ObjectEntity type: TYPE(three) -!CHECK: omp_orig size=24 offset=24: ObjectEntity type: TYPE(three) -!CHECK: omp_out size=24 offset=48: ObjectEntity type: TYPE(three) -!CHECK: omp_priv size=24 offset=72: ObjectEntity type: TYPE(three) +!CHECK: omp_out size=24 offset=24: ObjectEntity type: TYPE(three) +!CHECK OtherConstruct scope +!CHECK: omp_orig size=24 offset=0: ObjectEntity type: TYPE(three) +!CHECK: omp_priv size=24 offset=24: ObjectEntity type: TYPE(three) !$omp simd reduction(adder:res3) do i=1,n @@ -135,9 +138,10 @@ contains !CHECK: op.+: UserReductionDetails TYPE(two) !CHECK OtherConstruct scope !CHECK: omp_in size=8 offset=0: ObjectEntity type: TYPE(two) -!CHECK: omp_orig size=8 offset=8: ObjectEntity type: TYPE(two) -!CHECK: omp_out size=8 offset=16: ObjectEntity type: TYPE(two) -!CHECK: omp_priv size=8 offset=24: ObjectEntity type: TYPE(two) +!CHECK: omp_out size=8 offset=8: ObjectEntity type: TYPE(two) +!CHECK OtherConstruct scope +!CHECK: omp_orig size=8 offset=0: ObjectEntity type: TYPE(two) +!CHECK: omp_priv size=8 offset=8: ObjectEntity type: TYPE(two) !$omp simd reduction(+:res) @@ -163,14 +167,16 @@ contains !CHECK: op.+: UserReductionDetails TYPE(two) TYPE(three) !CHECK OtherConstruct scope !CHECK: omp_in size=8 offset=0: ObjectEntity type: TYPE(two) -!CHECK: omp_orig size=8 offset=8: ObjectEntity type: TYPE(two) -!CHECK: omp_out size=8 offset=16: ObjectEntity type: TYPE(two) -!CHECK: omp_priv size=8 offset=24: ObjectEntity type: TYPE(two) +!CHECK: omp_out size=8 offset=8: ObjectEntity type: TYPE(two) +!CHECK OtherConstruct scope +!CHECK: omp_orig size=8 offset=0: ObjectEntity type: TYPE(two) +!CHECK: omp_priv size=8 offset=8: ObjectEntity type: TYPE(two) !CHECK: OtherConstruct scope !CHECK: omp_in size=24 offset=0: ObjectEntity type: TYPE(three) -!CHECK: omp_orig size=24 offset=24: ObjectEntity type: TYPE(three) -!CHECK: omp_out size=24 offset=48: ObjectEntity type: TYPE(three) -!CHECK: omp_priv size=24 offset=72: ObjectEntity type: TYPE(three) +!CHECK: omp_out size=24 offset=24: ObjectEntity type: TYPE(three) +!CHECK OtherConstruct scope +!CHECK: omp_orig size=24 offset=0: ObjectEntity type: TYPE(three) +!CHECK: omp_priv size=24 offset=24: ObjectEntity type: TYPE(three) !$omp simd reduction(+:res3) do i=1,n @@ -183,6 +189,7 @@ contains enddo res%t2 = res2 res%t3 = res3 + funcBtwothree = res end function funcBtwothree !! This is checking a special case, where a reduction is declared inside a @@ -191,11 +198,12 @@ contains pure logical function reduction() !CHECK: reduction size=4 offset=0: ObjectEntity funcResult type: LOGICAL(4) !CHECK: rr: UserReductionDetails INTEGER(4) -!CHECK: OtherConstruct scope: size=16 alignment=4 sourceRange=0 bytes +!CHECK: OtherConstruct scope: size=8 alignment=4 sourceRange=0 bytes !CHECK: omp_in size=4 offset=0: ObjectEntity type: INTEGER(4) -!CHECK: omp_orig size=4 offset=4: ObjectEntity type: INTEGER(4) -!CHECK: omp_out size=4 offset=8: ObjectEntity type: INTEGER(4) -!CHECK: omp_priv size=4 offset=12: ObjectEntity type: INTEGER(4) +!CHECK: omp_out size=4 offset=4: ObjectEntity type: INTEGER(4) +!CHECK: OtherConstruct scope: size=8 alignment=4 sourceRange=0 bytes +!CHECK: omp_orig size=4 offset=0: ObjectEntity type: INTEGER(4) +!CHECK: omp_priv size=4 offset=4: ObjectEntity type: INTEGER(4) !$omp declare reduction (rr : integer : omp_out = omp_out + omp_in) initializer (omp_priv = 0) reduction = .false. end function reduction diff --git a/flang/test/Semantics/OpenMP/declare-reduction-logical.f90 b/flang/test/Semantics/OpenMP/declare-reduction-logical.f90 index 7ab7cad..87fcecd 100644 --- a/flang/test/Semantics/OpenMP/declare-reduction-logical.f90 +++ b/flang/test/Semantics/OpenMP/declare-reduction-logical.f90 @@ -18,9 +18,10 @@ contains !CHECK: op.AND: UserReductionDetails TYPE(logicalwrapper) !CHECK OtherConstruct scope !CHECK: omp_in size=4 offset=0: ObjectEntity type: TYPE(logicalwrapper) -!CHECK: omp_orig size=4 offset=4: ObjectEntity type: TYPE(logicalwrapper) -!CHECK: omp_out size=4 offset=8: ObjectEntity type: TYPE(logicalwrapper) -!CHECK: omp_priv size=4 offset=12: ObjectEntity type: TYPE(logicalwrapper) +!CHECK: omp_out size=4 offset=4: ObjectEntity type: TYPE(logicalwrapper) +!CHECK OtherConstruct scope +!CHECK: omp_orig size=4 offset=0: ObjectEntity type: TYPE(logicalwrapper) +!CHECK: omp_priv size=4 offset=4: ObjectEntity type: TYPE(logicalwrapper) !$omp simd reduction(.AND.:res) do i=1,n diff --git a/flang/test/Semantics/OpenMP/declare-reduction-modfile.f90 b/flang/test/Semantics/OpenMP/declare-reduction-modfile.f90 index 0882de8..763179c 100644 --- a/flang/test/Semantics/OpenMP/declare-reduction-modfile.f90 +++ b/flang/test/Semantics/OpenMP/declare-reduction-modfile.f90 @@ -6,13 +6,13 @@ !type::t1 !integer(4)::val !endtype -!!$OMP DECLARE REDUCTION(*:t1:omp_out=omp_out*omp_in)INITIALIZER(omp_priv=& -!!$OMP&t1(1)) +!!$OMP DECLARE REDUCTION(*:t1: omp_out=omp_out*omp_in) INITIALIZER(omp_priv=t1(& +!!$OMP&1)) !!$OMP METADIRECTIVE OTHERWISE(DECLARE REDUCTION(+:INTEGER)) -!!$OMP DECLARE REDUCTION(.fluffy.:t1:omp_out=omp_out.fluffy.omp_in)INITIALI& -!!$OMP&ZER(omp_priv=t1(0)) -!!$OMP DECLARE REDUCTION(.mul.:t1:omp_out=omp_out.mul.omp_in)INITIALIZER(om& -!!$OMP&p_priv=t1(1)) +!!$OMP DECLARE REDUCTION(.fluffy.:t1: omp_out=omp_out.fluffy.omp_in) INITIALIZE& +!!$OMP&R(omp_priv=t1(0)) +!!$OMP DECLARE REDUCTION(.mul.:t1: omp_out=omp_out.mul.omp_in) INITIALIZER(omp_& +!!$OMP&priv=t1(1)) !interface operator(.mul.) !procedure::mul !end interface diff --git a/flang/test/Semantics/OpenMP/declare-reduction-operator.f90 b/flang/test/Semantics/OpenMP/declare-reduction-operator.f90 index dc12332..5fc4205 100644 --- a/flang/test/Semantics/OpenMP/declare-reduction-operator.f90 +++ b/flang/test/Semantics/OpenMP/declare-reduction-operator.f90 @@ -11,11 +11,9 @@ module m1 !$omp declare reduction(.fluffy.:t1:omp_out=omp_out.fluffy.omp_in) !CHECK: op.fluffy., PUBLIC: UserReductionDetails TYPE(t1) !CHECK: t1, PUBLIC: DerivedType components: val -!CHECK: OtherConstruct scope: size=16 alignment=4 sourceRange=0 bytes +!CHECK: OtherConstruct scope: size=8 alignment=4 sourceRange=0 bytes !CHECK: omp_in size=4 offset=0: ObjectEntity type: TYPE(t1) -!CHECK: omp_orig size=4 offset=4: ObjectEntity type: TYPE(t1) -!CHECK: omp_out size=4 offset=8: ObjectEntity type: TYPE(t1) -!CHECK: omp_priv size=4 offset=12: ObjectEntity type: TYPE(t1) +!CHECK: omp_out size=4 offset=4: ObjectEntity type: TYPE(t1) contains function my_mul(x, y) type (t1), intent (in) :: x, y diff --git a/flang/test/Semantics/OpenMP/declare-reduction-operators.f90 b/flang/test/Semantics/OpenMP/declare-reduction-operators.f90 index 84dbe1a..e0006bf 100644 --- a/flang/test/Semantics/OpenMP/declare-reduction-operators.f90 +++ b/flang/test/Semantics/OpenMP/declare-reduction-operators.f90 @@ -64,9 +64,10 @@ program test_vector !CHECK: OtherConstruct scope: !CHECK: omp_in size=12 offset=0: ObjectEntity type: TYPE(vector) -!CHECK: omp_orig size=12 offset=12: ObjectEntity type: TYPE(vector) -!CHECK: omp_out size=12 offset=24: ObjectEntity type: TYPE(vector) -!CHECK: omp_priv size=12 offset=36: ObjectEntity type: TYPE(vector) +!CHECK: omp_out size=12 offset=12: ObjectEntity type: TYPE(vector) +!CHECK: OtherConstruct scope: +!CHECK: omp_orig size=12 offset=0: ObjectEntity type: TYPE(vector) +!CHECK: omp_priv size=12 offset=12: ObjectEntity type: TYPE(vector) v2 = Vector(0.0, 0.0, 0.0) v1 = Vector(1.0, 2.0, 3.0) diff --git a/flang/test/Semantics/OpenMP/declare-reduction-renamedop.f90 b/flang/test/Semantics/OpenMP/declare-reduction-renamedop.f90 index 9cd638d..115fe51 100644 --- a/flang/test/Semantics/OpenMP/declare-reduction-renamedop.f90 +++ b/flang/test/Semantics/OpenMP/declare-reduction-renamedop.f90 @@ -33,11 +33,12 @@ program test_omp_reduction !$omp declare reduction (.modmul. : t1 : omp_out = omp_out .modmul. omp_in) initializer(omp_priv = t1(1.0)) !CHECK: op.modmul.: UserReductionDetails TYPE(t1) !CHECK: t1: Use from t1 in module1 -!CHECK: OtherConstruct scope: size=16 alignment=4 sourceRange=0 bytes +!CHECK: OtherConstruct scope: size=8 alignment=4 sourceRange=0 bytes !CHECK: omp_in size=4 offset=0: ObjectEntity type: TYPE(t1) -!CHECK: omp_orig size=4 offset=4: ObjectEntity type: TYPE(t1) -!CHECK: omp_out size=4 offset=8: ObjectEntity type: TYPE(t1) -!CHECK: omp_priv size=4 offset=12: ObjectEntity type: TYPE(t1) +!CHECK: omp_out size=4 offset=4: ObjectEntity type: TYPE(t1) +!CHECK: OtherConstruct scope: size=8 alignment=4 sourceRange=0 bytes +!CHECK: omp_orig size=4 offset=0: ObjectEntity type: TYPE(t1) +!CHECK: omp_priv size=4 offset=4: ObjectEntity type: TYPE(t1) result = t1(1.0) !$omp parallel do reduction(.modmul.:result) do i = 1, 10 diff --git a/flang/test/Semantics/OpenMP/declare-reduction.f90 b/flang/test/Semantics/OpenMP/declare-reduction.f90 index 1f39c57..c8dee5e 100644 --- a/flang/test/Semantics/OpenMP/declare-reduction.f90 +++ b/flang/test/Semantics/OpenMP/declare-reduction.f90 @@ -19,10 +19,12 @@ function func(x, n, init) !$omp declare reduction(red_add:integer(4):omp_out=omp_out+omp_in) initializer(initme(omp_priv,0)) !CHECK: red_add: UserReductionDetails !CHECK: Subprogram scope: initme +!CHECK: OtherConstruct scope: !CHECK: omp_in size=4 offset=0: ObjectEntity type: INTEGER(4) -!CHECK: omp_orig size=4 offset=4: ObjectEntity type: INTEGER(4) -!CHECK: omp_out size=4 offset=8: ObjectEntity type: INTEGER(4) -!CHECK: omp_priv size=4 offset=12: ObjectEntity type: INTEGER(4) +!CHECK: omp_out size=4 offset=4: ObjectEntity type: INTEGER(4) +!CHECK: OtherConstruct scope: +!CHECK: omp_orig size=4 offset=0: ObjectEntity type: INTEGER(4) +!CHECK: omp_priv size=4 offset=4: ObjectEntity type: INTEGER(4) !$omp simd reduction(red_add:res) do i=1,n res=res+x(i) @@ -36,9 +38,11 @@ program main !$omp declare reduction (my_add_red : integer : omp_out = omp_out + omp_in) initializer (omp_priv=0) !CHECK: my_add_red: UserReductionDetails +!CHECK: OtherConstruct scope: !CHECK: omp_in size=4 offset=0: ObjectEntity type: INTEGER(4) -!CHECK: omp_orig size=4 offset=4: ObjectEntity type: INTEGER(4) -!CHECK: omp_out size=4 offset=8: ObjectEntity type: INTEGER(4) -!CHECK: omp_priv size=4 offset=12: ObjectEntity type: INTEGER(4) +!CHECK: omp_out size=4 offset=4: ObjectEntity type: INTEGER(4) +!CHECK: OtherConstruct scope: +!CHECK: omp_orig size=4 offset=0: ObjectEntity type: INTEGER(4) +!CHECK: omp_priv size=4 offset=4: ObjectEntity type: INTEGER(4) end program main |
