From a409a9bcd565bcce5c2bf64c75ebda615e54e80a Mon Sep 17 00:00:00 2001 From: Julian Brown Date: Wed, 25 Mar 2026 16:56:33 -0500 Subject: [OpenMP] OpenMP 6.0 "taskgraph" support, frontend parts --- clang/include/clang/AST/OpenMPClause.h | 73 ++- clang/include/clang/AST/RecursiveASTVisitor.h | 6 + clang/include/clang/Sema/SemaOpenMP.h | 6 + clang/lib/AST/OpenMPClause.cpp | 19 +- clang/lib/AST/StmtProfile.cpp | 5 + clang/lib/CodeGen/CGOpenMPRuntime.cpp | 668 +++++++++++++++++-------- clang/lib/CodeGen/CGOpenMPRuntime.h | 17 +- clang/lib/CodeGen/CGStmtOpenMP.cpp | 63 ++- clang/lib/CodeGen/CodeGenFunction.h | 16 + clang/lib/Parse/ParseOpenMP.cpp | 15 +- clang/lib/Sema/SemaOpenMP.cpp | 28 ++ clang/lib/Sema/TreeTransform.h | 26 + clang/lib/Serialization/ASTReader.cpp | 11 + clang/lib/Serialization/ASTWriter.cpp | 5 + clang/tools/libclang/CIndex.cpp | 4 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 1 + llvm/include/llvm/Frontend/OpenMP/OMPKinds.def | 12 +- 17 files changed, 734 insertions(+), 241 deletions(-) diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h index 27a737bd4363..0860aca97351 100644 --- a/clang/include/clang/AST/OpenMPClause.h +++ b/clang/include/clang/AST/OpenMPClause.h @@ -1939,6 +1939,75 @@ public: } }; +/// This represents a 'replayable' clause in the '#pragma omp target', +// '#pragma omp target enter data', '#pragma omp target exit data', +// '#pragma omp target update', '#pragma omp task', '#pragma omp taskloop' or +// '#pragma omp taskwait' directive. +/// +/// \code +/// #pragma omp task replayable(1) +/// \endcode +/// In this example directive '#pragma omp task' has the 'replayable' clause. +class OMPReplayableClause final : public OMPClause { +public: + friend class OMPClauseReader; + + /// Location of '('. + SourceLocation LParenLoc; + + /// Condition of the 'replayable' clause. + Stmt *Condition = nullptr; + + /// Set condition. + void setCondition(Expr *Cond) { Condition = Cond; } + + /// Build 'replayable' clause. + /// + /// \param Cond Condition of the clause. + /// \param StartLoc Starting location of the clause. + /// \param LParenLoc Location of '('. + /// \param EndLoc Ending location of the clause. + OMPReplayableClause(Expr *Cond, SourceLocation StartLoc, + SourceLocation LParenLoc, SourceLocation EndLoc) + : OMPClause(llvm::omp::OMPC_replayable, StartLoc, EndLoc), + LParenLoc(LParenLoc), Condition(Cond) {} + + /// Build an empty clause. + OMPReplayableClause() + : OMPClause(llvm::omp::OMPC_replayable, SourceLocation(), + SourceLocation()) {} + + /// Sets the location of '('. + void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } + + /// Returns the location of '('. + SourceLocation getLParenLoc() const { return LParenLoc; } + + /// Returns condition. + Expr *getCondition() const { return cast_or_null(Condition); } + + child_range children() { + if (Condition) + return child_range(&Condition, &Condition + 1); + return child_range(child_iterator(), child_iterator()); + } + + const_child_range children() const { + if (Condition) + return const_child_range(&Condition, &Condition + 1); + return const_child_range(const_child_iterator(), const_child_iterator()); + } + + child_range used_children(); + const_child_range used_children() const { + return const_cast(this)->used_children(); + } + + static bool classof(const OMPClause *T) { + return T->getClauseKind() == llvm::omp::OMPC_replayable; + } +}; + /// This represents 'at' clause in the '#pragma omp error' directive /// /// \code @@ -8454,7 +8523,7 @@ class OMPGraphIdClause final void setCondition(Expr *Cond) { setStmt(Cond); } public: - /// Build 'grpah_id' clause with condition \a Cond. + /// Build 'graph_id' clause with condition \a Cond. /// /// \param Cond Condition of the clause. /// \param HelperCond Helper condition for the construct. @@ -8498,7 +8567,7 @@ class OMPGraphResetClause final void setCondition(Expr *Cond) { setStmt(Cond); } public: - /// Build 'grpah_id' clause with condition \a Cond. + /// Build 'graph_reset' clause with condition \a Cond. /// /// \param Cond Condition of the clause. /// \param HelperCond Helper condition for the construct. diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 32b928ca62fd..c327617c21b7 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -3634,6 +3634,12 @@ bool RecursiveASTVisitor::VisitOMPNowaitClause(OMPNowaitClause *C) { return true; } +template +bool RecursiveASTVisitor::VisitOMPReplayableClause(OMPReplayableClause *C) { + TRY_TO(TraverseStmt(C->getCondition())); + return true; +} + template bool RecursiveASTVisitor::VisitOMPUntiedClause(OMPUntiedClause *) { return true; diff --git a/clang/include/clang/Sema/SemaOpenMP.h b/clang/include/clang/Sema/SemaOpenMP.h index d88a85cc1b9f..6901740a03df 100644 --- a/clang/include/clang/Sema/SemaOpenMP.h +++ b/clang/include/clang/Sema/SemaOpenMP.h @@ -1161,6 +1161,12 @@ public: OMPClause *ActOnOpenMPSelfMapsClause(SourceLocation StartLoc, SourceLocation EndLoc); + /// Called on well-formed 'replayable' clause. + OMPClause *ActOnOpenMPReplayableClause(SourceLocation StartLoc, + SourceLocation EndLoc, + SourceLocation LParenLoc, + Expr *Condition); + /// Called on well-formed 'at' clause. OMPClause *ActOnOpenMPAtClause(OpenMPAtClauseKind Kind, SourceLocation KindLoc, diff --git a/clang/lib/AST/OpenMPClause.cpp b/clang/lib/AST/OpenMPClause.cpp index 3765b97447e6..a2a04f494fc3 100644 --- a/clang/lib/AST/OpenMPClause.cpp +++ b/clang/lib/AST/OpenMPClause.cpp @@ -326,6 +326,12 @@ OMPClause::child_range OMPNowaitClause::used_children() { return children(); } +OMPClause::child_range OMPReplayableClause::used_children() { + if (Condition) + return child_range(&Condition, &Condition + 1); + return children(); +} + OMPClause::child_range OMPGrainsizeClause::used_children() { if (Stmt **C = getAddrOfExprAsWritten(getPreInitStmt())) return child_range(C, C + 1); @@ -2176,6 +2182,15 @@ void OMPClausePrinter::VisitOMPNowaitClause(OMPNowaitClause *Node) { } } +void OMPClausePrinter::VisitOMPReplayableClause(OMPReplayableClause *Node) { + OS << "replayable"; + if (auto *Cond = Node->getCondition()) { + OS << "("; + Cond->printPretty(OS, nullptr, Policy, 0); + OS << ")"; + } +} + void OMPClausePrinter::VisitOMPUntiedClause(OMPUntiedClause *) { OS << "untied"; } @@ -2353,7 +2368,7 @@ void OMPClausePrinter::VisitOMPGrainsizeClause(OMPGrainsizeClause *Node) { } void OMPClausePrinter::VisitOMPGraphIdClause(OMPGraphIdClause *Node) { - OS << "graphId"; + OS << "graph_id"; if (Expr *E = Node->getCondition()) { OS << "("; E->printPretty(OS, nullptr, Policy, 0); @@ -2362,7 +2377,7 @@ void OMPClausePrinter::VisitOMPGraphIdClause(OMPGraphIdClause *Node) { } void OMPClausePrinter::VisitOMPGraphResetClause(OMPGraphResetClause *Node) { - OS << "graphReset"; + OS << "graph_reset"; if (Expr *E = Node->getCondition()) { OS << "("; E->printPretty(OS, nullptr, Policy, 0); diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index 0e43a48e40a9..11f8f96bfa16 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -600,6 +600,11 @@ void OMPClauseProfiler::VisitOMPNowaitClause(const OMPNowaitClause *C) { Profiler->VisitStmt(C->getCondition()); } +void OMPClauseProfiler::VisitOMPReplayableClause(const OMPReplayableClause *C) { + if (C->getCondition()) + Profiler->VisitStmt(C->getCondition()); +} + void OMPClauseProfiler::VisitOMPUntiedClause(const OMPUntiedClause *) {} void OMPClauseProfiler::VisitOMPMergeableClause(const OMPMergeableClause *) {} diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index e8a79df3dd5f..3bc90e40cbd1 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Frontend/OpenMP/OMP.h.inc" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalValue.h" @@ -2249,32 +2250,26 @@ void CGOpenMPRuntime::emitTaskgraphCall(CodeGenFunction &CGF, if (!CGF.HaveInsertPoint()) return; - // Building kmp_taskgraph_flags_t flags for kmpc_taskgraph. C.f., kmp.h - enum { - NowaitFlag = 0x1, // Not used yet. - ReRecordFlag = 0x2, - }; - - unsigned Flags = 0; - - if (D.getSingleClause()) { - Flags |= NowaitFlag; + // The nogroup clause doesn't support an argument yet. FIXME. + const OMPNogroupClause *NoGroupClause = + D.getSingleClause(); + llvm::Value *NoGroup; + if (NoGroupClause) { + NoGroup = CGF.Builder.getInt32(1); + } else { + NoGroup = CGF.Builder.getInt32(0); } const OMPGraphResetClause *GraphResetClause = D.getSingleClause(); + llvm::Value *GraphReset; if (GraphResetClause) { const Expr *Cond = GraphResetClause->getCondition(); llvm::Value *CondVal = CGF.EvaluateExprAsBool(Cond); - if (CondVal) { - llvm::Value *CondBool = CGF.Builder.CreateICmpNE( - CondVal, llvm::ConstantInt::get(CondVal->getType(), 0)); - if (llvm::ConstantInt *CI = llvm::dyn_cast(CondBool)) { - if (CI->isOne()) { - Flags |= ReRecordFlag; - } - } - } + GraphReset = + CGF.Builder.CreateIntCast(CondVal, CGF.IntTy, /*isSigned=*/true); + } else { + GraphReset = CGF.Builder.getInt32(0); } llvm::Value *GraphId = CGF.Builder.getInt32(0); @@ -2282,7 +2277,8 @@ void CGOpenMPRuntime::emitTaskgraphCall(CodeGenFunction &CGF, if (GraphIdClause) { const auto *E = GraphIdClause->getCondition(); auto *GraphIdVal = CGF.EmitScalarExpr(E); - GraphId = CGF.Builder.CreateIntCast(GraphIdVal, CGM.Int32Ty, true); + GraphId = + CGF.Builder.CreateIntCast(GraphIdVal, CGM.Int32Ty, /*isSigned=*/false); } CodeGenFunction OutlinedCGF(CGM, /*suppressNewContext=*/true); @@ -2290,6 +2286,7 @@ void CGOpenMPRuntime::emitTaskgraphCall(CodeGenFunction &CGF, const auto *CS = cast(D.getAssociatedStmt()); auto BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { + CodeGenFunction::OMPWithinTaskgraphRAII WithinTaskgraph(CGF); CGF.EmitStmt(CS->getCapturedStmt()); }; @@ -2297,14 +2294,25 @@ void CGOpenMPRuntime::emitTaskgraphCall(CodeGenFunction &CGF, CGOpenMPTaskgraphRegionInfo TaskgraphRegion(*CS, BodyGen); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(OutlinedCGF, &TaskgraphRegion); + llvm::Function *FnT = OutlinedCGF.GenerateCapturedStmtFunction(*CS); - std::array Args{ + // Create an internal-linkage global variable to hold the taskgraph handle. + std::string GraphHandleName = getName({"omp", "taskgraph", "handle"}); + auto *GraphHandle = + new llvm::GlobalVariable(CGM.getModule(), CGM.VoidPtrTy, + /*IsConstant=*/false, + llvm::GlobalValue::InternalLinkage, + llvm::Constant::getNullValue(CGM.VoidPtrTy), + GraphHandleName); + + std::array Args{ emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), - CGF.Builder.getInt32(Flags), - CGF.Builder.getInt32(D.getBeginLoc().getHashValue()), + GraphHandle, GraphId, + GraphReset, + NoGroup, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(FnT, CGM.VoidPtrTy), CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( CapStruct.getPointer(OutlinedCGF), CGM.VoidPtrTy)}; @@ -3874,7 +3882,9 @@ CGOpenMPRuntime::TaskResultTy CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, - Address Shareds, const OMPTaskDataTy &Data) { + Address Shareds, const OMPTaskDataTy &Data, + bool ForTaskgraph, + std::array &TaskAllocArgs) { ASTContext &C = CGM.getContext(); llvm::SmallVector Privates; // Aggregate privates and sort them by the alignment. @@ -4021,6 +4031,11 @@ CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( TaskEntry, KmpRoutineEntryPtrTy)}; llvm::Value *NewTask; + if (ForTaskgraph) { + TaskAllocArgs[0] = TaskFlags; + TaskAllocArgs[1] = KmpTaskTWithPrivatesTySize; + TaskAllocArgs[2] = SharedsSize; + } if (D.hasClausesOfKind()) { // Check if we have any device clause associated with the directive. const Expr *Device = nullptr; @@ -4774,118 +4789,183 @@ void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, + const Expr *ReplayableCond, const OMPTaskDataTy &Data) { if (!CGF.HaveInsertPoint()) return; - TaskResultTy Result = - emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); - llvm::Value *NewTask = Result.NewTask; - llvm::Function *TaskEntry = Result.TaskEntry; - llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; - LValue TDBase = Result.TDBase; - const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; - // Process list of dependences. - Address DependenciesArray = Address::invalid(); - llvm::Value *NumOfElements; - std::tie(NumOfElements, DependenciesArray) = - emitDependClause(CGF, Data.Dependences, Loc); - - // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() - // libcall. - // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, - // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, - // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence - // list is not empty - llvm::Value *ThreadID = getThreadID(CGF, Loc); - llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); - llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; - llvm::Value *DepTaskArgs[7]; - if (!Data.Dependences.empty()) { - DepTaskArgs[0] = UpLoc; - DepTaskArgs[1] = ThreadID; - DepTaskArgs[2] = NewTask; - DepTaskArgs[3] = NumOfElements; - DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF); - DepTaskArgs[5] = CGF.Builder.getInt32(0); - DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); - } - auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, - &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { - if (!Data.Tied) { - auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); - LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); - CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); + auto &&TaskgraphTaskCodeGen = + [this, &Loc, &D, TaskFunction, &SharedsTy, &Shareds, &Data] + (CodeGenFunction &CGF, PrePostActionTy &) { + llvm::Value *ThreadId = getThreadID(CGF, Loc); + llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); + std::array TGTaskArgs; + std::array TaskAllocArgs; + TaskResultTy Result = + emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data, true, + TaskAllocArgs); + Address DependenciesArray = Address::invalid(); + llvm::Value *NumOfElements; + std::tie(NumOfElements, DependenciesArray) = + emitDependClause(CGF, Data.Dependences, Loc); + //llvm::dbgs() << "SharedsTy:\n"; + TGTaskArgs[0] = UpLoc; + TGTaskArgs[1] = ThreadId; + TGTaskArgs[2] = Result.NewTask; + //TGTaskArgs[2] = TaskgraphRegion->getTaskgraphValue(); + TGTaskArgs[3] = TaskAllocArgs[0]; // TaskFlags + TGTaskArgs[4] = TaskAllocArgs[1]; // KmpTaskTWithPrivatesTySize + TGTaskArgs[5] = Shareds.emitRawPointer(CGF); + TGTaskArgs[6] = TaskAllocArgs[2]; // SharedsSize + if (auto RecType = dyn_cast(SharedsTy)) { + auto *RD = RecType->getAsRecordDecl(); + if (RD->fields().empty()) { + // FIXME: The condition might not be precisely correct here. + TGTaskArgs[6] = CGF.Builder.getSize(0); + } } - if (!Data.Dependences.empty()) { - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), - DepTaskArgs); + if (Data.Dependences.size() == 0) { + TGTaskArgs[7] = CGF.Builder.getInt32(0); + TGTaskArgs[8] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); } else { - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_omp_task), - TaskArgs); + TGTaskArgs[7] = NumOfElements; + TGTaskArgs[8] = DependenciesArray.emitRawPointer(CGF); } - // Check if parent region is untied and build return for untied task; - if (auto *Region = - dyn_cast_or_null(CGF.CapturedStmtInfo)) - Region->emitUntiedSwitch(CGF); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_taskgraph_task), + TGTaskArgs); }; - llvm::Value *DepWaitTaskArgs[7]; - if (!Data.Dependences.empty()) { - DepWaitTaskArgs[0] = UpLoc; - DepWaitTaskArgs[1] = ThreadID; - DepWaitTaskArgs[2] = NumOfElements; - DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF); - DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); - DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); - DepWaitTaskArgs[6] = - llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); - } - auto &M = CGM.getModule(); - auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, - TaskEntry, &Data, &DepWaitTaskArgs, - Loc](CodeGenFunction &CGF, PrePostActionTy &) { - CodeGenFunction::RunCleanupsScope LocalScope(CGF); - // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, - // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 - // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info - // is specified. - if (!Data.Dependences.empty()) - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - M, OMPRTL___kmpc_omp_taskwait_deps_51), - DepWaitTaskArgs); - // Call proxy_task_entry(gtid, new_task); - auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, - Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { - Action.Enter(CGF); - llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; - CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, - OutlinedFnArgs); + auto &&NonTaskgraphTaskCodeGen = + [this, &Loc, &D, TaskFunction, &SharedsTy, &Shareds, IfCond, &Data] + (CodeGenFunction &CGF, PrePostActionTy &) { + std::array DummyArray; + TaskResultTy Result = + emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data, false, DummyArray); + llvm::Value *NewTask = Result.NewTask; + llvm::Function *TaskEntry = Result.TaskEntry; + llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; + LValue TDBase = Result.TDBase; + const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; + // Process list of dependences. + Address DependenciesArray = Address::invalid(); + llvm::Value *NumOfElements; + std::tie(NumOfElements, DependenciesArray) = + emitDependClause(CGF, Data.Dependences, Loc); + + // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() + // libcall. + // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, + // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, + // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence + // list is not empty + llvm::Value *ThreadID = getThreadID(CGF, Loc); + llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; + llvm::Value *DepTaskArgs[7]; + if (!Data.Dependences.empty()) { + DepTaskArgs[0] = UpLoc; + DepTaskArgs[1] = ThreadID; + DepTaskArgs[2] = NewTask; + DepTaskArgs[3] = NumOfElements; + DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF); + DepTaskArgs[5] = CGF.Builder.getInt32(0); + DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + } + auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, + &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { + if (!Data.Tied) { + auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); + LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); + CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); + } + if (!Data.Dependences.empty()) { + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), + DepTaskArgs); + } else { + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_omp_task), + TaskArgs); + } + // Check if parent region is untied and build return for untied task; + if (auto *Region = + dyn_cast_or_null(CGF.CapturedStmtInfo)) + Region->emitUntiedSwitch(CGF); }; - // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, - // kmp_task_t *new_task); - // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, - // kmp_task_t *new_task); - RegionCodeGenTy RCG(CodeGen); - CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( - M, OMPRTL___kmpc_omp_task_begin_if0), - TaskArgs, - OMPBuilder.getOrCreateRuntimeFunction( - M, OMPRTL___kmpc_omp_task_complete_if0), - TaskArgs); - RCG.setAction(Action); - RCG(CGF); + llvm::Value *DepWaitTaskArgs[7]; + if (!Data.Dependences.empty()) { + DepWaitTaskArgs[0] = UpLoc; + DepWaitTaskArgs[1] = ThreadID; + DepWaitTaskArgs[2] = NumOfElements; + DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF); + DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); + DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + DepWaitTaskArgs[6] = + llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); + } + auto &M = CGM.getModule(); + auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, + TaskEntry, &Data, &DepWaitTaskArgs, + Loc](CodeGenFunction &CGF, PrePostActionTy &) { + CodeGenFunction::RunCleanupsScope LocalScope(CGF); + // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, + // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 + // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info + // is specified. + if (!Data.Dependences.empty()) + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_omp_taskwait_deps_51), + DepWaitTaskArgs); + // Call proxy_task_entry(gtid, new_task); + auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, + Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { + Action.Enter(CGF); + llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; + CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, + OutlinedFnArgs); + }; + + // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, + // kmp_task_t *new_task); + // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, + // kmp_task_t *new_task); + RegionCodeGenTy RCG(CodeGen); + CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_omp_task_begin_if0), + TaskArgs, + OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_omp_task_complete_if0), + TaskArgs); + RCG.setAction(Action); + RCG(CGF); + }; + + if (IfCond) { + emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); + } else { + RegionCodeGenTy ThenRCG(ThenCodeGen); + ThenRCG(CGF); + } }; - if (IfCond) { - emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); + if (CGF.getOMPWithinTaskgraph()) { + // Lexically within taskgraph, always replayable. + RegionCodeGenTy TaskgraphRCG(TaskgraphTaskCodeGen); + TaskgraphRCG(CGF); } else { - RegionCodeGenTy ThenRCG(ThenCodeGen); - ThenRCG(CGF); + if (ReplayableCond) { + // We have a replayable clause. Task is replayable if its argument is + // omitted or evaluates to TRUE. + emitIfClause(CGF, ReplayableCond, TaskgraphTaskCodeGen, + NonTaskgraphTaskCodeGen); + } else { + // Not taskgraph, not replayable. + RegionCodeGenTy NonTaskgraphRCG(NonTaskgraphTaskCodeGen); + NonTaskgraphRCG(CGF); + } } } @@ -4894,18 +4974,11 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, + const Expr *ReplayableCond, const OMPTaskDataTy &Data) { if (!CGF.HaveInsertPoint()) return; - TaskResultTy Result = - emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); - // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() - // libcall. - // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int - // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int - // sched, kmp_uint64 grainsize, void *task_dup); - llvm::Value *ThreadID = getThreadID(CGF, Loc); - llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *IfVal; if (IfCond) { IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, @@ -4914,68 +4987,173 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); } - LValue LBLVal = CGF.EmitLValueForField( - Result.TDBase, - *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); - const auto *LBVar = - cast(cast(D.getLowerBoundVariable())->getDecl()); - CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), - /*IsInitializer=*/true); - LValue UBLVal = CGF.EmitLValueForField( - Result.TDBase, - *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); - const auto *UBVar = - cast(cast(D.getUpperBoundVariable())->getDecl()); - CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), - /*IsInitializer=*/true); - LValue StLVal = CGF.EmitLValueForField( - Result.TDBase, - *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); - const auto *StVar = - cast(cast(D.getStrideVariable())->getDecl()); - CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), - /*IsInitializer=*/true); - // Store reductions address. - LValue RedLVal = CGF.EmitLValueForField( - Result.TDBase, - *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); - if (Data.Reductions) { - CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); + enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; + + auto &&TaskgraphTaskloopCodeGen = + [this, &Loc, &D, TaskFunction, &SharedsTy, &Shareds, IfVal, &Data] + (CodeGenFunction &CGF, PrePostActionTy &) { + llvm::Value *ThreadId = getThreadID(CGF, Loc); + llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); + std::array TGTaskLoopArgs; + std::array TaskAllocArgs; + TaskResultTy Result = + emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data, true, + TaskAllocArgs); + + // This is all copy/pasted from below. Refactor! + LValue LBLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); + const auto *LBVar = + cast(cast(D.getLowerBoundVariable())->getDecl()); + CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), + /*IsInitializer=*/true); + LValue UBLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); + const auto *UBVar = + cast(cast(D.getUpperBoundVariable())->getDecl()); + CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), + /*IsInitializer=*/true); + LValue StLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); + const auto *StVar = + cast(cast(D.getStrideVariable())->getDecl()); + CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), + /*IsInitializer=*/true); + // Store reductions address. + LValue RedLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); + if (Data.Reductions) { + CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); + } else { + CGF.EmitNullInitialization(RedLVal.getAddress(), + CGF.getContext().VoidPtrTy); + } + + TGTaskLoopArgs[0] = UpLoc; + TGTaskLoopArgs[1] = ThreadId; + TGTaskLoopArgs[2] = Result.NewTask; + TGTaskLoopArgs[3] = TaskAllocArgs[0]; // TaskFlags + TGTaskLoopArgs[4] = TaskAllocArgs[1]; // KmpTaskTWithPrivatesTySize + TGTaskLoopArgs[5] = Shareds.emitRawPointer(CGF); + TGTaskLoopArgs[6] = TaskAllocArgs[2]; // SharedsSize + TGTaskLoopArgs[7] = IfVal; + TGTaskLoopArgs[8] = LBLVal.getPointer(CGF); + TGTaskLoopArgs[9] = UBLVal.getPointer(CGF); + TGTaskLoopArgs[10] = CGF.EmitLoadOfScalar(StLVal, Loc); + TGTaskLoopArgs[11] = llvm::ConstantInt::getSigned(CGF.IntTy, Data.Nogroup ? 1 : 0); + TGTaskLoopArgs[12] = llvm::ConstantInt::getSigned(CGF.IntTy, Data.Schedule.getPointer() + ? Data.Schedule.getInt() ? NumTasks : Grainsize + : NoSchedule); + TGTaskLoopArgs[13] = Data.Schedule.getPointer() + ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, /*isSigned=*/false) + : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0); + TGTaskLoopArgs[14] = llvm::ConstantInt::getSigned(CGF.IntTy, Data.HasModifier ? 1 : 0); + TGTaskLoopArgs[15] = Result.TaskDupFn + ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Result.TaskDupFn, CGF.VoidPtrTy) + : llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_taskgraph_taskloop), + TGTaskLoopArgs); + }; + + auto &&NonTaskgraphTaskloopCodeGen = + [this, &Loc, &D, TaskFunction, &SharedsTy, &Shareds, IfVal, &Data] + (CodeGenFunction &CGF, PrePostActionTy &) { + std::array DummyArray; + TaskResultTy Result = + emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data, false, DummyArray); + // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() + // libcall. + // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int + // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int + // sched, kmp_uint64 grainsize, void *task_dup); + llvm::Value *ThreadID = getThreadID(CGF, Loc); + llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); + + LValue LBLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); + const auto *LBVar = + cast(cast(D.getLowerBoundVariable())->getDecl()); + CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), + /*IsInitializer=*/true); + LValue UBLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); + const auto *UBVar = + cast(cast(D.getUpperBoundVariable())->getDecl()); + CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), + /*IsInitializer=*/true); + LValue StLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); + const auto *StVar = + cast(cast(D.getStrideVariable())->getDecl()); + CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), + /*IsInitializer=*/true); + // Store reductions address. + LValue RedLVal = CGF.EmitLValueForField( + Result.TDBase, + *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); + if (Data.Reductions) { + CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); + } else { + CGF.EmitNullInitialization(RedLVal.getAddress(), + CGF.getContext().VoidPtrTy); + } + llvm::SmallVector TaskArgs{ + UpLoc, + ThreadID, + Result.NewTask, + IfVal, + LBLVal.getPointer(CGF), + UBLVal.getPointer(CGF), + CGF.EmitLoadOfScalar(StLVal, Loc), + llvm::ConstantInt::getSigned( + CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler + llvm::ConstantInt::getSigned( + CGF.IntTy, Data.Schedule.getPointer() + ? Data.Schedule.getInt() ? NumTasks : Grainsize + : NoSchedule), + Data.Schedule.getPointer() + ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, + /*isSigned=*/false) + : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)}; + if (Data.HasModifier) + TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1)); + + TaskArgs.push_back(Result.TaskDupFn + ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( + Result.TaskDupFn, CGF.VoidPtrTy) + : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), Data.HasModifier + ? OMPRTL___kmpc_taskloop_5 + : OMPRTL___kmpc_taskloop), + TaskArgs); + }; + + if (CGF.getOMPWithinTaskgraph()) { + // Lexically within taskgraph, always replayable. + RegionCodeGenTy TaskgraphRCG(TaskgraphTaskloopCodeGen); + TaskgraphRCG(CGF); } else { - CGF.EmitNullInitialization(RedLVal.getAddress(), - CGF.getContext().VoidPtrTy); + if (ReplayableCond) { + // We have a replayable clause. Taskloop is replayable if its argument + // is omitted or evaluates to TRUE. + emitIfClause(CGF, ReplayableCond, TaskgraphTaskloopCodeGen, + NonTaskgraphTaskloopCodeGen); + } else { + // Not taskgraph, not replayable. + RegionCodeGenTy NonTaskgraphRCG(NonTaskgraphTaskloopCodeGen); + NonTaskgraphRCG(CGF); + } } - enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; - llvm::SmallVector TaskArgs{ - UpLoc, - ThreadID, - Result.NewTask, - IfVal, - LBLVal.getPointer(CGF), - UBLVal.getPointer(CGF), - CGF.EmitLoadOfScalar(StLVal, Loc), - llvm::ConstantInt::getSigned( - CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler - llvm::ConstantInt::getSigned( - CGF.IntTy, Data.Schedule.getPointer() - ? Data.Schedule.getInt() ? NumTasks : Grainsize - : NoSchedule), - Data.Schedule.getPointer() - ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, - /*isSigned=*/false) - : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)}; - if (Data.HasModifier) - TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1)); - - TaskArgs.push_back(Result.TaskDupFn - ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - Result.TaskDupFn, CGF.VoidPtrTy) - : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)); - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), Data.HasModifier - ? OMPRTL___kmpc_taskloop_5 - : OMPRTL___kmpc_taskloop), - TaskArgs); } /// Emit reduction operation for each element of array (required for @@ -6105,9 +6283,15 @@ llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), CGM.VoidPtrTy)}; - return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_taskred_init), - Args); + if (CGF.getOMPWithinTaskgraph()) + return CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_taskgraph_taskred_init), + Args); + else + return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_taskred_init), + Args); } void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, @@ -6166,6 +6350,7 @@ Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, } void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, + const Expr *ReplayableCond, const OMPTaskDataTy &Data) { if (!CGF.HaveInsertPoint()) return; @@ -6181,36 +6366,75 @@ void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *NumOfElements; std::tie(NumOfElements, DependenciesArray) = emitDependClause(CGF, Data.Dependences, Loc); - if (!Data.Dependences.empty()) { - llvm::Value *DepWaitTaskArgs[7]; - DepWaitTaskArgs[0] = UpLoc; - DepWaitTaskArgs[1] = ThreadID; - DepWaitTaskArgs[2] = NumOfElements; - DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF); - DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); - DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); - DepWaitTaskArgs[6] = - llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); - CodeGenFunction::RunCleanupsScope LocalScope(CGF); - - // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid, - // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 - // ndeps_noalias, kmp_depend_info_t *noalias_dep_list, - // kmp_int32 has_no_wait); if dependence info is specified. + auto &&TaskgraphTaskwaitCodeGen = + [this, UpLoc, ThreadID, NumOfElements, &DependenciesArray, &Data] + (CodeGenFunction &CGF, PrePostActionTy &) { + llvm::Value *TGTaskWaitArgs[5]; + TGTaskWaitArgs[0] = UpLoc; + TGTaskWaitArgs[1] = ThreadID; + TGTaskWaitArgs[2] = NumOfElements; + if (Data.Dependences.empty()) { + // This should be a proper error + fprintf(stderr, "*** Taskwait inside taskgraph with no depend clause is not task-generating\n"); + exit(1); + } + TGTaskWaitArgs[3] = DependenciesArray.emitRawPointer(CGF); + TGTaskWaitArgs[4] = + llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - M, OMPRTL___kmpc_omp_taskwait_deps_51), - DepWaitTaskArgs); + CGM.getModule(), OMPRTL___kmpc_taskgraph_taskwait), + TGTaskWaitArgs); + }; + auto &&NonTaskgraphTaskwaitCodeGen = + [this, UpLoc, ThreadID, NumOfElements, &DependenciesArray, &M, &Data] + (CodeGenFunction &CGF, PrePostActionTy &) { + if (!Data.Dependences.empty()) { + llvm::Value *DepWaitTaskArgs[7]; + DepWaitTaskArgs[0] = UpLoc; + DepWaitTaskArgs[1] = ThreadID; + DepWaitTaskArgs[2] = NumOfElements; + DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF); + DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); + DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); + DepWaitTaskArgs[6] = + llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); + + CodeGenFunction::RunCleanupsScope LocalScope(CGF); + + // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid, + // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 + // ndeps_noalias, kmp_depend_info_t *noalias_dep_list, + // kmp_int32 has_no_wait); if dependence info is specified. + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + M, OMPRTL___kmpc_omp_taskwait_deps_51), + DepWaitTaskArgs); + } else { + // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 + // global_tid); + llvm::Value *Args[] = {UpLoc, ThreadID}; + // Ignore return result until untied tasks are supported. + CGF.EmitRuntimeCall( + OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), + Args); + } + }; + if (CGF.getOMPWithinTaskgraph()) { + // Lexically within taskgraph, always replayable. + RegionCodeGenTy TaskgraphRCG(TaskgraphTaskwaitCodeGen); + TaskgraphRCG(CGF); } else { - - // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 - // global_tid); - llvm::Value *Args[] = {UpLoc, ThreadID}; - // Ignore return result until untied tasks are supported. - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait), - Args); + if (ReplayableCond) { + // We have a replayable clause. Taskwait is replayable if its argument + // is omitted or evaluates to TRUE. + emitIfClause(CGF, ReplayableCond, TaskgraphTaskwaitCodeGen, + NonTaskgraphTaskwaitCodeGen); + } else { + // Not taskgraph, not replayable. + RegionCodeGenTy NonTaskgraphRCG(NonTaskgraphTaskwaitCodeGen); + NonTaskgraphRCG(CGF); + } } } @@ -13362,6 +13586,7 @@ void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, + const Expr *ReplayableCond, const OMPTaskDataTy &Data) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -13369,7 +13594,7 @@ void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, void CGOpenMPSIMDRuntime::emitTaskLoopCall( CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, - const Expr *IfCond, const OMPTaskDataTy &Data) { + const Expr *IfCond, const Expr *ReplayableCond, const OMPTaskDataTy &Data) { llvm_unreachable("Not supported in SIMD-only mode"); } @@ -13410,6 +13635,7 @@ Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF, void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, + const Expr *ReplayableCond, const OMPTaskDataTy &Data) { llvm_unreachable("Not supported in SIMD-only mode"); } diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index b74823dd6b7c..7ac06547a540 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -123,6 +123,7 @@ struct OMPTaskDataTy final { bool IsWorksharingReduction = false; bool HasNowaitClause = false; bool HasModifier = false; + const Expr *ReplayableCond = nullptr; }; /// Class intended to support codegen of all kind of the reduction clauses. @@ -582,7 +583,9 @@ protected: TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, - Address Shareds, const OMPTaskDataTy &Data); + Address Shareds, const OMPTaskDataTy &Data, + bool ForTaskgraph, + std::array &TaskAllocArgs); /// Emit update for lastprivate conditional data. void emitLastprivateConditionalUpdate(CodeGenFunction &CGF, LValue IVLVal, @@ -1175,6 +1178,7 @@ public: const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, + const Expr *ReplayableCond, const OMPTaskDataTy &Data); /// Emit task region for the taskloop directive. The taskloop region is @@ -1210,7 +1214,8 @@ public: const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, - const Expr *IfCond, const OMPTaskDataTy &Data); + const Expr *IfCond, const Expr *ReplayableCond, + const OMPTaskDataTy &Data); /// Emit code for the directive that does not require outlining. /// @@ -1378,6 +1383,7 @@ public: /// Emit code for 'taskwait' directive. virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, + const Expr *ReplayableCond, const OMPTaskDataTy &Data); /// Emit code for 'taskgraph' directive. @@ -2056,6 +2062,7 @@ public: const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, + const Expr *ReplayableCond, const OMPTaskDataTy &Data) override; /// Emit task region for the taskloop directive. The taskloop region is @@ -2090,7 +2097,8 @@ public: void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, - const OMPTaskDataTy &Data) override; + const Expr *ReplayableCond, const OMPTaskDataTy &Data) + override; /// Emit a code for reduction clause. Next code should be emitted for /// reduction: @@ -2210,7 +2218,8 @@ public: /// Emit code for 'taskwait' directive. void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, - const OMPTaskDataTy &Data) override; + const Expr *ReplayableCond, const OMPTaskDataTy &Data) + override; /// Emit code for 'taskgraph' directive. /// \param IfCond Expression evaluated in if clause associated with the target diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 724f093279ac..a88016edeb96 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -5483,8 +5483,12 @@ void CodeGenFunction::EmitOMPTargetTaskBasedDirective( IntegerLiteral IfCond(getContext(), TrueOrFalse, getContext().getIntTypeForBitwidth(32, /*Signed=*/0), SourceLocation()); + IntegerLiteral ReplayableCond(getContext(), llvm::APInt(32, 1), + getContext().getIntTypeForBitwidth(32, /*Signed=*/0), + SourceLocation()); CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn, - SharedsTy, CapturedStruct, &IfCond, Data); + SharedsTy, CapturedStruct, &IfCond, + &ReplayableCond, Data); } void CodeGenFunction::processInReduction(const OMPExecutableDirective &S, @@ -5593,15 +5597,27 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { OMPTaskDataTy Data; // Check if we should emit tied or untied task. Data.Tied = !S.getSingleClause(); + const Expr *ReplayableCond = nullptr; + if (auto *RC = S.getSingleClause()) { + ReplayableCond = RC->getCondition(); + if (!ReplayableCond) { + ReplayableCond = + IntegerLiteral::Create( + getContext(), llvm::APInt(32, 1), + getContext().getIntTypeForBitwidth(32, /*Signed=*/0), + SourceLocation()); + } + } auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { CGF.EmitStmt(CS->getCapturedStmt()); }; auto &&TaskGen = [&S, SharedsTy, CapturedStruct, - IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, + IfCond, ReplayableCond](CodeGenFunction &CGF, + llvm::Function *OutlinedFn, const OMPTaskDataTy &Data) { CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, - Data); + ReplayableCond, Data); }; auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); @@ -5632,7 +5648,19 @@ void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { // Build list of dependences buildDependences(S, Data); Data.HasNowaitClause = S.hasClausesOfKind(); - CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), Data); + const Expr *ReplayableCond = nullptr; + if (auto *RC = S.getSingleClause()) { + ReplayableCond = RC->getCondition(); + if (!ReplayableCond) { + ReplayableCond = + IntegerLiteral::Create( + getContext(), llvm::APInt(32, 1), + getContext().getIntTypeForBitwidth(32, /*Signed=*/0), + SourceLocation()); + } + } + CGM.getOpenMPRuntime().emitTaskwaitCall(*this, S.getBeginLoc(), + ReplayableCond, Data); } void CodeGenFunction::EmitOMPTaskgraphDirective( @@ -7987,6 +8015,18 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { } } + const Expr *ReplayableCond = nullptr; + if (auto *RC = S.getSingleClause()) { + ReplayableCond = RC->getCondition(); + if (!ReplayableCond) { + ReplayableCond = + IntegerLiteral::Create( + getContext(), llvm::APInt(32, 1), + getContext().getIntTypeForBitwidth(32, /*Signed=*/0), + SourceLocation()); + } + } + OMPTaskDataTy Data; // Check if taskloop must be emitted without taskgroup. Data.Nogroup = S.getSingleClause(); @@ -8106,15 +8146,18 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { (*LIP)->getType(), S.getBeginLoc())); }); }; - auto &&TaskGen = [&S, SharedsTy, CapturedStruct, - IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, - const OMPTaskDataTy &Data) { - auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, - &Data](CodeGenFunction &CGF, PrePostActionTy &) { + auto &&TaskGen = + [&S, SharedsTy, CapturedStruct, IfCond, ReplayableCond] + (CodeGenFunction &CGF, llvm::Function *OutlinedFn, + const OMPTaskDataTy &Data) { + auto &&CodeGen = + [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, ReplayableCond, + &Data](CodeGenFunction &CGF, PrePostActionTy &) { OMPLoopScope PreInitScope(CGF, S); CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S, OutlinedFn, SharedsTy, - CapturedStruct, IfCond, Data); + CapturedStruct, IfCond, + ReplayableCond, Data); }; CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, CodeGen); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 2b2d08570ee3..9ddada466dba 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -812,6 +812,22 @@ public: } }; + bool OMPWithinTaskgraph = false; + + bool getOMPWithinTaskgraph() { return OMPWithinTaskgraph; } + void setOMPWithinTaskgraph(bool In) { OMPWithinTaskgraph = In; } + + class OMPWithinTaskgraphRAII { + CodeGenFunction &CGF; + public: + OMPWithinTaskgraphRAII(CodeGenFunction &CGF_) : CGF(CGF_) { + CGF.setOMPWithinTaskgraph(true); + } + ~OMPWithinTaskgraphRAII() { + CGF.setOMPWithinTaskgraph(false); + } + }; + template typename DominatingValue::saved_type saveValueInCond(T value) { return DominatingValue::save(*this, value); diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 979d376d438f..da1555f02eb1 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -3328,6 +3328,7 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, case OMPC_reverse_offload: case OMPC_dynamic_allocators: case OMPC_full: + case OMPC_replayable: // OpenMP [2.7.1, Restrictions, p. 9] // Only one ordered clause can appear on a loop directive. // OpenMP [2.7.1, Restrictions, C/C++, p. 4] @@ -3341,7 +3342,8 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, ErrorFound = true; } - if (CKind == OMPC_nowait && PP.LookAhead(/*N=*/0).is(tok::l_paren) && + if ((CKind == OMPC_nowait || CKind == OMPC_replayable) && + PP.LookAhead(/*N=*/0).is(tok::l_paren) && getLangOpts().OpenMP >= 60) Clause = ParseOpenMPSingleExprClause(CKind, WrongDirective); else @@ -3362,6 +3364,17 @@ OMPClause *Parser::ParseOpenMPClause(OpenMPDirectiveKind DKind, } Clause = ParseOpenMPClause(CKind, WrongDirective); break; + if (getLangOpts().OpenMP < 60) { + // FIXME: This isn't an appropriate error message. + Diag(Tok, diag::err_omp_expected_clause) + << getOpenMPDirectiveName(OMPD_requires, OMPVersion); + ErrorFound = true; + } + if (PP.LookAhead(/*N=*/0).is(tok::l_paren)) + Clause = ParseOpenMPSingleExprClause(CKind, WrongDirective); + else + Clause = ParseOpenMPClause(CKind, WrongDirective); + break; case OMPC_update: if (!FirstClause) { Diag(Tok, diag::err_omp_more_one_clause) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index 899fc19fbd4b..fff9445c168d 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -16623,6 +16623,9 @@ OMPClause *SemaOpenMP::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, case OMPC_graph_reset: Res = ActOnOpenMPGraphResetClause(Expr, StartLoc, LParenLoc, EndLoc); break; + case OMPC_replayable: + Res = ActOnOpenMPReplayableClause(StartLoc, EndLoc, LParenLoc, Expr); + break; case OMPC_novariants: Res = ActOnOpenMPNovariantsClause(Expr, StartLoc, LParenLoc, EndLoc); break; @@ -18326,6 +18329,11 @@ OMPClause *SemaOpenMP::ActOnOpenMPClause(OpenMPClauseKind Kind, case OMPC_self_maps: Res = ActOnOpenMPSelfMapsClause(StartLoc, EndLoc); break; + case OMPC_replayable: + Res = ActOnOpenMPReplayableClause(StartLoc, EndLoc, + /*LParenLoc=*/SourceLocation(), + /*Condition=*/nullptr); + break; case OMPC_destroy: Res = ActOnOpenMPDestroyClause(/*InteropVar=*/nullptr, StartLoc, /*LParenLoc=*/SourceLocation(), @@ -18560,6 +18568,26 @@ OMPClause *SemaOpenMP::ActOnOpenMPSelfMapsClause(SourceLocation StartLoc, return new (getASTContext()) OMPSelfMapsClause(StartLoc, EndLoc); } +OMPClause *SemaOpenMP::ActOnOpenMPReplayableClause(SourceLocation StartLoc, + SourceLocation EndLoc, + SourceLocation LParenLoc, + Expr *Condition) { + Expr *ValExpr = Condition; + if (Condition && LParenLoc.isValid()) { + if (!Condition->isValueDependent() && !Condition->isTypeDependent() && + !Condition->isInstantiationDependent() && + !Condition->containsUnexpandedParameterPack()) { + ExprResult Val = SemaRef.CheckBooleanCondition(StartLoc, Condition); + if (Val.isInvalid()) + return nullptr; + + ValExpr = Val.get(); + } + } + return new (getASTContext()) + OMPReplayableClause(ValExpr, StartLoc, LParenLoc, EndLoc); +} + StmtResult SemaOpenMP::ActOnOpenMPInteropDirective(ArrayRef Clauses, SourceLocation StartLoc, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 766b08929e7f..19dc278e14cc 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1892,6 +1892,18 @@ public: LParenLoc, Condition); } + /// Build a new OpenMP 'replayable' clause. + /// + /// By default, performs semantic analysis to build the new OpenMP clause. + /// Subclasses may override this routine to provide different behavior. + OMPClause *RebuildOMPReplayableClause(Expr *Condition, + SourceLocation StartLoc, + SourceLocation LParenLoc, + SourceLocation EndLoc) { + return getSema().OpenMP().ActOnOpenMPReplayableClause(StartLoc, EndLoc, + LParenLoc, Condition); + } + /// Build a new OpenMP 'private' clause. /// /// By default, performs semantic analysis to build the new OpenMP clause. @@ -10822,6 +10834,20 @@ TreeTransform::TransformOMPNowaitClause(OMPNowaitClause *C) { C->getLParenLoc(), C->getEndLoc()); } +template +OMPClause * +TreeTransform::TransformOMPReplayableClause(OMPReplayableClause *C) { + ExprResult Cond; + if (auto *Condition = C->getCondition()) { + Cond = getDerived().TransformExpr(Condition); + if (Cond.isInvalid()) + return nullptr; + } + return getDerived().RebuildOMPReplayableClause(Cond.get(), C->getBeginLoc(), + C->getLParenLoc(), + C->getEndLoc()); +} + template OMPClause * TreeTransform::TransformOMPUntiedClause(OMPUntiedClause *C) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 79ab15a09cde..a0fcc2189bf4 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -11609,6 +11609,12 @@ OMPClause *OMPClauseReader::readClause() { case llvm::omp::OMPC_graph_id: C = new (Context) OMPGraphIdClause(); break; + case llvm::omp::OMPC_graph_reset: + C = new (Context) OMPGraphResetClause(); + break; + case llvm::omp::OMPC_replayable: + C = new (Context) OMPReplayableClause(); + break; case llvm::omp::OMPC_num_tasks: C = new (Context) OMPNumTasksClause(); break; @@ -11903,6 +11909,11 @@ void OMPClauseReader::VisitOMPNowaitClause(OMPNowaitClause *C) { C->setLParenLoc(Record.readSourceLocation()); } +void OMPClauseReader::VisitOMPReplayableClause(OMPReplayableClause *C) { + C->setCondition(Record.readSubExpr()); + C->setLParenLoc(Record.readSourceLocation()); +} + void OMPClauseReader::VisitOMPUntiedClause(OMPUntiedClause *) {} void OMPClauseReader::VisitOMPMergeableClause(OMPMergeableClause *) {} diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 9c2aa6632c12..d66add25f8b6 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -8093,6 +8093,11 @@ void OMPClauseWriter::VisitOMPNowaitClause(OMPNowaitClause *C) { Record.AddSourceLocation(C->getLParenLoc()); } +void OMPClauseWriter::VisitOMPReplayableClause(OMPReplayableClause *C) { + Record.AddStmt(C->getCondition()); + Record.AddSourceLocation(C->getLParenLoc()); +} + void OMPClauseWriter::VisitOMPUntiedClause(OMPUntiedClause *) {} void OMPClauseWriter::VisitOMPMergeableClause(OMPMergeableClause *) {} diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 3af9d481f4b9..cee15395404d 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -2408,6 +2408,10 @@ void OMPClauseEnqueue::VisitOMPNowaitClause(const OMPNowaitClause *C) { Visitor->AddStmt(C->getCondition()); } +void OMPClauseEnqueue::VisitOMPReplayableClause(const OMPReplayableClause *C) { + Visitor->AddStmt(C->getCondition()); +} + void OMPClauseEnqueue::VisitOMPUntiedClause(const OMPUntiedClause *) {} void OMPClauseEnqueue::VisitOMPMergeableClause(const OMPMergeableClause *) {} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 09a899cbf256..00f5753159f6 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -499,6 +499,7 @@ def OMPC_Release : Clause<[Spelling<"release">]> { let clangClass = "OMPReleaseClause"; } def OMPC_Replayable : Clause<[Spelling<"replayable">]> { + let clangClass = "OMPReplayableClause"; let flangClass = "OmpReplayableClause"; let isValueOptional = true; } diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 288585c8b42a..dfc00289a409 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -357,7 +357,17 @@ __OMP_RTL(__kmpc_omp_task, false, Int32, IdentPtr, Int32, /* kmp_task_t */ VoidPtr) __OMP_RTL(__kmpc_end_taskgroup, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_taskgroup, false, Void, IdentPtr, Int32) -__OMP_RTL(__kmpc_taskgraph, false, Void, IdentPtr, Int32, Int32, Int32, Int32, VoidPtr, VoidPtr) +__OMP_RTL(__kmpc_taskgraph, false, Void, IdentPtr, Int32, VoidPtrPtr, Int32, + Int32, Int32, VoidPtr, VoidPtr) +__OMP_RTL(__kmpc_taskgraph_task, false, Int32, IdentPtr, Int32, VoidPtr, Int32, + SizeTy, VoidPtr, SizeTy, Int32, VoidPtr) +__OMP_RTL(__kmpc_taskgraph_taskloop, false, Int32, IdentPtr, Int32, VoidPtr, + Int32, SizeTy, VoidPtr, SizeTy, Int32, Int64Ptr, Int64Ptr, Int64, + Int32, Int32, Int64, Int32, VoidPtr) +__OMP_RTL(__kmpc_taskgraph_taskwait, false, Void, IdentPtr, Int32, Int32, + VoidPtr, Int32) +__OMP_RTL(__kmpc_taskgraph_taskred_init, false, /* kmp_taskgroup */ VoidPtr, + Int32, Int32, VoidPtr) __OMP_RTL(__kmpc_omp_task_begin_if0, false, Void, IdentPtr, Int32, /* kmp_task_t */ VoidPtr) __OMP_RTL(__kmpc_omp_task_complete_if0, false, Void, IdentPtr, Int32, -- cgit v1.2.3