diff options
-rw-r--r-- | clang/lib/CodeGen/CGOpenMPRuntime.cpp | 4 | ||||
-rw-r--r-- | clang/lib/CodeGen/CGStmtOpenMP.cpp | 77 | ||||
-rw-r--r-- | llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 99 | ||||
-rw-r--r-- | llvm/include/llvm/Transforms/Utils/CodeExtractor.h | 24 | ||||
-rw-r--r-- | llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 298 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/HotColdSplitting.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/IROutliner.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 11 | ||||
-rw-r--r-- | llvm/lib/Transforms/Utils/CodeExtractor.cpp | 37 | ||||
-rw-r--r-- | llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp | 328 | ||||
-rw-r--r-- | llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp | 2 | ||||
-rw-r--r-- | mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 233 | ||||
-rw-r--r-- | mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir | 18 | ||||
-rw-r--r-- | mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir | 4 | ||||
-rw-r--r-- | mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir | 2 |
15 files changed, 629 insertions, 514 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index f98339d..f0cb753 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -10500,8 +10500,8 @@ void CGOpenMPRuntime::emitTargetDataCalls( llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP); llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(OMPBuilder.createTargetData( - OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB, - CustomMapperCB, + OmpLoc, AllocaIP, CodeGenIP, /*DeallocIPs=*/{}, DeviceID, IfCondVal, + Info, GenMapInfoCB, CustomMapperCB, /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc)); CGF.Builder.restoreIP(AfterIP); } diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index f6a0ca5..c9ac207 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -1835,10 +1835,10 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); - auto BodyGenCB = [&, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { + auto BodyGenCB = [&, this](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( - *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel"); + *this, ParallelRegionBodyStmt, AllocIP, CodeGenIP, "parallel"); return llvm::Error::success(); }; @@ -1846,9 +1846,10 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); - llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail( - OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, - IfCond, NumThreads, ProcBind, 
S.hasCancel())); + llvm::OpenMPIRBuilder::InsertPointTy AfterIP = + cantFail(OMPBuilder.createParallel( + Builder, AllocaIP, /*DeallocIPs=*/{}, BodyGenCB, PrivCB, FiniCB, + IfCond, NumThreads, ProcBind, S.hasCancel())); Builder.restoreIP(AfterIP); return; } @@ -4361,21 +4362,23 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; if (CS) { for (const Stmt *SubStmt : CS->children()) { - auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { - OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, SubStmt, AllocaIP, CodeGenIP, "section"); + auto SectionCB = [this, SubStmt](InsertPointTy AllocIP, + InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { + OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(*this, SubStmt, AllocIP, + CodeGenIP, "section"); return llvm::Error::success(); }; SectionCBVector.push_back(SectionCB); } } else { - auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { - OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, CapturedStmt, AllocaIP, CodeGenIP, "section"); - return llvm::Error::success(); - }; + auto SectionCB = + [this, CapturedStmt](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { + OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( + *this, CapturedStmt, AllocIP, CodeGenIP, "section"); + return llvm::Error::success(); + }; SectionCBVector.push_back(SectionCB); } @@ -4429,10 +4432,11 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { return llvm::Error::success(); }; - auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { + auto BodyGenCB = [SectionRegionBodyStmt, + this](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, 
"section"); + *this, SectionRegionBodyStmt, AllocIP, CodeGenIP, "section"); return llvm::Error::success(); }; @@ -4514,10 +4518,11 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { return llvm::Error::success(); }; - auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { + auto BodyGenCB = [MasterRegionBodyStmt, + this](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master"); + *this, MasterRegionBodyStmt, AllocIP, CodeGenIP, "master"); return llvm::Error::success(); }; @@ -4564,10 +4569,11 @@ void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { return llvm::Error::success(); }; - auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { + auto BodyGenCB = [MaskedRegionBodyStmt, + this](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked"); + *this, MaskedRegionBodyStmt, AllocIP, CodeGenIP, "masked"); return llvm::Error::success(); }; @@ -4607,10 +4613,11 @@ void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { return llvm::Error::success(); }; - auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { + auto BodyGenCB = [CriticalRegionBodyStmt, + this](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical"); + *this, CriticalRegionBodyStmt, AllocIP, CodeGenIP, "critical"); return llvm::Error::success(); }; @@ -5577,8 +5584,8 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( InsertPointTy AllocaIP(AllocaInsertPt->getParent(), 
AllocaInsertPt->getIterator()); - auto BodyGenCB = [&, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { + auto BodyGenCB = [&, this](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { Builder.restoreIP(CodeGenIP); EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); return llvm::Error::success(); @@ -5587,7 +5594,8 @@ void CodeGenFunction::EmitOMPTaskgroupDirective( if (!CapturedStmtInfo) CapturedStmtInfo = &CapStmtInfo; llvm::OpenMPIRBuilder::InsertPointTy AfterIP = - cantFail(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB)); + cantFail(OMPBuilder.createTaskgroup(Builder, AllocaIP, + /*DeallocIPs=*/{}, BodyGenCB)); Builder.restoreIP(AfterIP); return; } @@ -6167,8 +6175,9 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { return llvm::Error::success(); }; - auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) { + auto BodyGenCB = [&S, C, this](InsertPointTy AllocIP, + InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { Builder.restoreIP(CodeGenIP); const CapturedStmt *CS = S.getInnermostCapturedStmt(); @@ -6186,7 +6195,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { OutlinedFn, CapturedVars); } else { OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( - *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered"); + *this, CS->getCapturedStmt(), AllocIP, CodeGenIP, "ordered"); } return llvm::Error::success(); }; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 36be9bf..d033691 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -602,17 +602,19 @@ public: /// such InsertPoints need to be preserved, it can split the block itself /// before calling the callback. /// - /// AllocaIP and CodeGenIP must not point to the same position. 
- /// - /// \param AllocaIP is the insertion point at which new alloca instructions - /// should be placed. The BasicBlock it is pointing to must - /// not be split. - /// \param CodeGenIP is the insertion point at which the body code should be - /// placed. - /// + /// AllocIP and CodeGenIP must not point to the same position. + /// + /// \param AllocIP is the insertion point at which new allocations should + /// be placed. The BasicBlock it is pointing to must not be + /// split. + /// \param CodeGenIP is the insertion point at which the body code should be + /// placed. + /// \param DeallocIPs is the list of insertion points where explicit + /// deallocations, if needed, should be placed. /// \return an error, if any were triggered during execution. using BodyGenCallbackTy = - function_ref<Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; + function_ref<Error(InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs)>; // This is created primarily for sections construct as llvm::function_ref // (BodyGenCallbackTy) is not storable (as described in the comments of @@ -621,7 +623,8 @@ public: /// /// \return an error, if any were triggered during execution. using StorableBodyGenCallbackTy = - std::function<Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; + std::function<Error(InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs)>; /// Callback type for loop body code generation. /// @@ -715,7 +718,9 @@ public: /// Generator for '#omp parallel' /// /// \param Loc The insert and source location description. - /// \param AllocaIP The insertion points to be used for alloca instructions. + /// \param AllocIP The insertion point to be used for allocations. + /// \param DeallocIPs The insertion points to be used for explicit + /// deallocations, if needed. /// \param BodyGenCB Callback that will generate the region code. /// \param PrivCB Callback to copy a given variable (think copy constructor). 
/// \param FiniCB Callback to finalize variable copies. @@ -726,10 +731,10 @@ public: /// /// \returns The insertion position *after* the parallel. LLVM_ABI InsertPointOrErrorTy createParallel( - const LocationDescription &Loc, InsertPointTy AllocaIP, - BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, - FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, - omp::ProcBindKind ProcBind, bool IsCancellable); + const LocationDescription &Loc, InsertPointTy AllocIP, + ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB, + PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, + Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable); /// Generator for the control flow structure of an OpenMP canonical loop. /// @@ -1347,7 +1352,9 @@ public: /// Generator for `#omp task` /// /// \param Loc The location where the task construct was encountered. - /// \param AllocaIP The insertion point to be used for alloca instructions. + /// \param AllocIP The insertion point to be used for allocations. + /// \param DeallocIPs The insertion points to be used for explicit + /// deallocations, if needed. /// \param BodyGenCB Callback that will generate the region code. /// \param Tied True if the task is tied, false if the task is untied. 
/// \param Final i1 value which is `true` if the task is final, `false` if the @@ -1363,21 +1370,23 @@ public: /// \param Mergeable If the given task is `mergeable` /// \param priority `priority-value' specifies the execution order of the /// tasks that is generated by the construct - LLVM_ABI InsertPointOrErrorTy - createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, - BodyGenCallbackTy BodyGenCB, bool Tied = true, - Value *Final = nullptr, Value *IfCondition = nullptr, - SmallVector<DependData> Dependencies = {}, bool Mergeable = false, - Value *EventHandle = nullptr, Value *Priority = nullptr); + LLVM_ABI InsertPointOrErrorTy createTask( + const LocationDescription &Loc, InsertPointTy AllocIP, + ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB, + bool Tied = true, Value *Final = nullptr, Value *IfCondition = nullptr, + SmallVector<DependData> Dependencies = {}, bool Mergeable = false, + Value *EventHandle = nullptr, Value *Priority = nullptr); /// Generator for the taskgroup construct /// /// \param Loc The location where the taskgroup construct was encountered. - /// \param AllocaIP The insertion point to be used for alloca instructions. + /// \param AllocIP The insertion point to be used for allocations. + /// \param DeallocIPs The insertion points to be used for explicit + /// deallocations, if needed. /// \param BodyGenCB Callback that will generate the region code. 
- LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, - InsertPointTy AllocaIP, - BodyGenCallbackTy BodyGenCB); + LLVM_ABI InsertPointOrErrorTy createTaskgroup( + const LocationDescription &Loc, InsertPointTy AllocIP, + ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB); using FileIdentifierInfoCallbackTy = std::function<std::tuple<std::string, uint64_t>()>; @@ -2246,7 +2255,8 @@ public: struct OutlineInfo { using PostOutlineCBTy = std::function<void(Function &)>; PostOutlineCBTy PostOutlineCB; - BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB; + BasicBlock *EntryBB, *ExitBB, *OuterAllocBB; + SmallVector<BasicBlock *> OuterDeallocBBs; SmallVector<Value *, 2> ExcludeArgsFromAggregate; LLVM_ABI virtual ~OutlineInfo() = default; @@ -2319,7 +2329,8 @@ public: /// \return an error, if any were triggered during execution. LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, - InsertPointTy AllocaIP = {}); + InsertPointTy AllocIP = {}, + ArrayRef<InsertPointTy> DeallocIPs = {}); /// Create the global variable holding the offload mappings information. LLVM_ABI GlobalVariable * @@ -2874,11 +2885,13 @@ public: /// Generator for `#omp distribute` /// /// \param Loc The location where the distribute construct was encountered. - /// \param AllocaIP The insertion points to be used for alloca instructions. + /// \param AllocIP The insertion point to be used for allocations. + /// \param DeallocIPs The insertion points to be used for explicit + /// deallocations, if needed. /// \param BodyGenCB Callback that will generate the region code. 
- LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, - InsertPointTy AllocaIP, - BodyGenCallbackTy BodyGenCB); + LLVM_ABI InsertPointOrErrorTy createDistribute( + const LocationDescription &Loc, InsertPointTy AllocIP, + ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB); /// Generate conditional branch and relevant BasicBlocks through which private /// threads copy the 'copyin' variables from Master copy to threadprivate @@ -3206,9 +3219,11 @@ public: /// Generator for '#omp target data' /// /// \param Loc The location where the target data construct was encountered. - /// \param AllocaIP The insertion points to be used for alloca instructions. + /// \param AllocIP The insertion point to be used for allocations. /// \param CodeGenIP The insertion point at which the target directive code /// should be placed. + /// \param DeallocIPs The insertion points at which explicit deallocations + /// should be placed, if needed. /// \param IsBegin If true then emits begin mapper call otherwise emits /// end mapper call. /// \param DeviceID Stores the DeviceID from the device clause. @@ -3221,10 +3236,10 @@ public: /// \param DeviceAddrCB Optional callback to generate code related to /// use_device_ptr and use_device_addr. 
LLVM_ABI InsertPointOrErrorTy createTargetData( - const LocationDescription &Loc, InsertPointTy AllocaIP, - InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, - TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, - CustomMapperCallbackTy CustomMapperCB, + const LocationDescription &Loc, InsertPointTy AllocIP, + InsertPointTy CodeGenIP, ArrayRef<InsertPointTy> DeallocIPs, + Value *DeviceID, Value *IfCond, TargetDataInfo &Info, + GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc = nullptr, function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> @@ -3233,7 +3248,8 @@ public: Value *SrcLocInfo = nullptr); using TargetBodyGenCallbackTy = function_ref<InsertPointOrErrorTy( - InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>; + InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs)>; using TargetGenArgAccessorsCallbackTy = function_ref<InsertPointOrErrorTy( Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, @@ -3245,6 +3261,8 @@ public: /// \param IsOffloadEntry whether it is an offload entry. /// \param CodeGenIP The insertion point where the call to the outlined /// function should be emitted. + /// \param DeallocIPs The insertion points at which explicit deallocations + /// should be placed, if needed. /// \param Info Stores all information realted to the Target directive. /// \param EntryInfo The entry information about the function. /// \param DefaultAttrs Structure containing the default attributes, including @@ -3265,8 +3283,9 @@ public: /// not. 
LLVM_ABI InsertPointOrErrorTy createTarget( const LocationDescription &Loc, bool IsOffloadEntry, - OpenMPIRBuilder::InsertPointTy AllocaIP, - OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetDataInfo &Info, + OpenMPIRBuilder::InsertPointTy AllocIP, + OpenMPIRBuilder::InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h index b3bea96..7b1e3a7 100644 --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -100,13 +100,13 @@ public: /// will be placed in the entry block of the function. BasicBlock *AllocationBlock; - /// A block outside of the extraction set where deallocations for - /// intermediate allocations can be placed inside. Not used for - /// automatically deallocated memory (e.g. `alloca`), which is the default. + /// A set of blocks outside of the extraction set where deallocations for + /// intermediate allocations should be placed. Not used for automatically + /// deallocated memory (e.g. `alloca`), which is the default. /// - /// If it is null and needed, the end of the replacement basic block will be - /// used to place deallocations. - BasicBlock *DeallocationBlock; + /// If it is empty and needed, the end of the replacement basic block will + /// be used to place deallocations. + SmallVector<BasicBlock *> DeallocationBlocks; /// If true, varargs functions can be extracted. bool AllowVarArgs; @@ -156,11 +156,11 @@ public: /// Any new allocations will be placed in the AllocationBlock, unless /// it is null, in which case it will be placed in the entry block of /// the function from which the code is being extracted. 
Explicit - /// deallocations for the aforementioned allocations will be placed in the - /// DeallocationBlock or the end of the replacement block, if needed. - /// If ArgsInZeroAddressSpace param is set to true, then the aggregate - /// param pointer of the outlined function is declared in zero address - /// space. + /// deallocations for the aforementioned allocations will be placed, if + /// needed, in all blocks in DeallocationBlocks or the end of the + /// replacement block. If ArgsInZeroAddressSpace param is set to true, then + /// the aggregate param pointer of the outlined function is declared in zero + /// address space. LLVM_ABI CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr, bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, @@ -168,7 +168,7 @@ public: AssumptionCache *AC = nullptr, bool AllowVarArgs = false, bool AllowAlloca = false, BasicBlock *AllocationBlock = nullptr, - BasicBlock *DeallocationBlock = nullptr, + ArrayRef<BasicBlock *> DeallocationBlocks = {}, std::string Suffix = "", bool ArgsInZeroAddressSpace = false); LLVM_ABI virtual ~CodeExtractor() = default; diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 8b22112..21364c2 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -476,10 +476,10 @@ public: AssumptionCache *AC = nullptr, bool AllowVarArgs = false, bool AllowAlloca = false, BasicBlock *AllocationBlock = nullptr, - BasicBlock *DeallocationBlock = nullptr, + ArrayRef<BasicBlock *> DeallocationBlocks = {}, std::string Suffix = "", bool ArgsInZeroAddressSpace = false) : CodeExtractor(BBs, DT, AggregateArgs, BFI, BPI, AC, AllowVarArgs, - AllowAlloca, AllocationBlock, DeallocationBlock, Suffix, + AllowAlloca, AllocationBlock, DeallocationBlocks, Suffix, ArgsInZeroAddressSpace), OMPBuilder(OMPBuilder) {} @@ -491,32 +491,16 @@ protected: class DeviceSharedMemCodeExtractor : public OMPCodeExtractor { 
public: - DeviceSharedMemCodeExtractor( - OpenMPIRBuilder &OMPBuilder, BasicBlock *AllocBlockOverride, - ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr, - bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, - BranchProbabilityInfo *BPI = nullptr, AssumptionCache *AC = nullptr, - bool AllowVarArgs = false, bool AllowAlloca = false, - BasicBlock *AllocationBlock = nullptr, - BasicBlock *DeallocationBlock = nullptr, std::string Suffix = "", - bool ArgsInZeroAddressSpace = false) - : OMPCodeExtractor(OMPBuilder, BBs, DT, AggregateArgs, BFI, BPI, AC, - AllowVarArgs, AllowAlloca, AllocationBlock, - DeallocationBlock, Suffix, ArgsInZeroAddressSpace), - AllocBlockOverride(AllocBlockOverride) {} + using OMPCodeExtractor::OMPCodeExtractor; virtual ~DeviceSharedMemCodeExtractor() = default; protected: virtual Instruction * - allocateVar(BasicBlock *, BasicBlock::iterator, Type *VarType, + allocateVar(BasicBlock *BB, BasicBlock::iterator AllocIP, Type *VarType, const Twine &Name = Twine(""), AddrSpaceCastInst **CastedAlloc = nullptr) override { - // Ignore the CastedAlloc pointer, if requested, because shared memory - // should not be casted to address space 0 to be passed around. return OMPBuilder.createOMPAllocShared( - OpenMPIRBuilder::InsertPointTy( - AllocBlockOverride, AllocBlockOverride->getFirstInsertionPt()), - VarType, Name); + OpenMPIRBuilder::InsertPointTy(BB, AllocIP), VarType, Name); } virtual Instruction *deallocateVar(BasicBlock *BB, @@ -525,19 +509,12 @@ protected: return OMPBuilder.createOMPFreeShared( OpenMPIRBuilder::InsertPointTy(BB, DeallocIP), Var, VarType); } - -private: - // TODO: Remove the need for this override and instead get the CodeExtractor - // to provide a valid insert point for explicit deallocations by correctly - // populating its DeallocationBlock. - BasicBlock *AllocBlockOverride; }; /// Helper storing information about regions to outline using device shared /// memory for intermediate allocations. 
struct DeviceSharedMemOutlineInfo : public OpenMPIRBuilder::OutlineInfo { OpenMPIRBuilder &OMPBuilder; - BasicBlock *AllocBlockOverride = nullptr; DeviceSharedMemOutlineInfo(OpenMPIRBuilder &OMPBuilder) : OMPBuilder(OMPBuilder) {} @@ -1667,11 +1644,11 @@ hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, } OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( - const LocationDescription &Loc, InsertPointTy OuterAllocaIP, - BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, - FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, - omp::ProcBindKind ProcBind, bool IsCancellable) { - assert(!isConflictIP(Loc.IP, OuterAllocaIP) && "IPs must not be ambiguous"); + const LocationDescription &Loc, InsertPointTy OuterAllocIP, + ArrayRef<InsertPointTy> OuterDeallocIPs, BodyGenCallbackTy BodyGenCB, + PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, + Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) { + assert(!isConflictIP(Loc.IP, OuterAllocIP) && "IPs must not be ambiguous"); if (!updateToLocation(Loc)) return Loc.IP; @@ -1711,7 +1688,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( // Save the outer alloca block because the insertion iterator may get // invalidated and we still need this later. - BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock(); + BasicBlock *OuterAllocaBlock = OuterAllocIP.getBlock(); // Vector to remember instructions we used only during the modeling but which // we want to delete at the end. @@ -1807,7 +1784,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( // Let the caller create the body. 
assert(BodyGenCB && "Expected body generation callback!"); InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin()); - if (Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP)) + InsertPointTy DeallocIP(PRegExitBB, PRegExitBB->begin()); + if (Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP, DeallocIP)) return Err; LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n"); @@ -1820,35 +1798,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( // If OuterFn is not a Generic kernel, skip custom allocation. This causes // the CodeExtractor to follow its default behavior. Otherwise, we need to // use device shared memory to allocate argument structures. - if (ExecMode && *ExecMode & OMP_TGT_EXEC_MODE_GENERIC) { - auto Info = std::make_unique<DeviceSharedMemOutlineInfo>(*this); - - // Instead of using the insertion point provided by the CodeExtractor, - // here we need to use the block that eventually calls the outlined - // function for the `parallel` construct. - // - // The reason is that the explicit deallocation call will be inserted - // within the outlined function, whereas the alloca insertion point - // might actually be located somewhere else in the caller. This becomes - // a problem when e.g. `parallel` is inside of a `distribute` construct, - // because the deallocation would be executed multiple times and the - // allocation just once (outside of the loop). - // - // TODO: Ideally, we'd want to do the allocation and deallocation - // outside of the `parallel` outlined function, hence using here the - // insertion point provided by the CodeExtractor. We can't do this at - // the moment because there is currently no way of passing an eligible - // insertion point for the explicit deallocation to the CodeExtractor, - // as that block is created (at least when nested inside of - // `distribute`) sometime after createParallel() completed, so it can't - // be stored in the OutlineInfo structure here. 
- // - // The current approach results in an explicit allocation and - // deallocation pair for each `distribute` loop iteration in that case, - // which is suboptimal. - Info->AllocBlockOverride = EntryBB; - return Info; - } + if (ExecMode && *ExecMode & OMP_TGT_EXEC_MODE_GENERIC) + return std::make_unique<DeviceSharedMemOutlineInfo>(*this); } return std::make_unique<OutlineInfo>(); }(); @@ -1870,9 +1821,12 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( }; } - OI->OuterAllocaBB = OuterAllocaBlock; + OI->OuterAllocBB = OuterAllocaBlock; OI->EntryBB = PRegEntryBB; OI->ExitBB = PRegExitBB; + OI->OuterDeallocBBs.reserve(OuterDeallocIPs.size()); + for (InsertPointTy DeallocIP : OuterDeallocIPs) + OI->OuterDeallocBBs.push_back(DeallocIP.getBlock()); SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet; SmallVector<BasicBlock *, 32> Blocks; @@ -1887,7 +1841,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( /* AllowVarArgs */ true, /* AllowAlloca */ true, /* AllocationBlock */ OuterAllocaBlock, - /* DeallocationBlock */ nullptr, + /* DeallocationBlocks */ {}, /* Suffix */ ".omp_par", ArgsInZeroAddressSpace); // Find inputs to, outputs from the code region. @@ -1933,7 +1887,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( IRBuilder<>::InsertPointGuard Guard(Builder); LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n"); - Builder.restoreIP(OuterAllocaIP); + Builder.restoreIP(OuterAllocIP); Value *Ptr = Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded"); @@ -1985,7 +1939,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel( // Reset the outer alloca insertion point to the entry of the relevant block // in case it was invalidated. 
- OuterAllocaIP = IRBuilder<>::InsertPoint( + OuterAllocIP = IRBuilder<>::InsertPoint( OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt()); for (Value *Input : Inputs) { @@ -2151,10 +2105,10 @@ static Value *emitTaskDependencies( } OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask( - const LocationDescription &Loc, InsertPointTy AllocaIP, - BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition, - SmallVector<DependData> Dependencies, bool Mergeable, Value *EventHandle, - Value *Priority) { + const LocationDescription &Loc, InsertPointTy AllocIP, + ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB, bool Tied, + Value *Final, Value *IfCondition, SmallVector<DependData> Dependencies, + bool Mergeable, Value *EventHandle, Value *Priority) { if (!updateToLocation(Loc)) return InsertPointTy(); @@ -2186,18 +2140,22 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask( InsertPointTy TaskAllocaIP = InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin()); InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin()); - if (Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP)) + InsertPointTy TaskDeallocIP = InsertPointTy(TaskExitBB, TaskExitBB->begin()); + if (Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP, TaskDeallocIP)) return Err; auto OI = std::make_unique<OutlineInfo>(); OI->EntryBB = TaskAllocaBB; - OI->OuterAllocaBB = AllocaIP.getBlock(); + OI->OuterAllocBB = AllocIP.getBlock(); OI->ExitBB = TaskExitBB; + OI->OuterDeallocBBs.reserve(DeallocIPs.size()); + for (InsertPointTy DeallocIP : DeallocIPs) + OI->OuterDeallocBBs.push_back(DeallocIP.getBlock()); // Add the thread ID argument. 
SmallVector<Instruction *, 4> ToBeDeleted; OI->ExcludeArgsFromAggregate.push_back(createFakeIntVal( - Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false)); + Builder, AllocIP, ToBeDeleted, TaskAllocaIP, "global.tid", false)); OI->PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies, Mergeable, Priority, EventHandle, TaskAllocaBB, @@ -2414,10 +2372,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask( return Builder.saveIP(); } -OpenMPIRBuilder::InsertPointOrErrorTy -OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc, - InsertPointTy AllocaIP, - BodyGenCallbackTy BodyGenCB) { +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskgroup( + const LocationDescription &Loc, InsertPointTy AllocIP, + ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB) { if (!updateToLocation(Loc)) return InsertPointTy(); @@ -2432,7 +2389,7 @@ OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc, Builder.CreateCall(TaskgroupFn, {Ident, ThreadID}); BasicBlock *TaskgroupExitBB = splitBB(Builder, true, "taskgroup.exit"); - if (Error Err = BodyGenCB(AllocaIP, Builder.saveIP())) + if (Error Err = BodyGenCB(AllocIP, Builder.saveIP(), DeallocIPs)) return Err; Builder.SetInsertPoint(TaskgroupExitBB); @@ -2501,8 +2458,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections( SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB); Builder.SetInsertPoint(CaseBB); BranchInst *CaseEndBr = Builder.CreateBr(Continue); - if (Error Err = SectionCB(InsertPointTy(), {CaseEndBr->getParent(), - CaseEndBr->getIterator()})) + if (Error Err = + SectionCB(InsertPointTy(), + {CaseEndBr->getParent(), CaseEndBr->getIterator()}, {})) return Err; CaseNumber++; } @@ -4355,8 +4313,8 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR( } // Allocate temporary buffer by master thread - auto BodyGenCB = [&](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) -> Error { + auto BodyGenCB = [&](InsertPointTy 
AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) -> Error { Builder.restoreIP(CodeGenIP); Value *AllocSpan = Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1)); @@ -4395,8 +4353,8 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR( Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR( ArrayRef<ReductionInfo> ReductionInfos, ScanInfo *ScanRedInfo) { - auto BodyGenCB = [&](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) -> Error { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) -> Error { Builder.restoreIP(CodeGenIP); for (ReductionInfo RedInfo : ReductionInfos) { Value *PrivateVar = RedInfo.PrivateVariable; @@ -4447,8 +4405,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction( if (!updateToLocation(Loc)) return Loc.IP; - auto BodyGenCB = [&](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) -> Error { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) -> Error { Builder.restoreIP(CodeGenIP); Function *CurFn = Builder.GetInsertBlock()->getParent(); // for (int k = 0; k <= ceil(log2(n)); ++k) @@ -5291,13 +5249,13 @@ OpenMPIRBuilder::applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI, Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize); auto OI = std::make_unique<OutlineInfo>(); - OI->OuterAllocaBB = CLI->getPreheader(); + OI->OuterAllocBB = CLI->getPreheader(); Function *OuterFn = CLI->getPreheader()->getParent(); // Instructions which need to be deleted at the end of code generation SmallVector<Instruction *, 4> ToBeDeleted; - OI->OuterAllocaBB = AllocaIP.getBlock(); + OI->OuterAllocBB = AllocaIP.getBlock(); // Mark the body loop as region which needs to be extracted OI->EntryBB = CLI->getBody(); @@ -5334,7 +5292,7 @@ OpenMPIRBuilder::applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI, /* AllowVarArgs */ true, /* AllowAlloca */ true, /* AllocationBlock */ 
CLI->getPreheader(), - /* DeallocationBlock */ nullptr, + /* DeallocationBlocks */ {}, /* Suffix */ ".omp_wsloop", /* AggrArgsIn0AddrSpace */ true); @@ -6691,8 +6649,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion( emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional); // generate body - if (Error Err = BodyGenCB(/* AllocaIP */ InsertPointTy(), - /* CodeGenIP */ Builder.saveIP())) + if (Error Err = + BodyGenCB(/* AllocIP */ InsertPointTy(), + /* CodeGenIP */ Builder.saveIP(), /* DeallocIPs */ {})) return Err; // emit exit call and do any needed finalization. @@ -7333,10 +7292,11 @@ Constant *OpenMPIRBuilder::registerTargetRegionFunction( } OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( - const LocationDescription &Loc, InsertPointTy AllocaIP, - InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, - TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, - CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc, + const LocationDescription &Loc, InsertPointTy AllocIP, + InsertPointTy CodeGenIP, ArrayRef<InsertPointTy> DeallocIPs, + Value *DeviceID, Value *IfCond, TargetDataInfo &Info, + GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, + omp::RuntimeFunction *MapperFunc, function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB, @@ -7362,11 +7322,11 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( // Generate the code for the opening of the data environment. Capture all the // arguments of the runtime call by reference because they are used in the // closing of the region. 
- auto BeginThenGen = [&](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) -> Error { + auto BeginThenGen = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) -> Error { MapInfo = &GenMapInfoCB(Builder.saveIP()); if (Error Err = emitOffloadingArrays( - AllocaIP, Builder.saveIP(), *MapInfo, Info, CustomMapperCB, + AllocIP, Builder.saveIP(), *MapInfo, Info, CustomMapperCB, /*IsNonContiguous=*/true, DeviceAddrCB)) return Err; @@ -7420,7 +7380,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( cantFail(TaskBodyCB(/*DeviceID=*/nullptr, /*RTLoc=*/nullptr, /*TargetTaskAllocaIP=*/{})); else - cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP, + cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocIP, /*Dependencies=*/{}, RTArgs, Info.HasNoWait)); } else { Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr( @@ -7451,8 +7411,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( // If we need device pointer privatization, we need to emit the body of the // region with no privatization in the 'else' branch of the conditional. // Otherwise, we don't have to do anything. - auto BeginElseGen = [&](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) -> Error { + auto BeginElseGen = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) -> Error { InsertPointOrErrorTy AfterIP = BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv); if (!AfterIP) @@ -7462,7 +7422,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( }; // Generate code for the closing of the data region. 
- auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto EndThenGen = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { TargetDataRTArgs RTArgs; Info.EmitDebug = !MapInfo->Names.empty(); emitOffloadingArraysArgument(Builder, RTArgs, Info, /*ForEndCall=*/true); @@ -7491,7 +7452,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( // We don't have to do anything to close the region if the if clause evaluates // to false. - auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto EndElseGen = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { return Error::success(); }; @@ -7499,8 +7461,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( if (BodyGenCB) { Error Err = [&]() { if (IfCond) - return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP); - return BeginThenGen(AllocaIP, Builder.saveIP()); + return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocIP); + return BeginThenGen(AllocIP, Builder.saveIP(), DeallocIPs); }(); if (Err) @@ -7515,12 +7477,12 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData( restoreIPandDebugLoc(Builder, *AfterIP); if (IfCond) - return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP); - return EndThenGen(AllocaIP, Builder.saveIP()); + return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocIP); + return EndThenGen(AllocIP, Builder.saveIP(), DeallocIPs); } if (IfCond) - return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP); - return BeginThenGen(AllocaIP, Builder.saveIP()); + return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocIP); + return BeginThenGen(AllocIP, Builder.saveIP(), DeallocIPs); }(); if (Err) @@ -7727,15 +7689,18 @@ static Expected<Function *> createOutlinedFunction( if (OMPBuilder.Config.isTargetDevice()) OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func); - // Insert target 
deinit call in the device compilation pass. + BasicBlock *ExitBB = splitBB(Builder, /*CreateBranch=*/true, "target.exit"); BasicBlock *OutlinedBodyBB = splitBB(Builder, /*CreateBranch=*/true, "outlined.body"); llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc( Builder.saveIP(), - OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->begin())); + OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->begin()), + OpenMPIRBuilder::InsertPointTy(ExitBB, ExitBB->begin())); if (!AfterIP) return AfterIP.takeError(); - Builder.restoreIP(*AfterIP); + Builder.SetInsertPoint(ExitBB); + + // Insert target deinit call in the device compilation pass. if (OMPBuilder.Config.isTargetDevice()) OMPBuilder.createTargetDeinit(Builder); @@ -8183,7 +8148,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask( auto OI = std::make_unique<OutlineInfo>(); OI->EntryBB = TargetTaskAllocaBB; - OI->OuterAllocaBB = AllocaIP.getBlock(); + OI->OuterAllocBB = AllocaIP.getBlock(); // Add the thread ID argument. SmallVector<Instruction *, 4> ToBeDeleted; @@ -8445,7 +8410,8 @@ Error OpenMPIRBuilder::emitOffloadingArraysAndArgs( static void emitTargetCall( OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, - OpenMPIRBuilder::InsertPointTy AllocaIP, + OpenMPIRBuilder::InsertPointTy AllocIP, + ArrayRef<OpenMPIRBuilder::InsertPointTy> DeallocIPs, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, @@ -8502,8 +8468,9 @@ static void emitTargetCall( }; auto &&EmitTargetCallElse = - [&](OpenMPIRBuilder::InsertPointTy AllocaIP, - OpenMPIRBuilder::InsertPointTy CodeGenIP) -> Error { + [&](OpenMPIRBuilder::InsertPointTy AllocIP, + OpenMPIRBuilder::InsertPointTy CodeGenIP, + ArrayRef<OpenMPIRBuilder::InsertPointTy> DeallocIPs) -> Error { // Assume no error was returned because EmitTargetCallFallbackCB doesn't // produce any. 
OpenMPIRBuilder::InsertPointTy AfterIP = cantFail([&]() { @@ -8513,7 +8480,7 @@ static void emitTargetCall( // OutlinedFnID=nullptr results in that call not being done. OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs; return OMPBuilder.emitTargetTask(TaskBodyCB, /*DeviceID=*/nullptr, - /*RTLoc=*/nullptr, AllocaIP, + /*RTLoc=*/nullptr, AllocIP, Dependencies, EmptyRTArgs, HasNoWait); } return EmitTargetCallFallbackCB(Builder.saveIP()); @@ -8524,13 +8491,14 @@ static void emitTargetCall( }; auto &&EmitTargetCallThen = - [&](OpenMPIRBuilder::InsertPointTy AllocaIP, - OpenMPIRBuilder::InsertPointTy CodeGenIP) -> Error { + [&](OpenMPIRBuilder::InsertPointTy AllocIP, + OpenMPIRBuilder::InsertPointTy CodeGenIP, + ArrayRef<OpenMPIRBuilder::InsertPointTy> DeallocIPs) -> Error { Info.HasNoWait = HasNoWait; OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP()); OpenMPIRBuilder::TargetDataRTArgs RTArgs; if (Error Err = OMPBuilder.emitOffloadingArraysAndArgs( - AllocaIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB, + AllocIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB, /*IsNonContiguous=*/true, /*ForEndCall=*/false)) return Err; @@ -8603,13 +8571,13 @@ static void emitTargetCall( // The presence of certain clauses on the target directive require the // explicit generation of the target task. if (RequiresOuterTargetTask) - return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP, + return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocIP, Dependencies, KArgs.RTArgs, Info.HasNoWait); return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs, - DeviceID, RTLoc, AllocaIP); + DeviceID, RTLoc, AllocIP); }()); Builder.restoreIP(AfterIP); @@ -8620,24 +8588,24 @@ static void emitTargetCall( // wasn't created. In this case we just run the host fallback directly and // ignore any potential 'if' clauses. 
if (!OutlinedFnID) { - cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP())); + cantFail(EmitTargetCallElse(AllocIP, Builder.saveIP(), DeallocIPs)); return; } // If there's no 'if' clause, only generate the kernel launch code path. if (!IfCond) { - cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP())); + cantFail(EmitTargetCallThen(AllocIP, Builder.saveIP(), DeallocIPs)); return; } cantFail(OMPBuilder.emitIfClause(IfCond, EmitTargetCallThen, - EmitTargetCallElse, AllocaIP)); + EmitTargetCallElse, AllocIP)); } OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget( - const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocaIP, - InsertPointTy CodeGenIP, TargetDataInfo &Info, - TargetRegionEntryInfo &EntryInfo, + const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocIP, + InsertPointTy CodeGenIP, ArrayRef<InsertPointTy> DeallocIPs, + TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl<Value *> &Inputs, GenMapInfoCallbackTy GenMapInfoCB, @@ -8665,9 +8633,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget( // to make a remote call (offload) to the previously outlined function // that represents the target region. Do that now. 
if (!Config.isTargetDevice()) - emitTargetCall(*this, Builder, AllocaIP, Info, DefaultAttrs, RuntimeAttrs, - IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB, - CustomMapperCB, Dependencies, HasNowait); + emitTargetCall(*this, Builder, AllocIP, DeallocIPs, Info, DefaultAttrs, + RuntimeAttrs, IfCond, OutlinedFn, OutlinedFnID, Inputs, + GenMapInfoCB, CustomMapperCB, Dependencies, HasNowait); return Builder.saveIP(); } @@ -9446,15 +9414,16 @@ void OpenMPIRBuilder::emitBlock(BasicBlock *BB, Function *CurFn, Error OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, - InsertPointTy AllocaIP) { + InsertPointTy AllocIP, + ArrayRef<InsertPointTy> DeallocIPs) { // If the condition constant folds and can be elided, try to avoid emitting // the condition and the dead arm of the if/else. if (auto *CI = dyn_cast<ConstantInt>(Cond)) { auto CondConstant = CI->getSExtValue(); if (CondConstant) - return ThenGen(AllocaIP, Builder.saveIP()); + return ThenGen(AllocIP, Builder.saveIP(), DeallocIPs); - return ElseGen(AllocaIP, Builder.saveIP()); + return ElseGen(AllocIP, Builder.saveIP(), DeallocIPs); } Function *CurFn = Builder.GetInsertBlock()->getParent(); @@ -9467,13 +9436,13 @@ Error OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, Builder.CreateCondBr(Cond, ThenBlock, ElseBlock); // Emit the 'then' code. emitBlock(ThenBlock, CurFn); - if (Error Err = ThenGen(AllocaIP, Builder.saveIP())) + if (Error Err = ThenGen(AllocIP, Builder.saveIP(), DeallocIPs)) return Err; emitBranch(ContBlock); // Emit the 'else' code if present. // There is no need to emit line number for unconditional branch. emitBlock(ElseBlock, CurFn); - if (Error Err = ElseGen(AllocaIP, Builder.saveIP())) + if (Error Err = ElseGen(AllocIP, Builder.saveIP(), DeallocIPs)) return Err; // There is no need to emit line number for unconditional branch. 
emitBranch(ContBlock); @@ -10181,13 +10150,14 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, // Generate the body of teams. InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin()); InsertPointTy CodeGenIP(BodyBB, BodyBB->begin()); - if (Error Err = BodyGenCB(AllocaIP, CodeGenIP)) + InsertPointTy DeallocIP(ExitBB, ExitBB->begin()); + if (Error Err = BodyGenCB(AllocaIP, CodeGenIP, DeallocIP)) return Err; auto OI = std::make_unique<OutlineInfo>(); OI->EntryBB = AllocaBB; OI->ExitBB = ExitBB; - OI->OuterAllocaBB = &OuterAllocaBB; + OI->OuterAllocBB = &OuterAllocaBB; // Insert fake values for global tid and bound tid. SmallVector<Instruction *, 8> ToBeDeleted; @@ -10243,14 +10213,13 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc, return Builder.saveIP(); } -OpenMPIRBuilder::InsertPointOrErrorTy -OpenMPIRBuilder::createDistribute(const LocationDescription &Loc, - InsertPointTy OuterAllocaIP, - BodyGenCallbackTy BodyGenCB) { +OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createDistribute( + const LocationDescription &Loc, InsertPointTy OuterAllocIP, + ArrayRef<InsertPointTy> OuterDeallocIPs, BodyGenCallbackTy BodyGenCB) { if (!updateToLocation(Loc)) return InsertPointTy(); - BasicBlock *OuterAllocaBB = OuterAllocaIP.getBlock(); + BasicBlock *OuterAllocaBB = OuterAllocIP.getBlock(); if (OuterAllocaBB == Builder.GetInsertBlock()) { BasicBlock *BodyBB = @@ -10267,13 +10236,17 @@ OpenMPIRBuilder::createDistribute(const LocationDescription &Loc, // Generate the body of distribute clause InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin()); InsertPointTy CodeGenIP(BodyBB, BodyBB->begin()); - if (Error Err = BodyGenCB(AllocaIP, CodeGenIP)) + InsertPointTy DeallocIP(ExitBB, ExitBB->begin()); + if (Error Err = BodyGenCB(AllocaIP, CodeGenIP, DeallocIP)) return Err; auto OI = std::make_unique<OutlineInfo>(); - OI->OuterAllocaBB = OuterAllocaIP.getBlock(); + OI->OuterAllocBB = OuterAllocIP.getBlock(); OI->EntryBB = AllocaBB; OI->ExitBB = ExitBB; + 
OI->OuterDeallocBBs.reserve(OuterDeallocIPs.size()); + for (InsertPointTy DeallocIP : OuterDeallocIPs) + OI->OuterDeallocBBs.push_back(DeallocIP.getBlock()); addOutlineInfo(std::move(OI)); Builder.SetInsertPoint(ExitBB, ExitBB->begin()); @@ -10337,32 +10310,33 @@ std::unique_ptr<CodeExtractor> OpenMPIRBuilder::OutlineInfo::createCodeExtractor(ArrayRef<BasicBlock *> Blocks, bool ArgsInZeroAddressSpace, Twine Suffix) { - return std::make_unique<CodeExtractor>(Blocks, /* DominatorTree */ nullptr, - /* AggregateArgs */ true, - /* BlockFrequencyInfo */ nullptr, - /* BranchProbabilityInfo */ nullptr, - /* AssumptionCache */ nullptr, - /* AllowVarArgs */ true, - /* AllowAlloca */ true, - /* AllocationBlock*/ OuterAllocaBB, - /* DeallocationBlock */ nullptr, - /* Suffix */ Suffix.str(), - ArgsInZeroAddressSpace); + return std::make_unique<CodeExtractor>( + Blocks, /* DominatorTree */ nullptr, + /* AggregateArgs */ true, + /* BlockFrequencyInfo */ nullptr, + /* BranchProbabilityInfo */ nullptr, + /* AssumptionCache */ nullptr, + /* AllowVarArgs */ true, + /* AllowAlloca */ true, + /* AllocationBlock*/ OuterAllocBB, + /* DeallocationBlocks */ ArrayRef<BasicBlock *>(), + /* Suffix */ Suffix.str(), ArgsInZeroAddressSpace); } std::unique_ptr<CodeExtractor> DeviceSharedMemOutlineInfo::createCodeExtractor( ArrayRef<BasicBlock *> Blocks, bool ArgsInZeroAddressSpace, Twine Suffix) { - // TODO: Initialize the DeallocationBlock with a proper pair to OuterAllocaBB. return std::make_unique<DeviceSharedMemCodeExtractor>( - OMPBuilder, AllocBlockOverride, Blocks, /* DominatorTree */ nullptr, + OMPBuilder, Blocks, /* DominatorTree */ nullptr, /* AggregateArgs */ true, /* BlockFrequencyInfo */ nullptr, /* BranchProbabilityInfo */ nullptr, /* AssumptionCache */ nullptr, /* AllowVarArgs */ true, /* AllowAlloca */ true, - /* AllocationBlock*/ OuterAllocaBB, - /* DeallocationBlock */ ExitBB, + /* AllocationBlock*/ OuterAllocBB, + /* DeallocationBlocks */ OuterDeallocBBs.empty() + ? 
SmallVector<BasicBlock *>{ExitBB} + : OuterDeallocBBs, /* Suffix */ Suffix.str(), ArgsInZeroAddressSpace); } diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp index 5780901..e8f3c68 100644 --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -721,7 +721,7 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) { SubRegion, &*DT, /* AggregateArgs */ false, /* BFI */ nullptr, /* BPI */ nullptr, AC, /* AllowVarArgs */ false, /* AllowAlloca */ false, /* AllocaBlock */ nullptr, - /* DeallocationBlock */ nullptr, + /* DeallocationBlocks */ {}, /* Suffix */ "cold." + std::to_string(OutlinedFunctionID)); if (CE.isEligible() && isSplittingBeneficial(CE, SubRegion, TTI) && diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp index 77c1d98..8cdaca4 100644 --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -2829,7 +2829,7 @@ unsigned IROutliner::doOutline(Module &M) { OS->Candidate->getBasicBlocks(BlocksInRegion, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, - false, nullptr, nullptr, "outlined"); + false, nullptr, {}, "outlined"); findAddInputsOutputs(M, *OS, NotSame); if (!OS->IgnoreRegion) OutlinedRegions.push_back(OS); @@ -2940,7 +2940,7 @@ unsigned IROutliner::doOutline(Module &M) { OS->Candidate->getBasicBlocks(BlocksInRegion, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, - false, nullptr, nullptr, "outlined"); + false, nullptr, {}, "outlined"); bool FunctionOutlined = extractSection(*OS); if (FunctionOutlined) { unsigned StartIdx = OS->Candidate->getStartIdx(); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index d9c8412..20fcb73 100644 --- 
a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1086,7 +1086,8 @@ private: SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap; BasicBlock *StartBB = nullptr, *EndBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { BasicBlock *CGStartBB = CodeGenIP.getBlock(); BasicBlock *CGEndBB = SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); @@ -1126,7 +1127,8 @@ private: const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); ParentBB->getTerminator()->eraseFromParent(); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { BasicBlock *CGStartBB = CodeGenIP.getBlock(); BasicBlock *CGEndBB = SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); @@ -1256,8 +1258,9 @@ private: // avoid overriding binding settings, and without explicit cancellation. OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(OMPInfoCache.OMPBuilder.createParallel( - Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, - OMP_PROC_BIND_default, /* IsCancellable */ false)); + Loc, AllocaIP, /* DeallocIPs */ {}, BodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, + /* IsCancellable */ false)); BranchInst::Create(AfterBB, AfterIP.getBlock()); // Perform the actual outlining. 
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 3339f5e..c484968 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -264,11 +264,11 @@ CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, BranchProbabilityInfo *BPI, AssumptionCache *AC, bool AllowVarArgs, bool AllowAlloca, BasicBlock *AllocationBlock, - BasicBlock *DeallocationBlock, std::string Suffix, - bool ArgsInZeroAddressSpace) + ArrayRef<BasicBlock *> DeallocationBlocks, + std::string Suffix, bool ArgsInZeroAddressSpace) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), BPI(BPI), AC(AC), AllocationBlock(AllocationBlock), - DeallocationBlock(DeallocationBlock), AllowVarArgs(AllowVarArgs), + DeallocationBlocks(DeallocationBlocks), AllowVarArgs(AllowVarArgs), Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)), Suffix(Suffix), ArgsInZeroAddressSpace(ArgsInZeroAddressSpace) {} @@ -2029,22 +2029,25 @@ CallInst *CodeExtractor::emitReplacerCall( {}, call); // Deallocate intermediate variables if they need explicit deallocation. 
- BasicBlock *DeallocBlock = codeReplacer; - BasicBlock::iterator DeallocIP = codeReplacer->end(); - if (DeallocationBlock) { - DeallocBlock = DeallocationBlock; - DeallocIP = DeallocationBlock->getFirstInsertionPt(); - } + auto deallocVars = [&](BasicBlock *DeallocBlock, + BasicBlock::iterator DeallocIP) { + int Index = 0; + for (Value *Output : outputs) { + if (!StructValues.contains(Output)) + deallocateVar(DeallocBlock, DeallocIP, ReloadOutputs[Index++], + Output->getType()); + } - int Index = 0; - for (Value *Output : outputs) { - if (!StructValues.contains(Output)) - deallocateVar(DeallocBlock, DeallocIP, ReloadOutputs[Index++], - Output->getType()); - } + if (Struct) + deallocateVar(DeallocBlock, DeallocIP, Struct, StructArgTy); + }; - if (Struct) - deallocateVar(DeallocBlock, DeallocIP, Struct, StructArgTy); + if (DeallocationBlocks.empty()) { + deallocVars(codeReplacer, codeReplacer->end()); + } else { + for (BasicBlock *DeallocationBlock : DeallocationBlocks) + deallocVars(DeallocationBlock, DeallocationBlock->getFirstInsertionPt()); + } return call; } diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index b7a060b..9e9f943 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -55,8 +55,9 @@ using namespace omp; } #define BODYGENCB_WRAPPER(cb) \ - [&cb](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) -> Error { \ - cb(AllocaIP, CodeGenIP); \ + [&cb](InsertPointTy AllocIP, InsertPointTy CodeGenIP, \ + ArrayRef<InsertPointTy> DeallocIPs) -> Error { \ + cb(AllocIP, CodeGenIP, DeallocIPs); \ return Error::success(); \ } @@ -664,10 +665,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) { unsigned NumPrivatizedVars = 0; unsigned NumFinalizationPoints = 0; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) 
{ ++NumBodiesGenerated; - Builder.restoreIP(AllocaIP); + Builder.restoreIP(AllocIP); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); Builder.CreateStore(F->arg_begin(), PrivAI); @@ -715,8 +717,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) { F->getEntryBlock().getFirstInsertionPt()); ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createParallel( - Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, - nullptr, OMP_PROC_BIND_default, false)); + Loc, AllocaIP, {}, BodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false)); EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 1U); @@ -777,10 +779,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { unsigned NumPrivatizedVars = 0; unsigned NumFinalizationPoints = 0; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { ++NumBodiesGenerated; - Builder.restoreIP(AllocaIP); + Builder.restoreIP(AllocIP); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); Builder.CreateStore(F->arg_begin(), PrivAI); @@ -828,8 +831,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) { F->getEntryBlock().getFirstInsertionPt()); ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createParallel( - Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, - nullptr, OMP_PROC_BIND_default, false)); + Loc, AllocaIP, {}, BodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false)); EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 1U); EXPECT_EQ(NumFinalizationPoints, 1U); @@ -885,7 +888,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { unsigned NumOuterBodiesGenerated = 0; unsigned NumFinalizationPoints = 0; - auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto InnerBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { 
++NumInnerBodiesGenerated; return Error::success(); }; @@ -908,7 +912,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { return Error::success(); }; - auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto OuterBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { ++NumOuterBodiesGenerated; Builder.restoreIP(CodeGenIP); BasicBlock *CGBB = CodeGenIP.getBlock(); @@ -917,7 +922,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, - OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP, + OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocIP, {}, InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false)); @@ -929,7 +934,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) { F->getEntryBlock().getFirstInsertionPt()); ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createParallel( - Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB), + Loc, AllocaIP, {}, BODYGENCB_WRAPPER(OuterBodyGenCB), PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false)); @@ -986,7 +991,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { unsigned NumOuterBodiesGenerated = 0; unsigned NumFinalizationPoints = 0; - auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto InnerBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { ++NumInnerBodiesGenerated; return Error::success(); }; @@ -1009,7 +1015,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { return Error::success(); }; - auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto OuterBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { ++NumOuterBodiesGenerated; Builder.restoreIP(CodeGenIP); BasicBlock *CGBB = CodeGenIP.getBlock(); @@ -1022,18 +1029,18 @@ 
TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP1, - OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP, + OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocIP, {}, InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false)); Builder.restoreIP(AfterIP1); Builder.CreateBr(NewBB1); - ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP2, - OMPBuilder.createParallel( - InsertPointTy(NewBB1, NewBB1->end()), AllocaIP, - InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr, - OMP_PROC_BIND_default, false)); + ASSERT_EXPECTED_INIT( + OpenMPIRBuilder::InsertPointTy, AfterIP2, + OMPBuilder.createParallel(InsertPointTy(NewBB1, NewBB1->end()), AllocIP, + {}, InnerBodyGenCB, PrivCB, FiniCB, nullptr, + nullptr, OMP_PROC_BIND_default, false)); Builder.restoreIP(AfterIP2); Builder.CreateBr(NewBB2); @@ -1043,7 +1050,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) { F->getEntryBlock().getFirstInsertionPt()); ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createParallel( - Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB), + Loc, AllocaIP, {}, BODYGENCB_WRAPPER(OuterBodyGenCB), PrivCB, FiniCB, nullptr, nullptr, OMP_PROC_BIND_default, false)); @@ -1107,10 +1114,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { unsigned NumPrivatizedVars = 0; unsigned NumFinalizationPoints = 0; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { ++NumBodiesGenerated; - Builder.restoreIP(AllocaIP); + Builder.restoreIP(AllocIP); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); Builder.CreateStore(F->arg_begin(), PrivAI); @@ -1159,7 +1167,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) { F->getEntryBlock().getFirstInsertionPt()); ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, - 
OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, + OMPBuilder.createParallel(Loc, AllocaIP, {}, BodyGenCB, PrivCB, FiniCB, Builder.CreateIsNotNull(F->arg_begin()), nullptr, OMP_PROC_BIND_default, false)); @@ -1214,7 +1222,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { unsigned NumFinalizationPoints = 0; CallInst *CheckedBarrier = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { ++NumBodiesGenerated; Builder.restoreIP(CodeGenIP); @@ -1282,11 +1291,12 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) { IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), F->getEntryBlock().getFirstInsertionPt()); - ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, - OMPBuilder.createParallel( - Loc, AllocaIP, BODYGENCB_WRAPPER(BodyGenCB), PrivCB, - FiniCB, Builder.CreateIsNotNull(F->arg_begin()), - nullptr, OMP_PROC_BIND_default, true)); + ASSERT_EXPECTED_INIT( + OpenMPIRBuilder::InsertPointTy, AfterIP, + OMPBuilder.createParallel(Loc, AllocaIP, {}, BODYGENCB_WRAPPER(BodyGenCB), + PrivCB, FiniCB, + Builder.CreateIsNotNull(F->arg_begin()), + nullptr, OMP_PROC_BIND_default, true)); EXPECT_EQ(NumBodiesGenerated, 1U); EXPECT_EQ(NumPrivatizedVars, 0U); @@ -1351,7 +1361,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc); Instruction *Internal; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { IRBuilder<>::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); Internal = Builder.CreateCall(TakeI32Func, I32Val); @@ -1371,8 +1382,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) { F->getEntryBlock().getFirstInsertionPt()); ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, 
OMPBuilder.createParallel( - Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, - nullptr, OMP_PROC_BIND_default, false)); + Loc, AllocaIP, {}, BodyGenCB, PrivCB, FiniCB, + nullptr, nullptr, OMP_PROC_BIND_default, false)); Builder.restoreIP(AfterIP); Builder.CreateRetVoid(); @@ -2875,9 +2886,10 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) { BasicBlock *EntryBB = nullptr; BasicBlock *ThenBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - if (AllocaIP.isSet()) - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { + if (AllocIP.isSet()) + Builder.restoreIP(AllocIP); else Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); @@ -2956,9 +2968,10 @@ TEST_F(OpenMPIRBuilderTest, MaskedDirective) { BasicBlock *EntryBB = nullptr; BasicBlock *ThenBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - if (AllocaIP.isSet()) - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { + if (AllocIP.isSet()) + Builder.restoreIP(AllocIP); else Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); @@ -3035,7 +3048,8 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) { AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { // actual start for bodyCB llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); @@ -3286,7 +3300,8 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) { AllocaInst *PrivAI = 
Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst"); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); @@ -3360,7 +3375,8 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) { AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst"); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock(); llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint(); EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst); @@ -3467,9 +3483,10 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) { BasicBlock *EntryBB = nullptr; BasicBlock *ThenBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - if (AllocaIP.isSet()) - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { + if (AllocIP.isSet()) + Builder.restoreIP(AllocIP); else Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); @@ -3560,9 +3577,10 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) { BasicBlock *EntryBB = nullptr; BasicBlock *ThenBB = nullptr; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - if (AllocaIP.isSet()) - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { + if (AllocIP.isSet()) + Builder.restoreIP(AllocIP); else 
Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); @@ -3681,9 +3699,10 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) { Function *CopyFunc = Function::Create(CopyFuncTy, Function::PrivateLinkage, "copy_var", *M); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - if (AllocaIP.isSet()) - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { + if (AllocIP.isSet()) + Builder.restoreIP(AllocIP); else Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt())); PrivAI = Builder.CreateAlloca(F->arg_begin()->getType()); @@ -4545,8 +4564,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTeams) { AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty()); Value *Val128 = Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "load"); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { + Builder.restoreIP(AllocIP); AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, "bodygen.alloca128"); @@ -4626,7 +4646,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) { Function::Create(FunctionType::get(Builder.getVoidTy(), false), GlobalValue::ExternalLinkage, "fakeFunction", M.get()); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); return Error::success(); @@ -4682,7 +4703,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) { Function::Create(FunctionType::get(Builder.getVoidTy(), false), GlobalValue::ExternalLinkage, "fakeFunction", M.get()); - auto BodyGenCB = 
[&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); return Error::success(); @@ -4744,7 +4766,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) { Value *NumTeamsUpper = Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper"); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); return Error::success(); @@ -4811,7 +4834,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) { Function::Create(FunctionType::get(Builder.getVoidTy(), false), GlobalValue::ExternalLinkage, "fakeFunction", M.get()); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); return Error::success(); @@ -4868,7 +4892,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) { Function::Create(FunctionType::get(Builder.getVoidTy(), false), GlobalValue::ExternalLinkage, "fakeFunction", M.get()); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); return Error::success(); @@ -4935,7 +4960,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) { Function::Create(FunctionType::get(Builder.getVoidTy(), false), GlobalValue::ExternalLinkage, "fakeFunction", M.get()); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = 
[&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { Builder.restoreIP(CodeGenIP); Builder.CreateCall(FakeFunction, {}); return Error::success(); @@ -5153,7 +5179,8 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { // xor of thread-id; // and store the result in global variables. InsertPointTy BodyIP, BodyAllocaIP; - auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy InnerAllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); @@ -5171,7 +5198,7 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { Builder.CreateStore(Xor, XorReduced); BodyIP = Builder.saveIP(); - BodyAllocaIP = InnerAllocaIP; + BodyAllocaIP = InnerAllocIP; return Error::success(); }; @@ -5207,12 +5234,12 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) { // Do nothing in finalization. auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); }; - ASSERT_EXPECTED_INIT( - OpenMPIRBuilder::InsertPointTy, AfterIP, - OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB, - /* IfCondition */ nullptr, - /* NumThreads */ nullptr, OMP_PROC_BIND_default, - /* IsCancellable */ false)); + ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, + OMPBuilder.createParallel( + Loc, OuterAllocaIP, {}, BodyGenCB, PrivCB, FiniCB, + /* IfCondition */ nullptr, + /* NumThreads */ nullptr, OMP_PROC_BIND_default, + /* IsCancellable */ false)); Builder.restoreIP(AfterIP); OpenMPIRBuilder::ReductionInfo ReductionInfos[] = { @@ -5531,8 +5558,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { Builder.CreateStore(Builder.getInt32(1), XorReduced); InsertPointTy FirstBodyIP, FirstBodyAllocaIP; - auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP, - InsertPointTy CodeGenIP) { + auto FirstBodyGenCB = [&](InsertPointTy InnerAllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) 
{ IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); @@ -5547,13 +5574,14 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { Builder.CreateStore(Sum, SumReduced); FirstBodyIP = Builder.saveIP(); - FirstBodyAllocaIP = InnerAllocaIP; + FirstBodyAllocaIP = InnerAllocIP; return Error::success(); }; InsertPointTy SecondBodyIP, SecondBodyAllocaIP; - auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP, - InsertPointTy CodeGenIP) { + auto SecondBodyGenCB = [&](InsertPointTy InnerAllocIP, + InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { IRBuilderBase::InsertPointGuard Guard(Builder); Builder.restoreIP(CodeGenIP); @@ -5566,7 +5594,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { Builder.CreateStore(Xor, XorReduced); SecondBodyIP = Builder.saveIP(); - SecondBodyAllocaIP = InnerAllocaIP; + SecondBodyAllocaIP = InnerAllocIP; return Error::success(); }; @@ -5606,14 +5634,14 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP1, - OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB, + OMPBuilder.createParallel(Loc, OuterAllocaIP, {}, FirstBodyGenCB, PrivCB, FiniCB, /* IfCondition */ nullptr, /* NumThreads */ nullptr, OMP_PROC_BIND_default, /* IsCancellable */ false)); Builder.restoreIP(AfterIP1); ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP2, - OMPBuilder.createParallel({Builder.saveIP(), DL}, OuterAllocaIP, + OMPBuilder.createParallel({Builder.saveIP(), DL}, OuterAllocaIP, {}, SecondBodyGenCB, PrivCB, FiniCB, /* IfCondition */ nullptr, /* NumThreads */ nullptr, OMP_PROC_BIND_default, @@ -5707,7 +5735,8 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) { llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; auto FiniCB = [&](InsertPointTy IP) { return Error::success(); }; - auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto SectionCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + 
ArrayRef<InsertPointTy> DeallocIPs) { return Error::success(); }; SectionCBVector.push_back(SectionCB); @@ -5752,7 +5781,8 @@ TEST_F(OpenMPIRBuilderTest, CreateSections) { EXPECT_NE(IPBB->end(), IP.getPoint()); }; - auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto SectionCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { ++NumBodiesGenerated; CaseBBs.push_back(CodeGenIP.getBlock()); SwitchBB = CodeGenIP.getBlock()->getSinglePredecessor(); @@ -6092,7 +6122,7 @@ TEST_F(OpenMPIRBuilderTest, TargetEnterData) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTargetData( - Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), + Loc, AllocaIP, Builder.saveIP(), {}, Builder.getInt64(DeviceID), /* IfCond= */ nullptr, Info, GenMapInfoCB, CustomMapperCB, &RTLFunc)); Builder.restoreIP(AfterIP); @@ -6155,7 +6185,7 @@ TEST_F(OpenMPIRBuilderTest, TargetExitData) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTargetData( - Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID), + Loc, AllocaIP, Builder.saveIP(), {}, Builder.getInt64(DeviceID), /* IfCond= */ nullptr, Info, GenMapInfoCB, CustomMapperCB, &RTLFunc)); Builder.restoreIP(AfterIP); @@ -6266,7 +6296,7 @@ TEST_F(OpenMPIRBuilderTest, TargetDataRegion) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, TargetDataIP1, - OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(), + OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(), {}, Builder.getInt64(DeviceID), /* IfCond= */ nullptr, Info, GenMapInfoCB, CustomMapperCB, nullptr, BodyCB)); @@ -6295,7 +6325,7 @@ TEST_F(OpenMPIRBuilderTest, TargetDataRegion) { }; ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, TargetDataIP2, - OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(), + OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(), {}, Builder.getInt64(DeviceID), /* IfCond= */ nullptr, 
Info, GenMapInfoCB, CustomMapperCB, nullptr, BodyTargetCB)); @@ -6346,8 +6376,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) { Builder.CreateStore(Builder.getInt32(10), APtr); Builder.CreateStore(Builder.getInt32(20), BPtr); - auto BodyGenCB = [&](InsertPointTy AllocaIP, - InsertPointTy CodeGenIP) -> InsertPointTy { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) -> InsertPointTy { IRBuilderBase::InsertPointGuard guard(Builder); Builder.SetCurrentDebugLocation(llvm::DebugLoc()); Builder.restoreIP(CodeGenIP); @@ -6417,10 +6447,10 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), - Builder.saveIP(), Info, EntryInfo, DefaultAttrs, - RuntimeAttrs, /*IfCond=*/nullptr, Inputs, - GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB, - CustomMapperCB, {}, false)); + Builder.saveIP(), {}, Info, EntryInfo, + DefaultAttrs, RuntimeAttrs, /*IfCond=*/nullptr, + Inputs, GenMapInfoCB, BodyGenCB, + SimpleArgAccessorCB, CustomMapperCB, {}, false)); EXPECT_EQ(DL, Builder.getCurrentDebugLocation()); Builder.restoreIP(AfterIP); @@ -6565,8 +6595,9 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) { }; auto CustomMapperCB = [&](unsigned int I) { return nullptr; }; - auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP, - OpenMPIRBuilder::InsertPointTy CodeGenIP) + auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocIP, + OpenMPIRBuilder::InsertPointTy CodeGenIP, + ArrayRef<OpenMPIRBuilder::InsertPointTy> DeallocIPs) -> OpenMPIRBuilder::InsertPointTy { IRBuilderBase::InsertPointGuard guard(Builder); Builder.SetCurrentDebugLocation(llvm::DebugLoc()); @@ -6591,7 +6622,7 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, - Info, EntryInfo, DefaultAttrs, 
RuntimeAttrs, + {}, Info, EntryInfo, DefaultAttrs, RuntimeAttrs, /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB, CustomMapperCB, {}, false)); @@ -6672,7 +6703,14 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) { Instruction *Value1 = &*OutlinedBlock->getFirstNonPHIIt(); EXPECT_EQ(Value1, Value); EXPECT_EQ(Value1->getNextNode(), TargetStore); - auto *Deinit = TargetStore->getNextNode(); + + auto *TargetExitBlockBr = TargetStore->getNextNode(); + EXPECT_TRUE(isa<BranchInst>(TargetExitBlockBr)); + + auto *TargetExitBlock = TargetExitBlockBr->getSuccessor(0); + EXPECT_EQ(TargetExitBlock->getName(), "target.exit"); + + Instruction *Deinit = &*TargetExitBlock->getFirstNonPHIIt(); EXPECT_NE(Deinit, nullptr); auto *DeinitCall = dyn_cast<CallInst>(Deinit); @@ -6719,8 +6757,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionSPMD) { IRBuilder<> Builder(BB); auto CustomMapperCB = [&](unsigned int I) { return nullptr; }; - auto BodyGenCB = [&](InsertPointTy, - InsertPointTy CodeGenIP) -> InsertPointTy { + auto BodyGenCB = [&](InsertPointTy, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy>) -> InsertPointTy { Builder.restoreIP(CodeGenIP); return Builder.saveIP(); }; @@ -6753,10 +6791,10 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionSPMD) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(), - Builder.saveIP(), Info, EntryInfo, DefaultAttrs, - RuntimeAttrs, /*IfCond=*/nullptr, Inputs, - GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB, - CustomMapperCB, {})); + Builder.saveIP(), {}, Info, EntryInfo, + DefaultAttrs, RuntimeAttrs, /*IfCond=*/nullptr, + Inputs, GenMapInfoCB, BodyGenCB, + SimpleArgAccessorCB, CustomMapperCB, {})); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); @@ -6839,7 +6877,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDeviceSPMD) { auto CustomMapperCB = [&](unsigned int I) { return nullptr; }; auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy, - 
OpenMPIRBuilder::InsertPointTy CodeGenIP) + OpenMPIRBuilder::InsertPointTy CodeGenIP, + ArrayRef<OpenMPIRBuilder::InsertPointTy>) -> OpenMPIRBuilder::InsertPointTy { Builder.restoreIP(CodeGenIP); OutlinedFn = CodeGenIP.getBlock()->getParent(); @@ -6860,8 +6899,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDeviceSPMD) { ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTarget( - Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, Info, - EntryInfo, DefaultAttrs, RuntimeAttrs, + Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, {}, + Info, EntryInfo, DefaultAttrs, RuntimeAttrs, /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB, CustomMapperCB, {})); Builder.restoreIP(AfterIP); @@ -6958,8 +6997,9 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) { llvm::Value *RaiseAlloca = nullptr; auto CustomMapperCB = [&](unsigned int I) { return nullptr; }; - auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP, - OpenMPIRBuilder::InsertPointTy CodeGenIP) + auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocIP, + OpenMPIRBuilder::InsertPointTy CodeGenIP, + ArrayRef<OpenMPIRBuilder::InsertPointTy> DeallocIPs) -> OpenMPIRBuilder::InsertPointTy { IRBuilderBase::InsertPointGuard guard(Builder); Builder.SetCurrentDebugLocation(llvm::DebugLoc()); @@ -6985,7 +7025,7 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, - Info, EntryInfo, DefaultAttrs, RuntimeAttrs, + {}, Info, EntryInfo, DefaultAttrs, RuntimeAttrs, /*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB, CustomMapperCB, {}, false)); @@ -7062,7 +7102,14 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) { EXPECT_TRUE(isa<LoadInst>(Load2)); EXPECT_EQ(Load2, Value); EXPECT_EQ(Load2->getNextNode(), TargetStore); - auto *Deinit = TargetStore->getNextNode(); + + auto *TargetExitBlockBr = 
TargetStore->getNextNode(); + EXPECT_TRUE(isa<BranchInst>(TargetExitBlockBr)); + + auto *TargetExitBlock = TargetExitBlockBr->getSuccessor(0); + EXPECT_EQ(TargetExitBlock->getName(), "target.exit"); + + Instruction *Deinit = &*TargetExitBlock->getFirstNonPHIIt(); EXPECT_NE(Deinit, nullptr); auto *DeinitCall = dyn_cast<CallInst>(Deinit); @@ -7091,8 +7138,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTask) { Value *Val128 = Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load"); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { + Builder.restoreIP(AllocIP); AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, "bodygen.alloca128"); @@ -7120,7 +7168,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTask) { OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTask( Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), - BodyGenCB)); + /*DeallocIPs=*/{}, BodyGenCB)); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -7219,7 +7267,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) { F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { return Error::success(); }; @@ -7231,7 +7280,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) { OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTask( Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), - BodyGenCB)); + /*DeallocIPs=*/{}, BodyGenCB)); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -7254,7 +7303,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) { OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy 
CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { return Error::success(); }; BasicBlock *AllocaBB = Builder.GetInsertBlock(); @@ -7265,7 +7315,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) { OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTask( Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), - BodyGenCB, + /*DeallocIPs=*/{}, BodyGenCB, /*Tied=*/false)); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); @@ -7290,7 +7340,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) { OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { return Error::success(); }; BasicBlock *AllocaBB = Builder.GetInsertBlock(); @@ -7308,7 +7359,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) { OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTask( Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), - BodyGenCB, + /*DeallocIPs=*/{}, BodyGenCB, /*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS)); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); @@ -7370,7 +7421,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) { OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { return Error::success(); }; BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); @@ -7381,7 +7433,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) { ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U)); OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, - OMPBuilder.createTask(Loc, AllocaIP, 
BodyGenCB, + OMPBuilder.createTask(Loc, AllocaIP, /*DeallocIPs=*/{}, + BodyGenCB, /*Tied=*/false, Final)); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); @@ -7428,7 +7481,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) { OMPBuilder.initialize(); F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { return Error::success(); }; BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split"); @@ -7438,10 +7492,10 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) { CmpInst::Predicate::ICMP_EQ, F->getArg(0), ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U)); OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); - ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP, - OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB, - /*Tied=*/false, /*Final=*/nullptr, - IfCondition)); + ASSERT_EXPECTED_INIT( + OpenMPIRBuilder::InsertPointTy, AfterIP, + OMPBuilder.createTask(Loc, AllocaIP, /*DeallocIPs=*/{}, BodyGenCB, + /*Tied=*/false, /*Final=*/nullptr, IfCondition)); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); Builder.CreateRetVoid(); @@ -7507,8 +7561,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) { Value *InternalStoreInst, *InternalLoad32, *InternalLoad128, *InternalIfCmp; - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { + Builder.restoreIP(AllocIP); AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr, "bodygen.alloca128"); @@ -7536,7 +7591,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTaskgroup( - Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), + Loc, 
InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), {}, BodyGenCB)); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); @@ -7598,14 +7653,16 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) { F->setName("func"); IRBuilder<> Builder(BB); - auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { - Builder.restoreIP(AllocaIP); + auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { + Builder.restoreIP(AllocIP); AllocaInst *Alloca32 = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, "bodygen.alloca32"); AllocaInst *Alloca64 = Builder.CreateAlloca(Builder.getInt64Ty(), nullptr, "bodygen.alloca64"); Builder.restoreIP(CodeGenIP); - auto TaskBodyGenCB1 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto TaskBodyGenCB1 = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { Builder.restoreIP(CodeGenIP); LoadInst *LoadValue = Builder.CreateLoad(Alloca64->getAllocatedType(), Alloca64); @@ -7614,11 +7671,13 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) { return Error::success(); }; OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL); - ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP1, - OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1)); + ASSERT_EXPECTED_INIT( + OpenMPIRBuilder::InsertPointTy, TaskIP1, + OMPBuilder.createTask(Loc, AllocIP, DeallocIPs, TaskBodyGenCB1)); Builder.restoreIP(TaskIP1); - auto TaskBodyGenCB2 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + auto TaskBodyGenCB2 = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP, + ArrayRef<InsertPointTy> DeallocIPs) { Builder.restoreIP(CodeGenIP); LoadInst *LoadValue = Builder.CreateLoad(Alloca32->getAllocatedType(), Alloca32); @@ -7627,8 +7686,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) { return Error::success(); }; OpenMPIRBuilder::LocationDescription Loc2(Builder.saveIP(), DL); - 
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP2, - OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2)); + ASSERT_EXPECTED_INIT( + OpenMPIRBuilder::InsertPointTy, TaskIP2, + OMPBuilder.createTask(Loc2, AllocIP, DeallocIPs, TaskBodyGenCB2)); Builder.restoreIP(TaskIP2); }; @@ -7639,7 +7699,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) { ASSERT_EXPECTED_INIT( OpenMPIRBuilder::InsertPointTy, AfterIP, OMPBuilder.createTaskgroup( - Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), + Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), {}, BODYGENCB_WRAPPER(BodyGenCB))); Builder.restoreIP(AfterIP); OMPBuilder.finalize(); diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp index 6fd266a..d63e346 100644 --- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp +++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp @@ -712,7 +712,7 @@ TEST(CodeExtractor, OpenMPAggregateArgs) { /* AllowVarArgs */ true, /* AllowAlloca */ true, /* AllocationBlock*/ &Func->getEntryBlock(), - /* DeallocationBlock */ nullptr, + /* DeallocationBlocks */ {}, /* Suffix */ ".outlined", /* ArgsInZeroAddressSpace */ true); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 539e62a..f5d60a5 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -66,14 +66,17 @@ convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) { /// ModuleTranslation stack frame for OpenMP operations. This keeps track of the /// insertion points for allocas. 
-class OpenMPAllocaStackFrame - : public StateStackFrameBase<OpenMPAllocaStackFrame> { +class OpenMPAllocStackFrame + : public StateStackFrameBase<OpenMPAllocStackFrame> { public: - MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame) - - explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP) - : allocaInsertPoint(allocaIP) {} - llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocStackFrame) + + explicit OpenMPAllocStackFrame( + llvm::OpenMPIRBuilder::InsertPointTy allocIP, + llvm::ArrayRef<llvm::OpenMPIRBuilder::InsertPointTy> deallocIPs) + : allocInsertPoint(allocIP), deallocInsertPoints(deallocIPs) {} + llvm::OpenMPIRBuilder::InsertPointTy allocInsertPoint; + llvm::SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> deallocInsertPoints; }; /// Stack frame to hold a \see llvm::CanonicalLoopInfo representing the @@ -482,26 +485,33 @@ static LogicalResult handleError(llvm::Expected<T> &result, Operation &op) { /// Find the insertion point for allocas given the current insertion point for /// normal operations in the builder. -static llvm::OpenMPIRBuilder::InsertPointTy -findAllocaInsertPoint(llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation) { - // If there is an alloca insertion point on stack, i.e. we are in a nested +static llvm::OpenMPIRBuilder::InsertPointTy findAllocInsertPoints( + llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, + llvm::SmallVectorImpl<llvm::OpenMPIRBuilder::InsertPointTy> *deallocIPs = + nullptr) { + // If there is an allocation insertion point on stack, i.e. we are in a nested // operation and a specific point was provided by some surrounding operation, // use it. 
- llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint; - WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>( - [&](OpenMPAllocaStackFrame &frame) { - allocaInsertPoint = frame.allocaInsertPoint; + llvm::OpenMPIRBuilder::InsertPointTy allocInsertPoint; + llvm::ArrayRef<llvm::OpenMPIRBuilder::InsertPointTy> deallocInsertPoints; + WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocStackFrame>( + [&](OpenMPAllocStackFrame &frame) { + allocInsertPoint = frame.allocInsertPoint; + deallocInsertPoints = frame.deallocInsertPoints; return WalkResult::interrupt(); }); // In cases with multiple levels of outlining, the tree walk might find an - // alloca insertion point that is inside the original function while the - // builder insertion point is inside the outlined function. We need to make - // sure that we do not use it in those cases. + // insertion point that is inside the original function while the builder + // insertion point is inside the outlined function. We need to make sure that + // we do not use it in those cases. if (walkResult.wasInterrupted() && - allocaInsertPoint.getBlock()->getParent() == - builder.GetInsertBlock()->getParent()) - return allocaInsertPoint; + allocInsertPoint.getBlock()->getParent() == + builder.GetInsertBlock()->getParent()) { + if (deallocIPs) + deallocIPs->insert(deallocIPs->end(), deallocInsertPoints.begin(), + deallocInsertPoints.end()); + return allocInsertPoint; + } // Otherwise, insert to the entry block of the surrounding function. // If the current IRBuilder InsertPoint is the function's entry, it cannot @@ -509,7 +519,7 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder, // confusion. Create a new BasicBlock for the Builder and use the entry block // for the allocs. // TODO: Create a dedicated alloca BasicBlock at function creation such that - // we do not need to move the current InertPoint here. + // we do not need to move the current InsertPoint here. 
if (builder.GetInsertBlock() == &builder.GetInsertBlock()->getParent()->getEntryBlock()) { assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() && @@ -521,6 +531,16 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder, builder.SetInsertPoint(entryBB); } + // Collect exit blocks, which is where explicit deallocations should happen in + // this case. + if (deallocIPs) { + for (llvm::BasicBlock &block : *builder.GetInsertBlock()->getParent()) { + llvm::Instruction *terminator = block.getTerminator(); + if (isa_and_present<llvm::ReturnInst>(terminator)) + deallocIPs->emplace_back(&block, terminator->getIterator()); + } + } + llvm::BasicBlock &funcEntryBlock = builder.GetInsertBlock()->getParent()->getEntryBlock(); return llvm::OpenMPIRBuilder::InsertPointTy( @@ -708,7 +728,8 @@ convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder, if (failed(checkImplementationStatus(opInst))) return failure(); - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { + auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP, + llvm::ArrayRef<InsertPointTy> deallocIPs) { // MaskedOp has only one region associated with it. auto &region = maskedOp.getRegion(); builder.restoreIP(codeGenIP); @@ -752,7 +773,8 @@ convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder, if (failed(checkImplementationStatus(opInst))) return failure(); - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { + auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP, + llvm::ArrayRef<InsertPointTy> deallocIPs) { // MasterOp has only one region associated with it. 
auto &region = masterOp.getRegion(); builder.restoreIP(codeGenIP); @@ -787,7 +809,8 @@ convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder, if (failed(checkImplementationStatus(opInst))) return failure(); - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { + auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP, + llvm::ArrayRef<InsertPointTy> deallocIPs) { // CriticalOp has only one region associated with it. auto &region = cast<omp::CriticalOp>(opInst).getRegion(); builder.restoreIP(codeGenIP); @@ -1047,7 +1070,7 @@ convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder, indexVecValues++; } llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + findAllocInsertPoints(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend( ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource)); @@ -1066,7 +1089,8 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder, if (failed(checkImplementationStatus(opInst))) return failure(); - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { + auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP, + llvm::ArrayRef<InsertPointTy> deallocIPs) { // OrderedOp has only one region associated with it. 
auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion(); builder.restoreIP(codeGenIP); @@ -1862,7 +1886,7 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, SmallVector<omp::DeclareReductionOp> reductionDecls; collectReductionDecls(sectionsOp, reductionDecls); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + findAllocInsertPoints(builder, moduleTranslation); SmallVector<llvm::Value *> privateReductionVariables( sectionsOp.getNumReductionVars()); @@ -1886,7 +1910,8 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, Region &region = sectionOp.getRegion(); auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation]( - InsertPointTy allocaIP, InsertPointTy codeGenIP) { + InsertPointTy allocIP, InsertPointTy codeGenIP, + ArrayRef<InsertPointTy> deallocIPs) { builder.restoreIP(codeGenIP); // map the omp.section reduction block argument to the omp.sections block @@ -1931,7 +1956,7 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, // called for variables which have destructors/finalizers. 
auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); }; - allocaIP = findAllocaInsertPoint(builder, moduleTranslation); + allocaIP = findAllocInsertPoints(builder, moduleTranslation); bool isCancellable = constructIsCancellable(sectionsOp); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = @@ -1960,7 +1985,8 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder, if (failed(checkImplementationStatus(*singleOp))) return failure(); - auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { + auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP, + llvm::ArrayRef<InsertPointTy> deallocIPs) { builder.restoreIP(codegenIP); return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region", builder, moduleTranslation) @@ -2043,7 +2069,7 @@ convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, SmallVector<llvm::Value *> privateReductionVariables(numReductionVars); llvm::ArrayRef<bool> isByRef; llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + findAllocInsertPoints(builder, moduleTranslation); // Only do teams reduction if there is no distribute op that captures the // reduction instead. 
@@ -2065,9 +2091,10 @@ convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder, return failure(); } - auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { - LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( - moduleTranslation, allocaIP); + auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP, + llvm::ArrayRef<InsertPointTy> deallocIPs) { + LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame( + moduleTranslation, allocIP, deallocIPs); builder.restoreIP(codegenIP); return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder, moduleTranslation) @@ -2324,9 +2351,9 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, // code outside of the outlined task region, which is what we want because // this way the initialization and copy regions are executed immediately while // the host variable data are still live. - - llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + llvm::SmallVector<InsertPointTy> deallocIPs; + InsertPointTy allocIP = + findAllocInsertPoints(builder, moduleTranslation, &deallocIPs); // Not using splitBB() because that requires the current block to have a // terminator. @@ -2356,8 +2383,8 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, // Save the alloca insertion point on ModuleTranslation stack for use in // nested regions. 
- LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( - moduleTranslation, allocaIP); + LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame( + moduleTranslation, allocIP, deallocIPs); // Allocate and initialize private variables builder.SetInsertPoint(initBlock->getTerminator()); @@ -2421,12 +2448,12 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, // Set up for call to createTask() builder.SetInsertPoint(taskStartBlock); - auto bodyCB = [&](InsertPointTy allocaIP, - InsertPointTy codegenIP) -> llvm::Error { + auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP, + llvm::ArrayRef<InsertPointTy> deallocIPs) -> llvm::Error { // Save the alloca insertion point on ModuleTranslation stack for use in // nested regions. - LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( - moduleTranslation, allocaIP); + LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame( + moduleTranslation, allocIP, deallocIPs); // translate the body of the task: builder.restoreIP(codegenIP); @@ -2444,7 +2471,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, llvm::IRBuilderBase::InsertPointGuard guard(builder); llvm::Type *llvmAllocType = moduleTranslation.convertType(privDecl.getType()); - builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); + builder.SetInsertPoint(allocIP.getBlock()->getTerminator()); llvm::Value *llvmPrivateVar = builder.CreateAlloca( llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc"); @@ -2518,7 +2545,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = moduleTranslation.getOpenMPBuilder()->createTask( - ompLoc, allocaIP, bodyCB, !taskOp.getUntied(), + ompLoc, allocIP, deallocIPs, bodyCB, !taskOp.getUntied(), moduleTranslation.lookupValue(taskOp.getFinal()), moduleTranslation.lookupValue(taskOp.getIfExpr()), dds, 
taskOp.getMergeable(), @@ -2543,18 +2570,21 @@ convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, if (failed(checkImplementationStatus(*tgOp))) return failure(); - auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) { + auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP, + llvm::ArrayRef<InsertPointTy> deallocIPs) { builder.restoreIP(codegenIP); return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region", builder, moduleTranslation) .takeError(); }; - InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); + llvm::SmallVector<InsertPointTy> deallocIPs; + InsertPointTy allocIP = + findAllocInsertPoints(builder, moduleTranslation, &deallocIPs); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = - moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP, - bodyCB); + moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocIP, + deallocIPs, bodyCB); if (failed(handleError(afterIP, *tgOp))) return failure(); @@ -2604,8 +2634,9 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, SmallVector<omp::DeclareReductionOp> reductionDecls; collectReductionDecls(wsloopOp, reductionDecls); + llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + findAllocInsertPoints(builder, moduleTranslation); SmallVector<llvm::Value *> privateReductionVariables( wsloopOp.getNumReductionVars()); @@ -2762,10 +2793,11 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, opInst.getNumReductionVars()); SmallVector<DeferredStore> deferredStores; - auto bodyGenCB = [&](InsertPointTy allocaIP, - InsertPointTy codeGenIP) -> llvm::Error { + auto bodyGenCB = + [&](InsertPointTy allocIP, InsertPointTy codeGenIP, + llvm::ArrayRef<InsertPointTy> deallocIPs) -> llvm::Error { llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars( - opInst, 
builder, moduleTranslation, privateVarsInfo, allocaIP); + opInst, builder, moduleTranslation, privateVarsInfo, allocIP); if (handleError(afterAllocas, *opInst).failed()) return llvm::make_error<PreviouslyReportedError>(); @@ -2775,12 +2807,11 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, MutableArrayRef<BlockArgument> reductionArgs = cast<omp::BlockArgOpenMPOpInterface>(*opInst).getReductionBlockArgs(); - allocaIP = - InsertPointTy(allocaIP.getBlock(), - allocaIP.getBlock()->getTerminator()->getIterator()); + allocIP = InsertPointTy(allocIP.getBlock(), + allocIP.getBlock()->getTerminator()->getIterator()); if (failed(allocReductionVars( - opInst, reductionArgs, builder, moduleTranslation, allocaIP, + opInst, reductionArgs, builder, moduleTranslation, allocIP, reductionDecls, privateReductionVariables, reductionVariableMap, deferredStores, isByRef))) return llvm::make_error<PreviouslyReportedError>(); @@ -2809,8 +2840,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // Save the alloca insertion point on ModuleTranslation stack for use in // nested regions. - LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( - moduleTranslation, allocaIP); + LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame( + moduleTranslation, allocIP, deallocIPs); // ParallelOp has only one region associated with it. 
llvm::Expected<llvm::BasicBlock *> regionBlock = convertOmpOpRegions( @@ -2837,7 +2868,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint = ompBuilder->createReductions( - builder.saveIP(), allocaIP, reductionInfos, isByRef, + builder.saveIP(), allocIP, reductionInfos, isByRef, /*IsNoWait=*/false, /*IsTeamsReduction=*/false); if (!contInsertPoint) return contInsertPoint.takeError(); @@ -2898,13 +2929,15 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, pbKind = getProcBindKind(*bind); bool isCancellable = constructIsCancellable(opInst); - llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + llvm::SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> deallocIPs; + llvm::OpenMPIRBuilder::InsertPointTy allocIP = + findAllocInsertPoints(builder, moduleTranslation, &deallocIPs); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = - ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB, - ifCond, numThreads, pbKind, isCancellable); + ompBuilder->createParallel(ompLoc, allocIP, deallocIPs, bodyGenCB, privCB, + finiCB, ifCond, numThreads, pbKind, + isCancellable); if (failed(handleError(afterIP, *opInst))) return failure(); @@ -2949,7 +2982,7 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, assert(isByRef.size() == simdOp.getNumReductionVars()); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + findAllocInsertPoints(builder, moduleTranslation); llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars( simdOp, builder, moduleTranslation, privateVarsInfo, allocaIP); @@ -3255,7 +3288,7 @@ convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); 
llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + findAllocInsertPoints(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); @@ -3282,7 +3315,7 @@ convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + findAllocInsertPoints(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder()); @@ -3399,7 +3432,7 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst, extractAtomicControlFlags(opInst, isIgnoreDenormalMode, isFineGrainedMemory, isRemoteMemory); // Handle ambiguous alloca, if any. - auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); + auto allocaIP = findAllocInsertPoints(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr, @@ -3500,7 +3533,7 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp, extractAtomicControlFlags(atomicUpdateOp, isIgnoreDenormalMode, isFineGrainedMemory, isRemoteMemory); // Handle ambiguous alloca, if any. 
- auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation); + auto allocaIP = findAllocInsertPoints(builder, moduleTranslation); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = ompBuilder->createAtomicCapture( @@ -4464,7 +4497,7 @@ createAlteredByCaptureMap(MapInfoData &mapData, if (!isPtrTy) { auto curInsert = builder.saveIP(); llvm::DebugLoc DbgLoc = builder.getCurrentDebugLocation(); - builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation)); + builder.restoreIP(findAllocInsertPoints(builder, moduleTranslation)); auto *memTempAlloc = builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted"); builder.SetCurrentDebugLocation(DbgLoc); @@ -4842,18 +4875,21 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, }; llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); - llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + llvm::SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> deallocIPs; + llvm::OpenMPIRBuilder::InsertPointTy allocIP = + findAllocInsertPoints(builder, moduleTranslation, &deallocIPs); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() { if (isa<omp::TargetDataOp>(op)) - return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(), + return ompBuilder->createTargetData(ompLoc, allocIP, builder.saveIP(), + deallocIPs, builder.getInt64(deviceID), ifCond, info, genMapInfoCB, customMapperCB, /*MapperFunc=*/nullptr, bodyGenCB, /*DeviceAddrCB=*/nullptr); - return ompBuilder->createTargetData( - ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond, - info, genMapInfoCB, customMapperCB, &RTLFn); + return ompBuilder->createTargetData(ompLoc, allocIP, builder.saveIP(), + deallocIPs, builder.getInt64(deviceID), + ifCond, info, genMapInfoCB, + customMapperCB, &RTLFn); }(); if (failed(handleError(afterIP, *op))) @@ -4889,7 +4925,7 @@ convertOmpDistribute(Operation 
&opInst, llvm::IRBuilderBase &builder, collectReductionDecls(teamsOp, reductionDecls); llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + findAllocInsertPoints(builder, moduleTranslation); MutableArrayRef<BlockArgument> reductionArgs = llvm::cast<omp::BlockArgOpenMPOpInterface>(*teamsOp) @@ -4903,19 +4939,20 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, } using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - auto bodyGenCB = [&](InsertPointTy allocaIP, - InsertPointTy codeGenIP) -> llvm::Error { + auto bodyGenCB = + [&](InsertPointTy allocIP, InsertPointTy codeGenIP, + llvm::ArrayRef<InsertPointTy> deallocIPs) -> llvm::Error { // Save the alloca insertion point on ModuleTranslation stack for use in // nested regions. - LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame( - moduleTranslation, allocaIP); + LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame( + moduleTranslation, allocIP, deallocIPs); // DistributeOp has only one region associated with it. 
builder.restoreIP(codeGenIP); PrivateVarsInfo privVarsInfo(distributeOp); llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars( - distributeOp, builder, moduleTranslation, privVarsInfo, allocaIP); + distributeOp, builder, moduleTranslation, privVarsInfo, allocIP); if (handleError(afterAllocas, opInst).failed()) return llvm::make_error<PreviouslyReportedError>(); @@ -4958,7 +4995,7 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, findCurrentLoopInfo(moduleTranslation); llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP = ompBuilder->applyWorkshareLoop( - ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier, + ompLoc.DL, loopInfo, allocIP, loopNeedsBarrier, convertToScheduleKind(schedule), chunk, isSimd, scheduleMod == omp::ScheduleModifier::monotonic, scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered, @@ -4975,11 +5012,12 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, return llvm::Error::success(); }; - llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + llvm::SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> deallocIPs; + llvm::OpenMPIRBuilder::InsertPointTy allocIP = + findAllocInsertPoints(builder, moduleTranslation, &deallocIPs); llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = - ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB); + ompBuilder->createDistribute(ompLoc, allocIP, deallocIPs, bodyGenCB); if (failed(handleError(afterIP, opInst))) return failure(); @@ -4989,7 +5027,7 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder, if (doDistributeReduction) { // Process the reductions if required. 
return createReductionsAndCleanup( - teamsOp, builder, moduleTranslation, allocaIP, reductionDecls, + teamsOp, builder, moduleTranslation, allocIP, reductionDecls, privateReductionVariables, isByRef, /*isNoWait*/ false, /*isTeamsReduction*/ true); } @@ -5659,7 +5697,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, } using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; - auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) + auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP, + ArrayRef<InsertPointTy> deallocIPs) -> llvm::OpenMPIRBuilder::InsertPointOrErrorTy { llvm::IRBuilderBase::InsertPointGuard guard(builder); builder.SetCurrentDebugLocation(llvm::DebugLoc()); @@ -5701,7 +5740,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(targetOp, builder, moduleTranslation, - privateVarsInfo, allocaIP, &mappedPrivateVars); + privateVarsInfo, allocIP, &mappedPrivateVars); if (failed(handleError(afterAllocas, *targetOp))) return llvm::make_error<PreviouslyReportedError>(); @@ -5726,6 +5765,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, return &privatizer.getDeallocRegion(); }); + LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame( + moduleTranslation, allocIP, deallocIPs); llvm::Expected<llvm::BasicBlock *> exitBlock = convertOmpOpRegions( targetRegion, "omp.target", builder, moduleTranslation); @@ -5829,8 +5870,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(), moduleTranslation, dds); - llvm::OpenMPIRBuilder::InsertPointTy allocaIP = - findAllocaInsertPoint(builder, moduleTranslation); + llvm::SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> deallocIPs; + llvm::OpenMPIRBuilder::InsertPointTy allocIP = + findAllocInsertPoints(builder, moduleTranslation, &deallocIPs); 
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); llvm::OpenMPIRBuilder::TargetDataInfo info( @@ -5852,9 +5894,10 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = moduleTranslation.getOpenMPBuilder()->createTarget( - ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), info, entryInfo, - defaultAttrs, runtimeAttrs, ifCond, kernelInput, genMapInfoCB, bodyCB, - argAccessorCB, customMapperCB, dds, targetOp.getNowait()); + ompLoc, isOffloadEntry, allocIP, builder.saveIP(), deallocIPs, info, + entryInfo, defaultAttrs, runtimeAttrs, ifCond, kernelInput, + genMapInfoCB, bodyCB, argAccessorCB, customMapperCB, dds, + targetOp.getNowait()); if (failed(handleError(afterIP, opInst))) return failure(); diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir index ca998b4..c3ce2f6 100644 --- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir @@ -55,21 +55,21 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo // CHECK: define weak_odr protected amdgpu_kernel void @[[FUNC0:.*]]( // CHECK-SAME: ptr %[[TMP:.*]], ptr %[[TMP0:.*]]) #{{[0-9]+}} { // CHECK: %[[TMP1:.*]] = alloca [1 x ptr], align 8, addrspace(5) -// CHECK: %[[TMP2:.*]] = addrspacecast ptr addrspace(5) %[[TMP1]] to ptr -// CHECK: %[[TMP3:.*]] = alloca ptr, align 8, addrspace(5) -// CHECK: %[[TMP4:.*]] = addrspacecast ptr addrspace(5) %[[TMP3]] to ptr -// CHECK: store ptr %[[TMP0]], ptr %[[TMP4]], align 8 -// CHECK: %[[TMP5:.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{.*}} to ptr), ptr %[[TMP]]) -// CHECK: %[[EXEC_USER_CODE:.*]] = icmp eq i32 %[[TMP5]], -1 +// CHECK: %[[TMP2:.*]] = alloca ptr, align 8, addrspace(5) +// CHECK: %[[TMP3:.*]] = addrspacecast ptr addrspace(5) %[[TMP2]] to ptr +// CHECK: store ptr %[[TMP0]], ptr %[[TMP3]], align 8 +// CHECK: 
%[[TMP4:.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{.*}} to ptr), ptr %[[TMP]]) +// CHECK: %[[EXEC_USER_CODE:.*]] = icmp eq i32 %[[TMP4]], -1 // CHECK: br i1 %[[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]] -// CHECK: %[[TMP6:.*]] = load ptr, ptr %[[TMP4]], align 8 +// CHECK: %[[TMP5:.*]] = addrspacecast ptr addrspace(5) %[[TMP1]] to ptr // CHECK: %[[STRUCTARG:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8) +// CHECK: %[[TMP6:.*]] = load ptr, ptr %[[TMP3]], align 8 // CHECK: %[[OMP_GLOBAL_THREAD_NUM:.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr)) // CHECK: %[[GEP_:.*]] = getelementptr { ptr }, ptr %[[STRUCTARG]], i32 0, i32 0 // CHECK: store ptr %[[TMP6]], ptr %[[GEP_]], align 8 -// CHECK: %[[TMP7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[TMP2]], i64 0, i64 0 +// CHECK: %[[TMP7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[TMP5]], i64 0, i64 0 // CHECK: store ptr %[[STRUCTARG]], ptr %[[TMP7]], align 8 -// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr @[[FUNC1:.*]], ptr @[[FUNC1_WRAPPER:.*]], ptr %[[TMP2]], i64 1) +// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr @[[FUNC1:.*]], ptr @[[FUNC1_WRAPPER:.*]], ptr %[[TMP5]], i64 1) // CHECK: call void @__kmpc_free_shared(ptr %[[STRUCTARG]], i64 8) // CHECK: call void @__kmpc_target_deinit() diff --git a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir index 5a76871..3ebb79f 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir @@ -56,7 +56,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo // CHECK: 
%[[B:.*]] = load i32, ptr %[[PTR_B]], align 4 // CHECK: %[[C:.*]] = add i32 %[[A]], %[[B]] // CHECK: store i32 %[[C]], ptr %[[PTR_C]], align 4 -// CHECK: br label %[[LABEL_DEINIT:.*]] +// CHECK: br label %[[LABEL_TARGET_EXIT:.*]] +// CHECK: [[LABEL_TARGET_EXIT]]: +// CHECK-NEXT: br label %[[LABEL_DEINIT:.*]] // CHECK: [[LABEL_DEINIT]]: // CHECK-NEXT: call void @__kmpc_target_deinit() // CHECK-NEXT: ret void diff --git a/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir b/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir index 0ee9230..2aa11f3 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir @@ -70,4 +70,6 @@ llvm.func @_FortranAAssign(!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct // CHECK: call void @dealloc_foo_1(ptr %[[DESC_TO_DEALLOC]]) // CHECK-NEXT: br label %[[CONT_BLOCK:.*]] // CHECK: [[CONT_BLOCK]]: +// CHECK-NEXT: br label %[[EXIT_BLOCK:.*]] +// CHECK: [[EXIT_BLOCK]]: // CHECK-NEXT: ret void |