aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 4
-rw-r--r-- clang/lib/CodeGen/CGStmtOpenMP.cpp | 77
-rw-r--r-- llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h | 99
-rw-r--r-- llvm/include/llvm/Transforms/Utils/CodeExtractor.h | 24
-rw-r--r-- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 298
-rw-r--r-- llvm/lib/Transforms/IPO/HotColdSplitting.cpp | 2
-rw-r--r-- llvm/lib/Transforms/IPO/IROutliner.cpp | 4
-rw-r--r-- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 11
-rw-r--r-- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 37
-rw-r--r-- llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp | 328
-rw-r--r-- llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp | 2
-rw-r--r-- mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 233
-rw-r--r-- mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir | 18
-rw-r--r-- mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir | 4
-rw-r--r-- mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir | 2
15 files changed, 629 insertions, 514 deletions
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index f98339d..f0cb753 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10500,8 +10500,8 @@ void CGOpenMPRuntime::emitTargetDataCalls(
llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail(OMPBuilder.createTargetData(
- OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
- CustomMapperCB,
+ OmpLoc, AllocaIP, CodeGenIP, /*DeallocIPs=*/{}, DeviceID, IfCondVal,
+ Info, GenMapInfoCB, CustomMapperCB,
/*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
CGF.Builder.restoreIP(AfterIP);
}
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index f6a0ca5..c9ac207 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -1835,10 +1835,10 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
- auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&, this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
- *this, ParallelRegionBodyStmt, AllocaIP, CodeGenIP, "parallel");
+ *this, ParallelRegionBodyStmt, AllocIP, CodeGenIP, "parallel");
return llvm::Error::success();
};
@@ -1846,9 +1846,10 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
- llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(
- OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
- IfCond, NumThreads, ProcBind, S.hasCancel()));
+ llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
+ cantFail(OMPBuilder.createParallel(
+ Builder, AllocaIP, /*DeallocIPs=*/{}, BodyGenCB, PrivCB, FiniCB,
+ IfCond, NumThreads, ProcBind, S.hasCancel()));
Builder.restoreIP(AfterIP);
return;
}
@@ -4361,21 +4362,23 @@ void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
if (CS) {
for (const Stmt *SubStmt : CS->children()) {
- auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP) {
- OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
- *this, SubStmt, AllocaIP, CodeGenIP, "section");
+ auto SectionCB = [this, SubStmt](InsertPointTy AllocIP,
+ InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
+ OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(*this, SubStmt, AllocIP,
+ CodeGenIP, "section");
return llvm::Error::success();
};
SectionCBVector.push_back(SectionCB);
}
} else {
- auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP) {
- OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
- *this, CapturedStmt, AllocaIP, CodeGenIP, "section");
- return llvm::Error::success();
- };
+ auto SectionCB =
+ [this, CapturedStmt](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
+ OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
+ *this, CapturedStmt, AllocIP, CodeGenIP, "section");
+ return llvm::Error::success();
+ };
SectionCBVector.push_back(SectionCB);
}
@@ -4429,10 +4432,11 @@ void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
return llvm::Error::success();
};
- auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [SectionRegionBodyStmt,
+ this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
- *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section");
+ *this, SectionRegionBodyStmt, AllocIP, CodeGenIP, "section");
return llvm::Error::success();
};
@@ -4514,10 +4518,11 @@ void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
return llvm::Error::success();
};
- auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [MasterRegionBodyStmt,
+ this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
- *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master");
+ *this, MasterRegionBodyStmt, AllocIP, CodeGenIP, "master");
return llvm::Error::success();
};
@@ -4564,10 +4569,11 @@ void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
return llvm::Error::success();
};
- auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [MaskedRegionBodyStmt,
+ this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
- *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked");
+ *this, MaskedRegionBodyStmt, AllocIP, CodeGenIP, "masked");
return llvm::Error::success();
};
@@ -4607,10 +4613,11 @@ void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
return llvm::Error::success();
};
- auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [CriticalRegionBodyStmt,
+ this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
- *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical");
+ *this, CriticalRegionBodyStmt, AllocIP, CodeGenIP, "critical");
return llvm::Error::success();
};
@@ -5577,8 +5584,8 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
AllocaInsertPt->getIterator());
- auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&, this](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
Builder.restoreIP(CodeGenIP);
EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
return llvm::Error::success();
@@ -5587,7 +5594,8 @@ void CodeGenFunction::EmitOMPTaskgroupDirective(
if (!CapturedStmtInfo)
CapturedStmtInfo = &CapStmtInfo;
llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
- cantFail(OMPBuilder.createTaskgroup(Builder, AllocaIP, BodyGenCB));
+ cantFail(OMPBuilder.createTaskgroup(Builder, AllocaIP,
+ /*DeallocIPs=*/{}, BodyGenCB));
Builder.restoreIP(AfterIP);
return;
}
@@ -6167,8 +6175,9 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
return llvm::Error::success();
};
- auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&S, C, this](InsertPointTy AllocIP,
+ InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
Builder.restoreIP(CodeGenIP);
const CapturedStmt *CS = S.getInnermostCapturedStmt();
@@ -6186,7 +6195,7 @@ void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
OutlinedFn, CapturedVars);
} else {
OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
- *this, CS->getCapturedStmt(), AllocaIP, CodeGenIP, "ordered");
+ *this, CS->getCapturedStmt(), AllocIP, CodeGenIP, "ordered");
}
return llvm::Error::success();
};
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 36be9bf..d033691 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -602,17 +602,19 @@ public:
/// such InsertPoints need to be preserved, it can split the block itself
/// before calling the callback.
///
- /// AllocaIP and CodeGenIP must not point to the same position.
- ///
- /// \param AllocaIP is the insertion point at which new alloca instructions
- /// should be placed. The BasicBlock it is pointing to must
- /// not be split.
- /// \param CodeGenIP is the insertion point at which the body code should be
- /// placed.
- ///
+ /// AllocIP and CodeGenIP must not point to the same position.
+ ///
+ /// \param AllocIP is the insertion point at which new allocations should
+ /// be placed. The BasicBlock it is pointing to must not be
+ /// split.
+ /// \param CodeGenIP is the insertion point at which the body code should be
+ /// placed.
+ /// \param DeallocIPs is the list of insertion points where explicit
+ /// deallocations, if needed, should be placed.
/// \return an error, if any were triggered during execution.
using BodyGenCallbackTy =
- function_ref<Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
+ function_ref<Error(InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs)>;
// This is created primarily for sections construct as llvm::function_ref
// (BodyGenCallbackTy) is not storable (as described in the comments of
@@ -621,7 +623,8 @@ public:
///
/// \return an error, if any were triggered during execution.
using StorableBodyGenCallbackTy =
- std::function<Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
+ std::function<Error(InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs)>;
/// Callback type for loop body code generation.
///
@@ -715,7 +718,9 @@ public:
/// Generator for '#omp parallel'
///
/// \param Loc The insert and source location description.
- /// \param AllocaIP The insertion points to be used for alloca instructions.
+ /// \param AllocIP The insertion point to be used for allocations.
+ /// \param DeallocIPs The insertion points to be used for explicit
+ /// deallocations, if needed.
/// \param BodyGenCB Callback that will generate the region code.
/// \param PrivCB Callback to copy a given variable (think copy constructor).
/// \param FiniCB Callback to finalize variable copies.
@@ -726,10 +731,10 @@ public:
///
/// \returns The insertion position *after* the parallel.
LLVM_ABI InsertPointOrErrorTy createParallel(
- const LocationDescription &Loc, InsertPointTy AllocaIP,
- BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
- FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
- omp::ProcBindKind ProcBind, bool IsCancellable);
+ const LocationDescription &Loc, InsertPointTy AllocIP,
+ ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB,
+ PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
+ Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable);
/// Generator for the control flow structure of an OpenMP canonical loop.
///
@@ -1347,7 +1352,9 @@ public:
/// Generator for `#omp task`
///
/// \param Loc The location where the task construct was encountered.
- /// \param AllocaIP The insertion point to be used for alloca instructions.
+ /// \param AllocIP The insertion point to be used for allocations.
+ /// \param DeallocIPs The insertion points to be used for explicit
+ /// deallocations, if needed.
/// \param BodyGenCB Callback that will generate the region code.
/// \param Tied True if the task is tied, false if the task is untied.
/// \param Final i1 value which is `true` if the task is final, `false` if the
@@ -1363,21 +1370,23 @@ public:
/// \param Mergeable If the given task is `mergeable`
/// \param priority `priority-value' specifies the execution order of the
/// tasks that is generated by the construct
- LLVM_ABI InsertPointOrErrorTy
- createTask(const LocationDescription &Loc, InsertPointTy AllocaIP,
- BodyGenCallbackTy BodyGenCB, bool Tied = true,
- Value *Final = nullptr, Value *IfCondition = nullptr,
- SmallVector<DependData> Dependencies = {}, bool Mergeable = false,
- Value *EventHandle = nullptr, Value *Priority = nullptr);
+ LLVM_ABI InsertPointOrErrorTy createTask(
+ const LocationDescription &Loc, InsertPointTy AllocIP,
+ ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB,
+ bool Tied = true, Value *Final = nullptr, Value *IfCondition = nullptr,
+ SmallVector<DependData> Dependencies = {}, bool Mergeable = false,
+ Value *EventHandle = nullptr, Value *Priority = nullptr);
/// Generator for the taskgroup construct
///
/// \param Loc The location where the taskgroup construct was encountered.
- /// \param AllocaIP The insertion point to be used for alloca instructions.
+ /// \param AllocIP The insertion point to be used for allocations.
+ /// \param DeallocIPs The insertion points to be used for explicit
+ /// deallocations, if needed.
/// \param BodyGenCB Callback that will generate the region code.
- LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc,
- InsertPointTy AllocaIP,
- BodyGenCallbackTy BodyGenCB);
+ LLVM_ABI InsertPointOrErrorTy createTaskgroup(
+ const LocationDescription &Loc, InsertPointTy AllocIP,
+ ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB);
using FileIdentifierInfoCallbackTy =
std::function<std::tuple<std::string, uint64_t>()>;
@@ -2246,7 +2255,8 @@ public:
struct OutlineInfo {
using PostOutlineCBTy = std::function<void(Function &)>;
PostOutlineCBTy PostOutlineCB;
- BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB;
+ BasicBlock *EntryBB, *ExitBB, *OuterAllocBB;
+ SmallVector<BasicBlock *> OuterDeallocBBs;
SmallVector<Value *, 2> ExcludeArgsFromAggregate;
LLVM_ABI virtual ~OutlineInfo() = default;
@@ -2319,7 +2329,8 @@ public:
/// \return an error, if any were triggered during execution.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
BodyGenCallbackTy ElseGen,
- InsertPointTy AllocaIP = {});
+ InsertPointTy AllocIP = {},
+ ArrayRef<InsertPointTy> DeallocIPs = {});
/// Create the global variable holding the offload mappings information.
LLVM_ABI GlobalVariable *
@@ -2874,11 +2885,13 @@ public:
/// Generator for `#omp distribute`
///
/// \param Loc The location where the distribute construct was encountered.
- /// \param AllocaIP The insertion points to be used for alloca instructions.
+ /// \param AllocIP The insertion point to be used for allocations.
+ /// \param DeallocIPs The insertion points to be used for explicit
+ /// deallocations, if needed.
/// \param BodyGenCB Callback that will generate the region code.
- LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc,
- InsertPointTy AllocaIP,
- BodyGenCallbackTy BodyGenCB);
+ LLVM_ABI InsertPointOrErrorTy createDistribute(
+ const LocationDescription &Loc, InsertPointTy AllocIP,
+ ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB);
/// Generate conditional branch and relevant BasicBlocks through which private
/// threads copy the 'copyin' variables from Master copy to threadprivate
@@ -3206,9 +3219,11 @@ public:
/// Generator for '#omp target data'
///
/// \param Loc The location where the target data construct was encountered.
- /// \param AllocaIP The insertion points to be used for alloca instructions.
+ /// \param AllocIP The insertion point to be used for allocations.
/// \param CodeGenIP The insertion point at which the target directive code
/// should be placed.
+ /// \param DeallocIPs The insertion points at which explicit deallocations
+ /// should be placed, if needed.
/// \param IsBegin If true then emits begin mapper call otherwise emits
/// end mapper call.
/// \param DeviceID Stores the DeviceID from the device clause.
@@ -3221,10 +3236,10 @@ public:
/// \param DeviceAddrCB Optional callback to generate code related to
/// use_device_ptr and use_device_addr.
LLVM_ABI InsertPointOrErrorTy createTargetData(
- const LocationDescription &Loc, InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
- TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
- CustomMapperCallbackTy CustomMapperCB,
+ const LocationDescription &Loc, InsertPointTy AllocIP,
+ InsertPointTy CodeGenIP, ArrayRef<InsertPointTy> DeallocIPs,
+ Value *DeviceID, Value *IfCond, TargetDataInfo &Info,
+ GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB,
omp::RuntimeFunction *MapperFunc = nullptr,
function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
BodyGenTy BodyGenType)>
@@ -3233,7 +3248,8 @@ public:
Value *SrcLocInfo = nullptr);
using TargetBodyGenCallbackTy = function_ref<InsertPointOrErrorTy(
- InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
+ InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs)>;
using TargetGenArgAccessorsCallbackTy = function_ref<InsertPointOrErrorTy(
Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP,
@@ -3245,6 +3261,8 @@ public:
/// \param IsOffloadEntry whether it is an offload entry.
/// \param CodeGenIP The insertion point where the call to the outlined
/// function should be emitted.
+ /// \param DeallocIPs The insertion points at which explicit deallocations
+ /// should be placed, if needed.
/// \param Info Stores all information related to the Target directive.
/// \param EntryInfo The entry information about the function.
/// \param DefaultAttrs Structure containing the default attributes, including
@@ -3265,8 +3283,9 @@ public:
/// not.
LLVM_ABI InsertPointOrErrorTy createTarget(
const LocationDescription &Loc, bool IsOffloadEntry,
- OpenMPIRBuilder::InsertPointTy AllocaIP,
- OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetDataInfo &Info,
+ OpenMPIRBuilder::InsertPointTy AllocIP,
+ OpenMPIRBuilder::InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs, TargetDataInfo &Info,
TargetRegionEntryInfo &EntryInfo,
const TargetKernelDefaultAttrs &DefaultAttrs,
const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond,
diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index b3bea96..7b1e3a7 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -100,13 +100,13 @@ public:
/// will be placed in the entry block of the function.
BasicBlock *AllocationBlock;
- /// A block outside of the extraction set where deallocations for
- /// intermediate allocations can be placed inside. Not used for
- /// automatically deallocated memory (e.g. `alloca`), which is the default.
+ /// A set of blocks outside of the extraction set where deallocations for
+ /// intermediate allocations should be placed. Not used for automatically
+ /// deallocated memory (e.g. `alloca`), which is the default.
///
- /// If it is null and needed, the end of the replacement basic block will be
- /// used to place deallocations.
- BasicBlock *DeallocationBlock;
+ /// If it is empty and needed, the end of the replacement basic block will
+ /// be used to place deallocations.
+ SmallVector<BasicBlock *> DeallocationBlocks;
/// If true, varargs functions can be extracted.
bool AllowVarArgs;
@@ -156,11 +156,11 @@ public:
/// Any new allocations will be placed in the AllocationBlock, unless
/// it is null, in which case it will be placed in the entry block of
/// the function from which the code is being extracted. Explicit
- /// deallocations for the aforementioned allocations will be placed in the
- /// DeallocationBlock or the end of the replacement block, if needed.
- /// If ArgsInZeroAddressSpace param is set to true, then the aggregate
- /// param pointer of the outlined function is declared in zero address
- /// space.
+ /// deallocations for the aforementioned allocations will be placed, if
+ /// needed, in all blocks in DeallocationBlocks or, if it is empty, at the
+ /// end of the replacement block. If ArgsInZeroAddressSpace param is set to
+ /// true, then the aggregate param pointer of the outlined function is
+ /// declared in zero address space.
LLVM_ABI
CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr,
bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
@@ -168,7 +168,7 @@ public:
AssumptionCache *AC = nullptr, bool AllowVarArgs = false,
bool AllowAlloca = false,
BasicBlock *AllocationBlock = nullptr,
- BasicBlock *DeallocationBlock = nullptr,
+ ArrayRef<BasicBlock *> DeallocationBlocks = {},
std::string Suffix = "", bool ArgsInZeroAddressSpace = false);
LLVM_ABI virtual ~CodeExtractor() = default;
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8b22112..21364c2 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -476,10 +476,10 @@ public:
AssumptionCache *AC = nullptr, bool AllowVarArgs = false,
bool AllowAlloca = false,
BasicBlock *AllocationBlock = nullptr,
- BasicBlock *DeallocationBlock = nullptr,
+ ArrayRef<BasicBlock *> DeallocationBlocks = {},
std::string Suffix = "", bool ArgsInZeroAddressSpace = false)
: CodeExtractor(BBs, DT, AggregateArgs, BFI, BPI, AC, AllowVarArgs,
- AllowAlloca, AllocationBlock, DeallocationBlock, Suffix,
+ AllowAlloca, AllocationBlock, DeallocationBlocks, Suffix,
ArgsInZeroAddressSpace),
OMPBuilder(OMPBuilder) {}
@@ -491,32 +491,16 @@ protected:
class DeviceSharedMemCodeExtractor : public OMPCodeExtractor {
public:
- DeviceSharedMemCodeExtractor(
- OpenMPIRBuilder &OMPBuilder, BasicBlock *AllocBlockOverride,
- ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr,
- bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
- BranchProbabilityInfo *BPI = nullptr, AssumptionCache *AC = nullptr,
- bool AllowVarArgs = false, bool AllowAlloca = false,
- BasicBlock *AllocationBlock = nullptr,
- BasicBlock *DeallocationBlock = nullptr, std::string Suffix = "",
- bool ArgsInZeroAddressSpace = false)
- : OMPCodeExtractor(OMPBuilder, BBs, DT, AggregateArgs, BFI, BPI, AC,
- AllowVarArgs, AllowAlloca, AllocationBlock,
- DeallocationBlock, Suffix, ArgsInZeroAddressSpace),
- AllocBlockOverride(AllocBlockOverride) {}
+ using OMPCodeExtractor::OMPCodeExtractor;
virtual ~DeviceSharedMemCodeExtractor() = default;
protected:
virtual Instruction *
- allocateVar(BasicBlock *, BasicBlock::iterator, Type *VarType,
+ allocateVar(BasicBlock *BB, BasicBlock::iterator AllocIP, Type *VarType,
const Twine &Name = Twine(""),
AddrSpaceCastInst **CastedAlloc = nullptr) override {
- // Ignore the CastedAlloc pointer, if requested, because shared memory
- // should not be casted to address space 0 to be passed around.
return OMPBuilder.createOMPAllocShared(
- OpenMPIRBuilder::InsertPointTy(
- AllocBlockOverride, AllocBlockOverride->getFirstInsertionPt()),
- VarType, Name);
+ OpenMPIRBuilder::InsertPointTy(BB, AllocIP), VarType, Name);
}
virtual Instruction *deallocateVar(BasicBlock *BB,
@@ -525,19 +509,12 @@ protected:
return OMPBuilder.createOMPFreeShared(
OpenMPIRBuilder::InsertPointTy(BB, DeallocIP), Var, VarType);
}
-
-private:
- // TODO: Remove the need for this override and instead get the CodeExtractor
- // to provide a valid insert point for explicit deallocations by correctly
- // populating its DeallocationBlock.
- BasicBlock *AllocBlockOverride;
};
/// Helper storing information about regions to outline using device shared
/// memory for intermediate allocations.
struct DeviceSharedMemOutlineInfo : public OpenMPIRBuilder::OutlineInfo {
OpenMPIRBuilder &OMPBuilder;
- BasicBlock *AllocBlockOverride = nullptr;
DeviceSharedMemOutlineInfo(OpenMPIRBuilder &OMPBuilder)
: OMPBuilder(OMPBuilder) {}
@@ -1667,11 +1644,11 @@ hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn,
}
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
- const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
- BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
- FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
- omp::ProcBindKind ProcBind, bool IsCancellable) {
- assert(!isConflictIP(Loc.IP, OuterAllocaIP) && "IPs must not be ambiguous");
+ const LocationDescription &Loc, InsertPointTy OuterAllocIP,
+ ArrayRef<InsertPointTy> OuterDeallocIPs, BodyGenCallbackTy BodyGenCB,
+ PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition,
+ Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable) {
+ assert(!isConflictIP(Loc.IP, OuterAllocIP) && "IPs must not be ambiguous");
if (!updateToLocation(Loc))
return Loc.IP;
@@ -1711,7 +1688,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
// Save the outer alloca block because the insertion iterator may get
// invalidated and we still need this later.
- BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
+ BasicBlock *OuterAllocaBlock = OuterAllocIP.getBlock();
// Vector to remember instructions we used only during the modeling but which
// we want to delete at the end.
@@ -1807,7 +1784,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
// Let the caller create the body.
assert(BodyGenCB && "Expected body generation callback!");
InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
- if (Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
+ InsertPointTy DeallocIP(PRegExitBB, PRegExitBB->begin());
+ if (Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP, DeallocIP))
return Err;
LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");
@@ -1820,35 +1798,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
// If OuterFn is not a Generic kernel, skip custom allocation. This causes
// the CodeExtractor to follow its default behavior. Otherwise, we need to
// use device shared memory to allocate argument structures.
- if (ExecMode && *ExecMode & OMP_TGT_EXEC_MODE_GENERIC) {
- auto Info = std::make_unique<DeviceSharedMemOutlineInfo>(*this);
-
- // Instead of using the insertion point provided by the CodeExtractor,
- // here we need to use the block that eventually calls the outlined
- // function for the `parallel` construct.
- //
- // The reason is that the explicit deallocation call will be inserted
- // within the outlined function, whereas the alloca insertion point
- // might actually be located somewhere else in the caller. This becomes
- // a problem when e.g. `parallel` is inside of a `distribute` construct,
- // because the deallocation would be executed multiple times and the
- // allocation just once (outside of the loop).
- //
- // TODO: Ideally, we'd want to do the allocation and deallocation
- // outside of the `parallel` outlined function, hence using here the
- // insertion point provided by the CodeExtractor. We can't do this at
- // the moment because there is currently no way of passing an eligible
- // insertion point for the explicit deallocation to the CodeExtractor,
- // as that block is created (at least when nested inside of
- // `distribute`) sometime after createParallel() completed, so it can't
- // be stored in the OutlineInfo structure here.
- //
- // The current approach results in an explicit allocation and
- // deallocation pair for each `distribute` loop iteration in that case,
- // which is suboptimal.
- Info->AllocBlockOverride = EntryBB;
- return Info;
- }
+ if (ExecMode && *ExecMode & OMP_TGT_EXEC_MODE_GENERIC)
+ return std::make_unique<DeviceSharedMemOutlineInfo>(*this);
}
return std::make_unique<OutlineInfo>();
}();
@@ -1870,9 +1821,12 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
};
}
- OI->OuterAllocaBB = OuterAllocaBlock;
+ OI->OuterAllocBB = OuterAllocaBlock;
OI->EntryBB = PRegEntryBB;
OI->ExitBB = PRegExitBB;
+ OI->OuterDeallocBBs.reserve(OuterDeallocIPs.size());
+ for (InsertPointTy DeallocIP : OuterDeallocIPs)
+ OI->OuterDeallocBBs.push_back(DeallocIP.getBlock());
SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
SmallVector<BasicBlock *, 32> Blocks;
@@ -1887,7 +1841,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
/* AllowVarArgs */ true,
/* AllowAlloca */ true,
/* AllocationBlock */ OuterAllocaBlock,
- /* DeallocationBlock */ nullptr,
+ /* DeallocationBlocks */ {},
/* Suffix */ ".omp_par", ArgsInZeroAddressSpace);
// Find inputs to, outputs from the code region.
@@ -1933,7 +1887,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
IRBuilder<>::InsertPointGuard Guard(Builder);
LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");
- Builder.restoreIP(OuterAllocaIP);
+ Builder.restoreIP(OuterAllocIP);
Value *Ptr =
Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");
@@ -1985,7 +1939,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
// Reset the outer alloca insertion point to the entry of the relevant block
// in case it was invalidated.
- OuterAllocaIP = IRBuilder<>::InsertPoint(
+ OuterAllocIP = IRBuilder<>::InsertPoint(
OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
for (Value *Input : Inputs) {
@@ -2151,10 +2105,10 @@ static Value *emitTaskDependencies(
}
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
- const LocationDescription &Loc, InsertPointTy AllocaIP,
- BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition,
- SmallVector<DependData> Dependencies, bool Mergeable, Value *EventHandle,
- Value *Priority) {
+ const LocationDescription &Loc, InsertPointTy AllocIP,
+ ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB, bool Tied,
+ Value *Final, Value *IfCondition, SmallVector<DependData> Dependencies,
+ bool Mergeable, Value *EventHandle, Value *Priority) {
if (!updateToLocation(Loc))
return InsertPointTy();
@@ -2186,18 +2140,22 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
InsertPointTy TaskAllocaIP =
InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin());
InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin());
- if (Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
+ InsertPointTy TaskDeallocIP = InsertPointTy(TaskExitBB, TaskExitBB->begin());
+ if (Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP, TaskDeallocIP))
return Err;
auto OI = std::make_unique<OutlineInfo>();
OI->EntryBB = TaskAllocaBB;
- OI->OuterAllocaBB = AllocaIP.getBlock();
+ OI->OuterAllocBB = AllocIP.getBlock();
OI->ExitBB = TaskExitBB;
+ OI->OuterDeallocBBs.reserve(DeallocIPs.size());
+ for (InsertPointTy DeallocIP : DeallocIPs)
+ OI->OuterDeallocBBs.push_back(DeallocIP.getBlock());
// Add the thread ID argument.
SmallVector<Instruction *, 4> ToBeDeleted;
OI->ExcludeArgsFromAggregate.push_back(createFakeIntVal(
- Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false));
+ Builder, AllocIP, ToBeDeleted, TaskAllocaIP, "global.tid", false));
OI->PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies,
Mergeable, Priority, EventHandle, TaskAllocaBB,
@@ -2414,10 +2372,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
return Builder.saveIP();
}
-OpenMPIRBuilder::InsertPointOrErrorTy
-OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc,
- InsertPointTy AllocaIP,
- BodyGenCallbackTy BodyGenCB) {
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskgroup(
+ const LocationDescription &Loc, InsertPointTy AllocIP,
+ ArrayRef<InsertPointTy> DeallocIPs, BodyGenCallbackTy BodyGenCB) {
if (!updateToLocation(Loc))
return InsertPointTy();
@@ -2432,7 +2389,7 @@ OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc,
Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
BasicBlock *TaskgroupExitBB = splitBB(Builder, true, "taskgroup.exit");
- if (Error Err = BodyGenCB(AllocaIP, Builder.saveIP()))
+ if (Error Err = BodyGenCB(AllocIP, Builder.saveIP(), DeallocIPs))
return Err;
Builder.SetInsertPoint(TaskgroupExitBB);
@@ -2501,8 +2458,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections(
SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
Builder.SetInsertPoint(CaseBB);
BranchInst *CaseEndBr = Builder.CreateBr(Continue);
- if (Error Err = SectionCB(InsertPointTy(), {CaseEndBr->getParent(),
- CaseEndBr->getIterator()}))
+ if (Error Err =
+ SectionCB(InsertPointTy(),
+ {CaseEndBr->getParent(), CaseEndBr->getIterator()}, {}))
return Err;
CaseNumber++;
}
@@ -4355,8 +4313,8 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
}
// Allocate temporary buffer by master thread
- auto BodyGenCB = [&](InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP) -> Error {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) -> Error {
Builder.restoreIP(CodeGenIP);
Value *AllocSpan =
Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
@@ -4395,8 +4353,8 @@ Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
ArrayRef<ReductionInfo> ReductionInfos, ScanInfo *ScanRedInfo) {
- auto BodyGenCB = [&](InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP) -> Error {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) -> Error {
Builder.restoreIP(CodeGenIP);
for (ReductionInfo RedInfo : ReductionInfos) {
Value *PrivateVar = RedInfo.PrivateVariable;
@@ -4447,8 +4405,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
if (!updateToLocation(Loc))
return Loc.IP;
- auto BodyGenCB = [&](InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP) -> Error {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) -> Error {
Builder.restoreIP(CodeGenIP);
Function *CurFn = Builder.GetInsertBlock()->getParent();
// for (int k = 0; k <= ceil(log2(n)); ++k)
@@ -5291,13 +5249,13 @@ OpenMPIRBuilder::applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
auto OI = std::make_unique<OutlineInfo>();
- OI->OuterAllocaBB = CLI->getPreheader();
+ OI->OuterAllocBB = CLI->getPreheader();
Function *OuterFn = CLI->getPreheader()->getParent();
// Instructions which need to be deleted at the end of code generation
SmallVector<Instruction *, 4> ToBeDeleted;
- OI->OuterAllocaBB = AllocaIP.getBlock();
+ OI->OuterAllocBB = AllocaIP.getBlock();
// Mark the body loop as region which needs to be extracted
OI->EntryBB = CLI->getBody();
@@ -5334,7 +5292,7 @@ OpenMPIRBuilder::applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
/* AllowVarArgs */ true,
/* AllowAlloca */ true,
/* AllocationBlock */ CLI->getPreheader(),
- /* DeallocationBlock */ nullptr,
+ /* DeallocationBlocks */ {},
/* Suffix */ ".omp_wsloop",
/* AggrArgsIn0AddrSpace */ true);
@@ -6691,8 +6649,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion(
emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
// generate body
- if (Error Err = BodyGenCB(/* AllocaIP */ InsertPointTy(),
- /* CodeGenIP */ Builder.saveIP()))
+ if (Error Err =
+ BodyGenCB(/* AllocIP */ InsertPointTy(),
+ /* CodeGenIP */ Builder.saveIP(), /* DeallocIPs */ {}))
return Err;
// emit exit call and do any needed finalization.
@@ -7333,10 +7292,11 @@ Constant *OpenMPIRBuilder::registerTargetRegionFunction(
}
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
- const LocationDescription &Loc, InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
- TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
- CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc,
+ const LocationDescription &Loc, InsertPointTy AllocIP,
+ InsertPointTy CodeGenIP, ArrayRef<InsertPointTy> DeallocIPs,
+ Value *DeviceID, Value *IfCond, TargetDataInfo &Info,
+ GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB,
+ omp::RuntimeFunction *MapperFunc,
function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
BodyGenTy BodyGenType)>
BodyGenCB,
@@ -7362,11 +7322,11 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
// Generate the code for the opening of the data environment. Capture all the
// arguments of the runtime call by reference because they are used in the
// closing of the region.
- auto BeginThenGen = [&](InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP) -> Error {
+ auto BeginThenGen = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) -> Error {
MapInfo = &GenMapInfoCB(Builder.saveIP());
if (Error Err = emitOffloadingArrays(
- AllocaIP, Builder.saveIP(), *MapInfo, Info, CustomMapperCB,
+ AllocIP, Builder.saveIP(), *MapInfo, Info, CustomMapperCB,
/*IsNonContiguous=*/true, DeviceAddrCB))
return Err;
@@ -7420,7 +7380,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
cantFail(TaskBodyCB(/*DeviceID=*/nullptr, /*RTLoc=*/nullptr,
/*TargetTaskAllocaIP=*/{}));
else
- cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
+ cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocIP,
/*Dependencies=*/{}, RTArgs, Info.HasNoWait));
} else {
Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
@@ -7451,8 +7411,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
// If we need device pointer privatization, we need to emit the body of the
// region with no privatization in the 'else' branch of the conditional.
// Otherwise, we don't have to do anything.
- auto BeginElseGen = [&](InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP) -> Error {
+ auto BeginElseGen = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) -> Error {
InsertPointOrErrorTy AfterIP =
BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv);
if (!AfterIP)
@@ -7462,7 +7422,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
};
// Generate code for the closing of the data region.
- auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto EndThenGen = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
TargetDataRTArgs RTArgs;
Info.EmitDebug = !MapInfo->Names.empty();
emitOffloadingArraysArgument(Builder, RTArgs, Info, /*ForEndCall=*/true);
@@ -7491,7 +7452,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
// We don't have to do anything to close the region if the if clause evaluates
// to false.
- auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto EndElseGen = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
return Error::success();
};
@@ -7499,8 +7461,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
if (BodyGenCB) {
Error Err = [&]() {
if (IfCond)
- return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
- return BeginThenGen(AllocaIP, Builder.saveIP());
+ return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocIP);
+ return BeginThenGen(AllocIP, Builder.saveIP(), DeallocIPs);
}();
if (Err)
@@ -7515,12 +7477,12 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
restoreIPandDebugLoc(Builder, *AfterIP);
if (IfCond)
- return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
- return EndThenGen(AllocaIP, Builder.saveIP());
+ return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocIP);
+ return EndThenGen(AllocIP, Builder.saveIP(), DeallocIPs);
}
if (IfCond)
- return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
- return BeginThenGen(AllocaIP, Builder.saveIP());
+ return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocIP);
+ return BeginThenGen(AllocIP, Builder.saveIP(), DeallocIPs);
}();
if (Err)
@@ -7727,15 +7689,18 @@ static Expected<Function *> createOutlinedFunction(
if (OMPBuilder.Config.isTargetDevice())
OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);
- // Insert target deinit call in the device compilation pass.
+ BasicBlock *ExitBB = splitBB(Builder, /*CreateBranch=*/true, "target.exit");
BasicBlock *OutlinedBodyBB =
splitBB(Builder, /*CreateBranch=*/true, "outlined.body");
llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc(
Builder.saveIP(),
- OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->begin()));
+ OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->begin()),
+ OpenMPIRBuilder::InsertPointTy(ExitBB, ExitBB->begin()));
if (!AfterIP)
return AfterIP.takeError();
- Builder.restoreIP(*AfterIP);
+ Builder.SetInsertPoint(ExitBB);
+
+ // Insert target deinit call in the device compilation pass.
if (OMPBuilder.Config.isTargetDevice())
OMPBuilder.createTargetDeinit(Builder);
@@ -8183,7 +8148,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
auto OI = std::make_unique<OutlineInfo>();
OI->EntryBB = TargetTaskAllocaBB;
- OI->OuterAllocaBB = AllocaIP.getBlock();
+ OI->OuterAllocBB = AllocaIP.getBlock();
// Add the thread ID argument.
SmallVector<Instruction *, 4> ToBeDeleted;
@@ -8445,7 +8410,8 @@ Error OpenMPIRBuilder::emitOffloadingArraysAndArgs(
static void emitTargetCall(
OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
- OpenMPIRBuilder::InsertPointTy AllocaIP,
+ OpenMPIRBuilder::InsertPointTy AllocIP,
+ ArrayRef<OpenMPIRBuilder::InsertPointTy> DeallocIPs,
OpenMPIRBuilder::TargetDataInfo &Info,
const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs,
@@ -8502,8 +8468,9 @@ static void emitTargetCall(
};
auto &&EmitTargetCallElse =
- [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
- OpenMPIRBuilder::InsertPointTy CodeGenIP) -> Error {
+ [&](OpenMPIRBuilder::InsertPointTy AllocIP,
+ OpenMPIRBuilder::InsertPointTy CodeGenIP,
+ ArrayRef<OpenMPIRBuilder::InsertPointTy> DeallocIPs) -> Error {
// Assume no error was returned because EmitTargetCallFallbackCB doesn't
// produce any.
OpenMPIRBuilder::InsertPointTy AfterIP = cantFail([&]() {
@@ -8513,7 +8480,7 @@ static void emitTargetCall(
// OutlinedFnID=nullptr results in that call not being done.
OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs;
return OMPBuilder.emitTargetTask(TaskBodyCB, /*DeviceID=*/nullptr,
- /*RTLoc=*/nullptr, AllocaIP,
+ /*RTLoc=*/nullptr, AllocIP,
Dependencies, EmptyRTArgs, HasNoWait);
}
return EmitTargetCallFallbackCB(Builder.saveIP());
@@ -8524,13 +8491,14 @@ static void emitTargetCall(
};
auto &&EmitTargetCallThen =
- [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
- OpenMPIRBuilder::InsertPointTy CodeGenIP) -> Error {
+ [&](OpenMPIRBuilder::InsertPointTy AllocIP,
+ OpenMPIRBuilder::InsertPointTy CodeGenIP,
+ ArrayRef<OpenMPIRBuilder::InsertPointTy> DeallocIPs) -> Error {
Info.HasNoWait = HasNoWait;
OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
OpenMPIRBuilder::TargetDataRTArgs RTArgs;
if (Error Err = OMPBuilder.emitOffloadingArraysAndArgs(
- AllocaIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB,
+ AllocIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB,
/*IsNonContiguous=*/true,
/*ForEndCall=*/false))
return Err;
@@ -8603,13 +8571,13 @@ static void emitTargetCall(
// The presence of certain clauses on the target directive require the
// explicit generation of the target task.
if (RequiresOuterTargetTask)
- return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
+ return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocIP,
Dependencies, KArgs.RTArgs,
Info.HasNoWait);
return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
EmitTargetCallFallbackCB, KArgs,
- DeviceID, RTLoc, AllocaIP);
+ DeviceID, RTLoc, AllocIP);
}());
Builder.restoreIP(AfterIP);
@@ -8620,24 +8588,24 @@ static void emitTargetCall(
// wasn't created. In this case we just run the host fallback directly and
// ignore any potential 'if' clauses.
if (!OutlinedFnID) {
- cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
+ cantFail(EmitTargetCallElse(AllocIP, Builder.saveIP(), DeallocIPs));
return;
}
// If there's no 'if' clause, only generate the kernel launch code path.
if (!IfCond) {
- cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
+ cantFail(EmitTargetCallThen(AllocIP, Builder.saveIP(), DeallocIPs));
return;
}
cantFail(OMPBuilder.emitIfClause(IfCond, EmitTargetCallThen,
- EmitTargetCallElse, AllocaIP));
+ EmitTargetCallElse, AllocIP));
}
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
- const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP, TargetDataInfo &Info,
- TargetRegionEntryInfo &EntryInfo,
+ const LocationDescription &Loc, bool IsOffloadEntry, InsertPointTy AllocIP,
+ InsertPointTy CodeGenIP, ArrayRef<InsertPointTy> DeallocIPs,
+ TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo,
const TargetKernelDefaultAttrs &DefaultAttrs,
const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond,
SmallVectorImpl<Value *> &Inputs, GenMapInfoCallbackTy GenMapInfoCB,
@@ -8665,9 +8633,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
// to make a remote call (offload) to the previously outlined function
// that represents the target region. Do that now.
if (!Config.isTargetDevice())
- emitTargetCall(*this, Builder, AllocaIP, Info, DefaultAttrs, RuntimeAttrs,
- IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
- CustomMapperCB, Dependencies, HasNowait);
+ emitTargetCall(*this, Builder, AllocIP, DeallocIPs, Info, DefaultAttrs,
+ RuntimeAttrs, IfCond, OutlinedFn, OutlinedFnID, Inputs,
+ GenMapInfoCB, CustomMapperCB, Dependencies, HasNowait);
return Builder.saveIP();
}
@@ -9446,15 +9414,16 @@ void OpenMPIRBuilder::emitBlock(BasicBlock *BB, Function *CurFn,
Error OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
BodyGenCallbackTy ElseGen,
- InsertPointTy AllocaIP) {
+ InsertPointTy AllocIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
// If the condition constant folds and can be elided, try to avoid emitting
// the condition and the dead arm of the if/else.
if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
auto CondConstant = CI->getSExtValue();
if (CondConstant)
- return ThenGen(AllocaIP, Builder.saveIP());
+ return ThenGen(AllocIP, Builder.saveIP(), DeallocIPs);
- return ElseGen(AllocaIP, Builder.saveIP());
+ return ElseGen(AllocIP, Builder.saveIP(), DeallocIPs);
}
Function *CurFn = Builder.GetInsertBlock()->getParent();
@@ -9467,13 +9436,13 @@ Error OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
Builder.CreateCondBr(Cond, ThenBlock, ElseBlock);
// Emit the 'then' code.
emitBlock(ThenBlock, CurFn);
- if (Error Err = ThenGen(AllocaIP, Builder.saveIP()))
+ if (Error Err = ThenGen(AllocIP, Builder.saveIP(), DeallocIPs))
return Err;
emitBranch(ContBlock);
// Emit the 'else' code if present.
// There is no need to emit line number for unconditional branch.
emitBlock(ElseBlock, CurFn);
- if (Error Err = ElseGen(AllocaIP, Builder.saveIP()))
+ if (Error Err = ElseGen(AllocIP, Builder.saveIP(), DeallocIPs))
return Err;
// There is no need to emit line number for unconditional branch.
emitBranch(ContBlock);
@@ -10181,13 +10150,14 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
// Generate the body of teams.
InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin());
InsertPointTy CodeGenIP(BodyBB, BodyBB->begin());
- if (Error Err = BodyGenCB(AllocaIP, CodeGenIP))
+ InsertPointTy DeallocIP(ExitBB, ExitBB->begin());
+ if (Error Err = BodyGenCB(AllocaIP, CodeGenIP, DeallocIP))
return Err;
auto OI = std::make_unique<OutlineInfo>();
OI->EntryBB = AllocaBB;
OI->ExitBB = ExitBB;
- OI->OuterAllocaBB = &OuterAllocaBB;
+ OI->OuterAllocBB = &OuterAllocaBB;
// Insert fake values for global tid and bound tid.
SmallVector<Instruction *, 8> ToBeDeleted;
@@ -10243,14 +10213,13 @@ OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
return Builder.saveIP();
}
-OpenMPIRBuilder::InsertPointOrErrorTy
-OpenMPIRBuilder::createDistribute(const LocationDescription &Loc,
- InsertPointTy OuterAllocaIP,
- BodyGenCallbackTy BodyGenCB) {
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createDistribute(
+ const LocationDescription &Loc, InsertPointTy OuterAllocIP,
+ ArrayRef<InsertPointTy> OuterDeallocIPs, BodyGenCallbackTy BodyGenCB) {
if (!updateToLocation(Loc))
return InsertPointTy();
- BasicBlock *OuterAllocaBB = OuterAllocaIP.getBlock();
+ BasicBlock *OuterAllocaBB = OuterAllocIP.getBlock();
if (OuterAllocaBB == Builder.GetInsertBlock()) {
BasicBlock *BodyBB =
@@ -10267,13 +10236,17 @@ OpenMPIRBuilder::createDistribute(const LocationDescription &Loc,
// Generate the body of distribute clause
InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin());
InsertPointTy CodeGenIP(BodyBB, BodyBB->begin());
- if (Error Err = BodyGenCB(AllocaIP, CodeGenIP))
+ InsertPointTy DeallocIP(ExitBB, ExitBB->begin());
+ if (Error Err = BodyGenCB(AllocaIP, CodeGenIP, DeallocIP))
return Err;
auto OI = std::make_unique<OutlineInfo>();
- OI->OuterAllocaBB = OuterAllocaIP.getBlock();
+ OI->OuterAllocBB = OuterAllocIP.getBlock();
OI->EntryBB = AllocaBB;
OI->ExitBB = ExitBB;
+ OI->OuterDeallocBBs.reserve(OuterDeallocIPs.size());
+ for (InsertPointTy DeallocIP : OuterDeallocIPs)
+ OI->OuterDeallocBBs.push_back(DeallocIP.getBlock());
addOutlineInfo(std::move(OI));
Builder.SetInsertPoint(ExitBB, ExitBB->begin());
@@ -10337,32 +10310,33 @@ std::unique_ptr<CodeExtractor>
OpenMPIRBuilder::OutlineInfo::createCodeExtractor(ArrayRef<BasicBlock *> Blocks,
bool ArgsInZeroAddressSpace,
Twine Suffix) {
- return std::make_unique<CodeExtractor>(Blocks, /* DominatorTree */ nullptr,
- /* AggregateArgs */ true,
- /* BlockFrequencyInfo */ nullptr,
- /* BranchProbabilityInfo */ nullptr,
- /* AssumptionCache */ nullptr,
- /* AllowVarArgs */ true,
- /* AllowAlloca */ true,
- /* AllocationBlock*/ OuterAllocaBB,
- /* DeallocationBlock */ nullptr,
- /* Suffix */ Suffix.str(),
- ArgsInZeroAddressSpace);
+ return std::make_unique<CodeExtractor>(
+ Blocks, /* DominatorTree */ nullptr,
+ /* AggregateArgs */ true,
+ /* BlockFrequencyInfo */ nullptr,
+ /* BranchProbabilityInfo */ nullptr,
+ /* AssumptionCache */ nullptr,
+ /* AllowVarArgs */ true,
+ /* AllowAlloca */ true,
+ /* AllocationBlock*/ OuterAllocBB,
+ /* DeallocationBlocks */ ArrayRef<BasicBlock *>(),
+ /* Suffix */ Suffix.str(), ArgsInZeroAddressSpace);
}
std::unique_ptr<CodeExtractor> DeviceSharedMemOutlineInfo::createCodeExtractor(
ArrayRef<BasicBlock *> Blocks, bool ArgsInZeroAddressSpace, Twine Suffix) {
- // TODO: Initialize the DeallocationBlock with a proper pair to OuterAllocaBB.
return std::make_unique<DeviceSharedMemCodeExtractor>(
- OMPBuilder, AllocBlockOverride, Blocks, /* DominatorTree */ nullptr,
+ OMPBuilder, Blocks, /* DominatorTree */ nullptr,
/* AggregateArgs */ true,
/* BlockFrequencyInfo */ nullptr,
/* BranchProbabilityInfo */ nullptr,
/* AssumptionCache */ nullptr,
/* AllowVarArgs */ true,
/* AllowAlloca */ true,
- /* AllocationBlock*/ OuterAllocaBB,
- /* DeallocationBlock */ ExitBB,
+ /* AllocationBlock*/ OuterAllocBB,
+ /* DeallocationBlocks */ OuterDeallocBBs.empty()
+ ? SmallVector<BasicBlock *>{ExitBB}
+ : OuterDeallocBBs,
/* Suffix */ Suffix.str(), ArgsInZeroAddressSpace);
}
diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index 5780901..e8f3c68 100644
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -721,7 +721,7 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
SubRegion, &*DT, /* AggregateArgs */ false, /* BFI */ nullptr,
/* BPI */ nullptr, AC, /* AllowVarArgs */ false,
/* AllowAlloca */ false, /* AllocaBlock */ nullptr,
- /* DeallocationBlock */ nullptr,
+ /* DeallocationBlocks */ {},
/* Suffix */ "cold." + std::to_string(OutlinedFunctionID));
if (CE.isEligible() && isSplittingBeneficial(CE, SubRegion, TTI) &&
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index 77c1d98..8cdaca4 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -2829,7 +2829,7 @@ unsigned IROutliner::doOutline(Module &M) {
OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
OS->CE = new (ExtractorAllocator.Allocate())
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
- false, nullptr, nullptr, "outlined");
+ false, nullptr, {}, "outlined");
findAddInputsOutputs(M, *OS, NotSame);
if (!OS->IgnoreRegion)
OutlinedRegions.push_back(OS);
@@ -2940,7 +2940,7 @@ unsigned IROutliner::doOutline(Module &M) {
OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
OS->CE = new (ExtractorAllocator.Allocate())
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
- false, nullptr, nullptr, "outlined");
+ false, nullptr, {}, "outlined");
bool FunctionOutlined = extractSection(*OS);
if (FunctionOutlined) {
unsigned StartIdx = OS->Candidate->getStartIdx();
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index d9c8412..20fcb73 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -1086,7 +1086,8 @@ private:
SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
BasicBlock *StartBB = nullptr, *EndBB = nullptr;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
BasicBlock *CGStartBB = CodeGenIP.getBlock();
BasicBlock *CGEndBB =
SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
@@ -1126,7 +1127,8 @@ private:
const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
ParentBB->getTerminator()->eraseFromParent();
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
BasicBlock *CGStartBB = CodeGenIP.getBlock();
BasicBlock *CGEndBB =
SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
@@ -1256,8 +1258,9 @@ private:
// avoid overriding binding settings, and without explicit cancellation.
OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail(OMPInfoCache.OMPBuilder.createParallel(
- Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
- OMP_PROC_BIND_default, /* IsCancellable */ false));
+ Loc, AllocaIP, /* DeallocIPs */ {}, BodyGenCB, PrivCB, FiniCB,
+ nullptr, nullptr, OMP_PROC_BIND_default,
+ /* IsCancellable */ false));
BranchInst::Create(AfterBB, AfterIP.getBlock());
// Perform the actual outlining.
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 3339f5e..c484968 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -264,11 +264,11 @@ CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
BranchProbabilityInfo *BPI, AssumptionCache *AC,
bool AllowVarArgs, bool AllowAlloca,
BasicBlock *AllocationBlock,
- BasicBlock *DeallocationBlock, std::string Suffix,
- bool ArgsInZeroAddressSpace)
+ ArrayRef<BasicBlock *> DeallocationBlocks,
+ std::string Suffix, bool ArgsInZeroAddressSpace)
: DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
BPI(BPI), AC(AC), AllocationBlock(AllocationBlock),
- DeallocationBlock(DeallocationBlock), AllowVarArgs(AllowVarArgs),
+ DeallocationBlocks(DeallocationBlocks), AllowVarArgs(AllowVarArgs),
Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)),
Suffix(Suffix), ArgsInZeroAddressSpace(ArgsInZeroAddressSpace) {}
@@ -2029,22 +2029,25 @@ CallInst *CodeExtractor::emitReplacerCall(
{}, call);
// Deallocate intermediate variables if they need explicit deallocation.
- BasicBlock *DeallocBlock = codeReplacer;
- BasicBlock::iterator DeallocIP = codeReplacer->end();
- if (DeallocationBlock) {
- DeallocBlock = DeallocationBlock;
- DeallocIP = DeallocationBlock->getFirstInsertionPt();
- }
+ auto deallocVars = [&](BasicBlock *DeallocBlock,
+ BasicBlock::iterator DeallocIP) {
+ int Index = 0;
+ for (Value *Output : outputs) {
+ if (!StructValues.contains(Output))
+ deallocateVar(DeallocBlock, DeallocIP, ReloadOutputs[Index++],
+ Output->getType());
+ }
- int Index = 0;
- for (Value *Output : outputs) {
- if (!StructValues.contains(Output))
- deallocateVar(DeallocBlock, DeallocIP, ReloadOutputs[Index++],
- Output->getType());
- }
+ if (Struct)
+ deallocateVar(DeallocBlock, DeallocIP, Struct, StructArgTy);
+ };
- if (Struct)
- deallocateVar(DeallocBlock, DeallocIP, Struct, StructArgTy);
+ if (DeallocationBlocks.empty()) {
+ deallocVars(codeReplacer, codeReplacer->end());
+ } else {
+ for (BasicBlock *DeallocationBlock : DeallocationBlocks)
+ deallocVars(DeallocationBlock, DeallocationBlock->getFirstInsertionPt());
+ }
return call;
}
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index b7a060b..9e9f943 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -55,8 +55,9 @@ using namespace omp;
}
#define BODYGENCB_WRAPPER(cb) \
- [&cb](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) -> Error { \
- cb(AllocaIP, CodeGenIP); \
+ [&cb](InsertPointTy AllocIP, InsertPointTy CodeGenIP, \
+ ArrayRef<InsertPointTy> DeallocIPs) -> Error { \
+ cb(AllocIP, CodeGenIP, DeallocIPs); \
return Error::success(); \
}
@@ -664,10 +665,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) {
unsigned NumPrivatizedVars = 0;
unsigned NumFinalizationPoints = 0;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
++NumBodiesGenerated;
- Builder.restoreIP(AllocaIP);
+ Builder.restoreIP(AllocIP);
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
Builder.CreateStore(F->arg_begin(), PrivAI);
@@ -715,8 +717,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimpleGPU) {
F->getEntryBlock().getFirstInsertionPt());
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createParallel(
- Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
- nullptr, OMP_PROC_BIND_default, false));
+ Loc, AllocaIP, {}, BodyGenCB, PrivCB, FiniCB,
+ nullptr, nullptr, OMP_PROC_BIND_default, false));
EXPECT_EQ(NumBodiesGenerated, 1U);
EXPECT_EQ(NumPrivatizedVars, 1U);
@@ -777,10 +779,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
unsigned NumPrivatizedVars = 0;
unsigned NumFinalizationPoints = 0;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
++NumBodiesGenerated;
- Builder.restoreIP(AllocaIP);
+ Builder.restoreIP(AllocIP);
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
Builder.CreateStore(F->arg_begin(), PrivAI);
@@ -828,8 +831,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
F->getEntryBlock().getFirstInsertionPt());
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createParallel(
- Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
- nullptr, OMP_PROC_BIND_default, false));
+ Loc, AllocaIP, {}, BodyGenCB, PrivCB, FiniCB,
+ nullptr, nullptr, OMP_PROC_BIND_default, false));
EXPECT_EQ(NumBodiesGenerated, 1U);
EXPECT_EQ(NumPrivatizedVars, 1U);
EXPECT_EQ(NumFinalizationPoints, 1U);
@@ -885,7 +888,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) {
unsigned NumOuterBodiesGenerated = 0;
unsigned NumFinalizationPoints = 0;
- auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto InnerBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
++NumInnerBodiesGenerated;
return Error::success();
};
@@ -908,7 +912,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) {
return Error::success();
};
- auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto OuterBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
++NumOuterBodiesGenerated;
Builder.restoreIP(CodeGenIP);
BasicBlock *CGBB = CodeGenIP.getBlock();
@@ -917,7 +922,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) {
ASSERT_EXPECTED_INIT(
OpenMPIRBuilder::InsertPointTy, AfterIP,
- OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP,
+ OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocIP, {},
InnerBodyGenCB, PrivCB, FiniCB, nullptr,
nullptr, OMP_PROC_BIND_default, false));
@@ -929,7 +934,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) {
F->getEntryBlock().getFirstInsertionPt());
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createParallel(
- Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB),
+ Loc, AllocaIP, {}, BODYGENCB_WRAPPER(OuterBodyGenCB),
PrivCB, FiniCB, nullptr, nullptr,
OMP_PROC_BIND_default, false));
@@ -986,7 +991,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
unsigned NumOuterBodiesGenerated = 0;
unsigned NumFinalizationPoints = 0;
- auto InnerBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto InnerBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
++NumInnerBodiesGenerated;
return Error::success();
};
@@ -1009,7 +1015,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
return Error::success();
};
- auto OuterBodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto OuterBodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
++NumOuterBodiesGenerated;
Builder.restoreIP(CodeGenIP);
BasicBlock *CGBB = CodeGenIP.getBlock();
@@ -1022,18 +1029,18 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
ASSERT_EXPECTED_INIT(
OpenMPIRBuilder::InsertPointTy, AfterIP1,
- OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocaIP,
+ OMPBuilder.createParallel(InsertPointTy(CGBB, CGBB->end()), AllocIP, {},
InnerBodyGenCB, PrivCB, FiniCB, nullptr,
nullptr, OMP_PROC_BIND_default, false));
Builder.restoreIP(AfterIP1);
Builder.CreateBr(NewBB1);
- ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP2,
- OMPBuilder.createParallel(
- InsertPointTy(NewBB1, NewBB1->end()), AllocaIP,
- InnerBodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
- OMP_PROC_BIND_default, false));
+ ASSERT_EXPECTED_INIT(
+ OpenMPIRBuilder::InsertPointTy, AfterIP2,
+ OMPBuilder.createParallel(InsertPointTy(NewBB1, NewBB1->end()), AllocIP,
+ {}, InnerBodyGenCB, PrivCB, FiniCB, nullptr,
+ nullptr, OMP_PROC_BIND_default, false));
Builder.restoreIP(AfterIP2);
Builder.CreateBr(NewBB2);
@@ -1043,7 +1050,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
F->getEntryBlock().getFirstInsertionPt());
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createParallel(
- Loc, AllocaIP, BODYGENCB_WRAPPER(OuterBodyGenCB),
+ Loc, AllocaIP, {}, BODYGENCB_WRAPPER(OuterBodyGenCB),
PrivCB, FiniCB, nullptr, nullptr,
OMP_PROC_BIND_default, false));
@@ -1107,10 +1114,11 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
unsigned NumPrivatizedVars = 0;
unsigned NumFinalizationPoints = 0;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
++NumBodiesGenerated;
- Builder.restoreIP(AllocaIP);
+ Builder.restoreIP(AllocIP);
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
Builder.CreateStore(F->arg_begin(), PrivAI);
@@ -1159,7 +1167,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
F->getEntryBlock().getFirstInsertionPt());
ASSERT_EXPECTED_INIT(
OpenMPIRBuilder::InsertPointTy, AfterIP,
- OMPBuilder.createParallel(Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB,
+ OMPBuilder.createParallel(Loc, AllocaIP, {}, BodyGenCB, PrivCB, FiniCB,
Builder.CreateIsNotNull(F->arg_begin()),
nullptr, OMP_PROC_BIND_default, false));
@@ -1214,7 +1222,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
unsigned NumFinalizationPoints = 0;
CallInst *CheckedBarrier = nullptr;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
++NumBodiesGenerated;
Builder.restoreIP(CodeGenIP);
@@ -1282,11 +1291,12 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(),
F->getEntryBlock().getFirstInsertionPt());
- ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
- OMPBuilder.createParallel(
- Loc, AllocaIP, BODYGENCB_WRAPPER(BodyGenCB), PrivCB,
- FiniCB, Builder.CreateIsNotNull(F->arg_begin()),
- nullptr, OMP_PROC_BIND_default, true));
+ ASSERT_EXPECTED_INIT(
+ OpenMPIRBuilder::InsertPointTy, AfterIP,
+ OMPBuilder.createParallel(Loc, AllocaIP, {}, BODYGENCB_WRAPPER(BodyGenCB),
+ PrivCB, FiniCB,
+ Builder.CreateIsNotNull(F->arg_begin()),
+ nullptr, OMP_PROC_BIND_default, true));
EXPECT_EQ(NumBodiesGenerated, 1U);
EXPECT_EQ(NumPrivatizedVars, 0U);
@@ -1351,7 +1361,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
Value *StructPtrVal = Builder.CreateCall(RetStructPtrFunc);
Instruction *Internal;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
IRBuilder<>::InsertPointGuard Guard(Builder);
Builder.restoreIP(CodeGenIP);
Internal = Builder.CreateCall(TakeI32Func, I32Val);
@@ -1371,8 +1382,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
F->getEntryBlock().getFirstInsertionPt());
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createParallel(
- Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr,
- nullptr, OMP_PROC_BIND_default, false));
+ Loc, AllocaIP, {}, BodyGenCB, PrivCB, FiniCB,
+ nullptr, nullptr, OMP_PROC_BIND_default, false));
Builder.restoreIP(AfterIP);
Builder.CreateRetVoid();
@@ -2875,9 +2886,10 @@ TEST_F(OpenMPIRBuilderTest, MasterDirective) {
BasicBlock *EntryBB = nullptr;
BasicBlock *ThenBB = nullptr;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
- if (AllocaIP.isSet())
- Builder.restoreIP(AllocaIP);
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
+ if (AllocIP.isSet())
+ Builder.restoreIP(AllocIP);
else
Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
@@ -2956,9 +2968,10 @@ TEST_F(OpenMPIRBuilderTest, MaskedDirective) {
BasicBlock *EntryBB = nullptr;
BasicBlock *ThenBB = nullptr;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
- if (AllocaIP.isSet())
- Builder.restoreIP(AllocaIP);
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
+ if (AllocIP.isSet())
+ Builder.restoreIP(AllocIP);
else
Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
@@ -3035,7 +3048,8 @@ TEST_F(OpenMPIRBuilderTest, CriticalDirective) {
AllocaInst *PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
// actual start for bodyCB
llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
@@ -3286,7 +3300,8 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveThreads) {
AllocaInst *PrivAI =
Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
@@ -3360,7 +3375,8 @@ TEST_F(OpenMPIRBuilderTest, OrderedDirectiveSimd) {
AllocaInst *PrivAI =
Builder.CreateAlloca(F->arg_begin()->getType(), nullptr, "priv.inst");
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
llvm::BasicBlock *CodeGenIPBB = CodeGenIP.getBlock();
llvm::Instruction *CodeGenIPInst = &*CodeGenIP.getPoint();
EXPECT_EQ(CodeGenIPBB->getTerminator(), CodeGenIPInst);
@@ -3467,9 +3483,10 @@ TEST_F(OpenMPIRBuilderTest, SingleDirective) {
BasicBlock *EntryBB = nullptr;
BasicBlock *ThenBB = nullptr;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
- if (AllocaIP.isSet())
- Builder.restoreIP(AllocaIP);
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
+ if (AllocIP.isSet())
+ Builder.restoreIP(AllocIP);
else
Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
@@ -3560,9 +3577,10 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveNowait) {
BasicBlock *EntryBB = nullptr;
BasicBlock *ThenBB = nullptr;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
- if (AllocaIP.isSet())
- Builder.restoreIP(AllocaIP);
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
+ if (AllocIP.isSet())
+ Builder.restoreIP(AllocIP);
else
Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
@@ -3681,9 +3699,10 @@ TEST_F(OpenMPIRBuilderTest, SingleDirectiveCopyPrivate) {
Function *CopyFunc =
Function::Create(CopyFuncTy, Function::PrivateLinkage, "copy_var", *M);
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
- if (AllocaIP.isSet())
- Builder.restoreIP(AllocaIP);
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
+ if (AllocIP.isSet())
+ Builder.restoreIP(AllocIP);
else
Builder.SetInsertPoint(&*(F->getEntryBlock().getFirstInsertionPt()));
PrivAI = Builder.CreateAlloca(F->arg_begin()->getType());
@@ -4545,8 +4564,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTeams) {
AllocaInst *ValPtr128 = Builder.CreateAlloca(Builder.getInt128Ty());
Value *Val128 = Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "load");
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
- Builder.restoreIP(AllocaIP);
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
+ Builder.restoreIP(AllocIP);
AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
"bodygen.alloca128");
@@ -4626,7 +4646,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithThreadLimit) {
Function::Create(FunctionType::get(Builder.getVoidTy(), false),
GlobalValue::ExternalLinkage, "fakeFunction", M.get());
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
Builder.restoreIP(CodeGenIP);
Builder.CreateCall(FakeFunction, {});
return Error::success();
@@ -4682,7 +4703,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsUpper) {
Function::Create(FunctionType::get(Builder.getVoidTy(), false),
GlobalValue::ExternalLinkage, "fakeFunction", M.get());
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
Builder.restoreIP(CodeGenIP);
Builder.CreateCall(FakeFunction, {});
return Error::success();
@@ -4744,7 +4766,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsBoth) {
Value *NumTeamsUpper =
Builder.CreateAdd(F->arg_begin(), Builder.getInt32(10), "numTeamsUpper");
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
Builder.restoreIP(CodeGenIP);
Builder.CreateCall(FakeFunction, {});
return Error::success();
@@ -4811,7 +4834,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithNumTeamsAndThreadLimit) {
Function::Create(FunctionType::get(Builder.getVoidTy(), false),
GlobalValue::ExternalLinkage, "fakeFunction", M.get());
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
Builder.restoreIP(CodeGenIP);
Builder.CreateCall(FakeFunction, {});
return Error::success();
@@ -4868,7 +4892,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfCondition) {
Function::Create(FunctionType::get(Builder.getVoidTy(), false),
GlobalValue::ExternalLinkage, "fakeFunction", M.get());
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
Builder.restoreIP(CodeGenIP);
Builder.CreateCall(FakeFunction, {});
return Error::success();
@@ -4935,7 +4960,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTeamsWithIfConditionAndNumTeams) {
Function::Create(FunctionType::get(Builder.getVoidTy(), false),
GlobalValue::ExternalLinkage, "fakeFunction", M.get());
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
Builder.restoreIP(CodeGenIP);
Builder.CreateCall(FakeFunction, {});
return Error::success();
@@ -5153,7 +5179,8 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
// xor of thread-id;
// and store the result in global variables.
InsertPointTy BodyIP, BodyAllocaIP;
- auto BodyGenCB = [&](InsertPointTy InnerAllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy InnerAllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
IRBuilderBase::InsertPointGuard Guard(Builder);
Builder.restoreIP(CodeGenIP);
@@ -5171,7 +5198,7 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
Builder.CreateStore(Xor, XorReduced);
BodyIP = Builder.saveIP();
- BodyAllocaIP = InnerAllocaIP;
+ BodyAllocaIP = InnerAllocIP;
return Error::success();
};
@@ -5207,12 +5234,12 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
// Do nothing in finalization.
auto FiniCB = [&](InsertPointTy CodeGenIP) { return Error::success(); };
- ASSERT_EXPECTED_INIT(
- OpenMPIRBuilder::InsertPointTy, AfterIP,
- OMPBuilder.createParallel(Loc, OuterAllocaIP, BodyGenCB, PrivCB, FiniCB,
- /* IfCondition */ nullptr,
- /* NumThreads */ nullptr, OMP_PROC_BIND_default,
- /* IsCancellable */ false));
+ ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
+ OMPBuilder.createParallel(
+ Loc, OuterAllocaIP, {}, BodyGenCB, PrivCB, FiniCB,
+ /* IfCondition */ nullptr,
+ /* NumThreads */ nullptr, OMP_PROC_BIND_default,
+ /* IsCancellable */ false));
Builder.restoreIP(AfterIP);
OpenMPIRBuilder::ReductionInfo ReductionInfos[] = {
@@ -5531,8 +5558,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
Builder.CreateStore(Builder.getInt32(1), XorReduced);
InsertPointTy FirstBodyIP, FirstBodyAllocaIP;
- auto FirstBodyGenCB = [&](InsertPointTy InnerAllocaIP,
- InsertPointTy CodeGenIP) {
+ auto FirstBodyGenCB = [&](InsertPointTy InnerAllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
IRBuilderBase::InsertPointGuard Guard(Builder);
Builder.restoreIP(CodeGenIP);
@@ -5547,13 +5574,14 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
Builder.CreateStore(Sum, SumReduced);
FirstBodyIP = Builder.saveIP();
- FirstBodyAllocaIP = InnerAllocaIP;
+ FirstBodyAllocaIP = InnerAllocIP;
return Error::success();
};
InsertPointTy SecondBodyIP, SecondBodyAllocaIP;
- auto SecondBodyGenCB = [&](InsertPointTy InnerAllocaIP,
- InsertPointTy CodeGenIP) {
+ auto SecondBodyGenCB = [&](InsertPointTy InnerAllocIP,
+ InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
IRBuilderBase::InsertPointGuard Guard(Builder);
Builder.restoreIP(CodeGenIP);
@@ -5566,7 +5594,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
Builder.CreateStore(Xor, XorReduced);
SecondBodyIP = Builder.saveIP();
- SecondBodyAllocaIP = InnerAllocaIP;
+ SecondBodyAllocaIP = InnerAllocIP;
return Error::success();
};
@@ -5606,14 +5634,14 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
ASSERT_EXPECTED_INIT(
OpenMPIRBuilder::InsertPointTy, AfterIP1,
- OMPBuilder.createParallel(Loc, OuterAllocaIP, FirstBodyGenCB, PrivCB,
+ OMPBuilder.createParallel(Loc, OuterAllocaIP, {}, FirstBodyGenCB, PrivCB,
FiniCB, /* IfCondition */ nullptr,
/* NumThreads */ nullptr, OMP_PROC_BIND_default,
/* IsCancellable */ false));
Builder.restoreIP(AfterIP1);
ASSERT_EXPECTED_INIT(
OpenMPIRBuilder::InsertPointTy, AfterIP2,
- OMPBuilder.createParallel({Builder.saveIP(), DL}, OuterAllocaIP,
+ OMPBuilder.createParallel({Builder.saveIP(), DL}, OuterAllocaIP, {},
SecondBodyGenCB, PrivCB, FiniCB,
/* IfCondition */ nullptr,
/* NumThreads */ nullptr, OMP_PROC_BIND_default,
@@ -5707,7 +5735,8 @@ TEST_F(OpenMPIRBuilderTest, CreateSectionsSimple) {
llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
auto FiniCB = [&](InsertPointTy IP) { return Error::success(); };
- auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto SectionCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
return Error::success();
};
SectionCBVector.push_back(SectionCB);
@@ -5752,7 +5781,8 @@ TEST_F(OpenMPIRBuilderTest, CreateSections) {
EXPECT_NE(IPBB->end(), IP.getPoint());
};
- auto SectionCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto SectionCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
++NumBodiesGenerated;
CaseBBs.push_back(CodeGenIP.getBlock());
SwitchBB = CodeGenIP.getBlock()->getSinglePredecessor();
@@ -6092,7 +6122,7 @@ TEST_F(OpenMPIRBuilderTest, TargetEnterData) {
ASSERT_EXPECTED_INIT(
OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createTargetData(
- Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
+ Loc, AllocaIP, Builder.saveIP(), {}, Builder.getInt64(DeviceID),
/* IfCond= */ nullptr, Info, GenMapInfoCB, CustomMapperCB, &RTLFunc));
Builder.restoreIP(AfterIP);
@@ -6155,7 +6185,7 @@ TEST_F(OpenMPIRBuilderTest, TargetExitData) {
ASSERT_EXPECTED_INIT(
OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createTargetData(
- Loc, AllocaIP, Builder.saveIP(), Builder.getInt64(DeviceID),
+ Loc, AllocaIP, Builder.saveIP(), {}, Builder.getInt64(DeviceID),
/* IfCond= */ nullptr, Info, GenMapInfoCB, CustomMapperCB, &RTLFunc));
Builder.restoreIP(AfterIP);
@@ -6266,7 +6296,7 @@ TEST_F(OpenMPIRBuilderTest, TargetDataRegion) {
ASSERT_EXPECTED_INIT(
OpenMPIRBuilder::InsertPointTy, TargetDataIP1,
- OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(),
+ OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(), {},
Builder.getInt64(DeviceID),
/* IfCond= */ nullptr, Info, GenMapInfoCB,
CustomMapperCB, nullptr, BodyCB));
@@ -6295,7 +6325,7 @@ TEST_F(OpenMPIRBuilderTest, TargetDataRegion) {
};
ASSERT_EXPECTED_INIT(
OpenMPIRBuilder::InsertPointTy, TargetDataIP2,
- OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(),
+ OMPBuilder.createTargetData(Loc, AllocaIP, Builder.saveIP(), {},
Builder.getInt64(DeviceID),
/* IfCond= */ nullptr, Info, GenMapInfoCB,
CustomMapperCB, nullptr, BodyTargetCB));
@@ -6346,8 +6376,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) {
Builder.CreateStore(Builder.getInt32(10), APtr);
Builder.CreateStore(Builder.getInt32(20), BPtr);
- auto BodyGenCB = [&](InsertPointTy AllocaIP,
- InsertPointTy CodeGenIP) -> InsertPointTy {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) -> InsertPointTy {
IRBuilderBase::InsertPointGuard guard(Builder);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
Builder.restoreIP(CodeGenIP);
@@ -6417,10 +6447,10 @@ TEST_F(OpenMPIRBuilderTest, TargetRegion) {
ASSERT_EXPECTED_INIT(
OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(),
- Builder.saveIP(), Info, EntryInfo, DefaultAttrs,
- RuntimeAttrs, /*IfCond=*/nullptr, Inputs,
- GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB,
- CustomMapperCB, {}, false));
+ Builder.saveIP(), {}, Info, EntryInfo,
+ DefaultAttrs, RuntimeAttrs, /*IfCond=*/nullptr,
+ Inputs, GenMapInfoCB, BodyGenCB,
+ SimpleArgAccessorCB, CustomMapperCB, {}, false));
EXPECT_EQ(DL, Builder.getCurrentDebugLocation());
Builder.restoreIP(AfterIP);
@@ -6565,8 +6595,9 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
};
auto CustomMapperCB = [&](unsigned int I) { return nullptr; };
- auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
- OpenMPIRBuilder::InsertPointTy CodeGenIP)
+ auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocIP,
+ OpenMPIRBuilder::InsertPointTy CodeGenIP,
+ ArrayRef<OpenMPIRBuilder::InsertPointTy> DeallocIPs)
-> OpenMPIRBuilder::InsertPointTy {
IRBuilderBase::InsertPointGuard guard(Builder);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
@@ -6591,7 +6622,7 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
ASSERT_EXPECTED_INIT(
OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
- Info, EntryInfo, DefaultAttrs, RuntimeAttrs,
+ {}, Info, EntryInfo, DefaultAttrs, RuntimeAttrs,
/*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB,
BodyGenCB, SimpleArgAccessorCB, CustomMapperCB,
{}, false));
@@ -6672,7 +6703,14 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDevice) {
Instruction *Value1 = &*OutlinedBlock->getFirstNonPHIIt();
EXPECT_EQ(Value1, Value);
EXPECT_EQ(Value1->getNextNode(), TargetStore);
- auto *Deinit = TargetStore->getNextNode();
+
+ auto *TargetExitBlockBr = TargetStore->getNextNode();
+ EXPECT_TRUE(isa<BranchInst>(TargetExitBlockBr));
+
+ auto *TargetExitBlock = TargetExitBlockBr->getSuccessor(0);
+ EXPECT_EQ(TargetExitBlock->getName(), "target.exit");
+
+ Instruction *Deinit = &*TargetExitBlock->getFirstNonPHIIt();
EXPECT_NE(Deinit, nullptr);
auto *DeinitCall = dyn_cast<CallInst>(Deinit);
@@ -6719,8 +6757,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionSPMD) {
IRBuilder<> Builder(BB);
auto CustomMapperCB = [&](unsigned int I) { return nullptr; };
- auto BodyGenCB = [&](InsertPointTy,
- InsertPointTy CodeGenIP) -> InsertPointTy {
+ auto BodyGenCB = [&](InsertPointTy, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy>) -> InsertPointTy {
Builder.restoreIP(CodeGenIP);
return Builder.saveIP();
};
@@ -6753,10 +6791,10 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionSPMD) {
ASSERT_EXPECTED_INIT(
OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createTarget(OmpLoc, /*IsOffloadEntry=*/true, Builder.saveIP(),
- Builder.saveIP(), Info, EntryInfo, DefaultAttrs,
- RuntimeAttrs, /*IfCond=*/nullptr, Inputs,
- GenMapInfoCB, BodyGenCB, SimpleArgAccessorCB,
- CustomMapperCB, {}));
+ Builder.saveIP(), {}, Info, EntryInfo,
+ DefaultAttrs, RuntimeAttrs, /*IfCond=*/nullptr,
+ Inputs, GenMapInfoCB, BodyGenCB,
+ SimpleArgAccessorCB, CustomMapperCB, {}));
Builder.restoreIP(AfterIP);
OMPBuilder.finalize();
@@ -6839,7 +6877,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDeviceSPMD) {
auto CustomMapperCB = [&](unsigned int I) { return nullptr; };
auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy,
- OpenMPIRBuilder::InsertPointTy CodeGenIP)
+ OpenMPIRBuilder::InsertPointTy CodeGenIP,
+ ArrayRef<OpenMPIRBuilder::InsertPointTy>)
-> OpenMPIRBuilder::InsertPointTy {
Builder.restoreIP(CodeGenIP);
OutlinedFn = CodeGenIP.getBlock()->getParent();
@@ -6860,8 +6899,8 @@ TEST_F(OpenMPIRBuilderTest, TargetRegionDeviceSPMD) {
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createTarget(
- Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, Info,
- EntryInfo, DefaultAttrs, RuntimeAttrs,
+ Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP, {},
+ Info, EntryInfo, DefaultAttrs, RuntimeAttrs,
/*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB,
BodyGenCB, SimpleArgAccessorCB, CustomMapperCB, {}));
Builder.restoreIP(AfterIP);
@@ -6958,8 +6997,9 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
llvm::Value *RaiseAlloca = nullptr;
auto CustomMapperCB = [&](unsigned int I) { return nullptr; };
- auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
- OpenMPIRBuilder::InsertPointTy CodeGenIP)
+ auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy AllocIP,
+ OpenMPIRBuilder::InsertPointTy CodeGenIP,
+ ArrayRef<OpenMPIRBuilder::InsertPointTy> DeallocIPs)
-> OpenMPIRBuilder::InsertPointTy {
IRBuilderBase::InsertPointGuard guard(Builder);
Builder.SetCurrentDebugLocation(llvm::DebugLoc());
@@ -6985,7 +7025,7 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
ASSERT_EXPECTED_INIT(
OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createTarget(Loc, /*IsOffloadEntry=*/true, EntryIP, EntryIP,
- Info, EntryInfo, DefaultAttrs, RuntimeAttrs,
+ {}, Info, EntryInfo, DefaultAttrs, RuntimeAttrs,
/*IfCond=*/nullptr, CapturedArgs, GenMapInfoCB,
BodyGenCB, SimpleArgAccessorCB, CustomMapperCB,
{}, false));
@@ -7062,7 +7102,14 @@ TEST_F(OpenMPIRBuilderTest, ConstantAllocaRaise) {
EXPECT_TRUE(isa<LoadInst>(Load2));
EXPECT_EQ(Load2, Value);
EXPECT_EQ(Load2->getNextNode(), TargetStore);
- auto *Deinit = TargetStore->getNextNode();
+
+ auto *TargetExitBlockBr = TargetStore->getNextNode();
+ EXPECT_TRUE(isa<BranchInst>(TargetExitBlockBr));
+
+ auto *TargetExitBlock = TargetExitBlockBr->getSuccessor(0);
+ EXPECT_EQ(TargetExitBlock->getName(), "target.exit");
+
+ Instruction *Deinit = &*TargetExitBlock->getFirstNonPHIIt();
EXPECT_NE(Deinit, nullptr);
auto *DeinitCall = dyn_cast<CallInst>(Deinit);
@@ -7091,8 +7138,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTask) {
Value *Val128 =
Builder.CreateLoad(Builder.getInt128Ty(), ValPtr128, "bodygen.load");
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
- Builder.restoreIP(AllocaIP);
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
+ Builder.restoreIP(AllocIP);
AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
"bodygen.alloca128");
@@ -7120,7 +7168,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTask) {
OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createTask(
Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
- BodyGenCB));
+ /*DeallocIPs=*/{}, BodyGenCB));
Builder.restoreIP(AfterIP);
OMPBuilder.finalize();
Builder.CreateRetVoid();
@@ -7219,7 +7267,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) {
F->setName("func");
IRBuilder<> Builder(BB);
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
return Error::success();
};
@@ -7231,7 +7280,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskNoArgs) {
OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createTask(
Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
- BodyGenCB));
+ /*DeallocIPs=*/{}, BodyGenCB));
Builder.restoreIP(AfterIP);
OMPBuilder.finalize();
Builder.CreateRetVoid();
@@ -7254,7 +7303,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) {
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
return Error::success();
};
BasicBlock *AllocaBB = Builder.GetInsertBlock();
@@ -7265,7 +7315,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskUntied) {
OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createTask(
Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
- BodyGenCB,
+ /*DeallocIPs=*/{}, BodyGenCB,
/*Tied=*/false));
Builder.restoreIP(AfterIP);
OMPBuilder.finalize();
@@ -7290,7 +7340,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) {
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
return Error::success();
};
BasicBlock *AllocaBB = Builder.GetInsertBlock();
@@ -7308,7 +7359,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskDepend) {
OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createTask(
Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
- BodyGenCB,
+ /*DeallocIPs=*/{}, BodyGenCB,
/*Tied=*/false, /*Final*/ nullptr, /*IfCondition*/ nullptr, DDS));
Builder.restoreIP(AfterIP);
OMPBuilder.finalize();
@@ -7370,7 +7421,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) {
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
return Error::success();
};
BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
@@ -7381,7 +7433,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskFinal) {
ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
- OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
+ OMPBuilder.createTask(Loc, AllocaIP, /*DeallocIPs=*/{},
+ BodyGenCB,
/*Tied=*/false, Final));
Builder.restoreIP(AfterIP);
OMPBuilder.finalize();
@@ -7428,7 +7481,8 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
OMPBuilder.initialize();
F->setName("func");
IRBuilder<> Builder(BB);
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
return Error::success();
};
BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "alloca.split");
@@ -7438,10 +7492,10 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskIfCondition) {
CmpInst::Predicate::ICMP_EQ, F->getArg(0),
ConstantInt::get(Type::getInt32Ty(M->getContext()), 0U));
OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
- ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, AfterIP,
- OMPBuilder.createTask(Loc, AllocaIP, BodyGenCB,
- /*Tied=*/false, /*Final=*/nullptr,
- IfCondition));
+ ASSERT_EXPECTED_INIT(
+ OpenMPIRBuilder::InsertPointTy, AfterIP,
+ OMPBuilder.createTask(Loc, AllocaIP, /*DeallocIPs=*/{}, BodyGenCB,
+ /*Tied=*/false, /*Final=*/nullptr, IfCondition));
Builder.restoreIP(AfterIP);
OMPBuilder.finalize();
Builder.CreateRetVoid();
@@ -7507,8 +7561,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
Value *InternalStoreInst, *InternalLoad32, *InternalLoad128, *InternalIfCmp;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
- Builder.restoreIP(AllocaIP);
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
+ Builder.restoreIP(AllocIP);
AllocaInst *Local128 = Builder.CreateAlloca(Builder.getInt128Ty(), nullptr,
"bodygen.alloca128");
@@ -7536,7 +7591,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroup) {
ASSERT_EXPECTED_INIT(
OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createTaskgroup(
- Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
+ Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), {},
BodyGenCB));
Builder.restoreIP(AfterIP);
OMPBuilder.finalize();
@@ -7598,14 +7653,16 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) {
F->setName("func");
IRBuilder<> Builder(BB);
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
- Builder.restoreIP(AllocaIP);
+ auto BodyGenCB = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
+ Builder.restoreIP(AllocIP);
AllocaInst *Alloca32 =
Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, "bodygen.alloca32");
AllocaInst *Alloca64 =
Builder.CreateAlloca(Builder.getInt64Ty(), nullptr, "bodygen.alloca64");
Builder.restoreIP(CodeGenIP);
- auto TaskBodyGenCB1 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto TaskBodyGenCB1 = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
Builder.restoreIP(CodeGenIP);
LoadInst *LoadValue =
Builder.CreateLoad(Alloca64->getAllocatedType(), Alloca64);
@@ -7614,11 +7671,13 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) {
return Error::success();
};
OpenMPIRBuilder::LocationDescription Loc(Builder.saveIP(), DL);
- ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP1,
- OMPBuilder.createTask(Loc, AllocaIP, TaskBodyGenCB1));
+ ASSERT_EXPECTED_INIT(
+ OpenMPIRBuilder::InsertPointTy, TaskIP1,
+ OMPBuilder.createTask(Loc, AllocIP, DeallocIPs, TaskBodyGenCB1));
Builder.restoreIP(TaskIP1);
- auto TaskBodyGenCB2 = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ auto TaskBodyGenCB2 = [&](InsertPointTy AllocIP, InsertPointTy CodeGenIP,
+ ArrayRef<InsertPointTy> DeallocIPs) {
Builder.restoreIP(CodeGenIP);
LoadInst *LoadValue =
Builder.CreateLoad(Alloca32->getAllocatedType(), Alloca32);
@@ -7627,8 +7686,9 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) {
return Error::success();
};
OpenMPIRBuilder::LocationDescription Loc2(Builder.saveIP(), DL);
- ASSERT_EXPECTED_INIT(OpenMPIRBuilder::InsertPointTy, TaskIP2,
- OMPBuilder.createTask(Loc2, AllocaIP, TaskBodyGenCB2));
+ ASSERT_EXPECTED_INIT(
+ OpenMPIRBuilder::InsertPointTy, TaskIP2,
+ OMPBuilder.createTask(Loc2, AllocIP, DeallocIPs, TaskBodyGenCB2));
Builder.restoreIP(TaskIP2);
};
@@ -7639,7 +7699,7 @@ TEST_F(OpenMPIRBuilderTest, CreateTaskgroupWithTasks) {
ASSERT_EXPECTED_INIT(
OpenMPIRBuilder::InsertPointTy, AfterIP,
OMPBuilder.createTaskgroup(
- Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()),
+ Loc, InsertPointTy(AllocaBB, AllocaBB->getFirstInsertionPt()), {},
BODYGENCB_WRAPPER(BodyGenCB)));
Builder.restoreIP(AfterIP);
OMPBuilder.finalize();
diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
index 6fd266a..d63e346 100644
--- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
@@ -712,7 +712,7 @@ TEST(CodeExtractor, OpenMPAggregateArgs) {
/* AllowVarArgs */ true,
/* AllowAlloca */ true,
/* AllocationBlock*/ &Func->getEntryBlock(),
- /* DeallocationBlock */ nullptr,
+ /* DeallocationBlocks */ {},
/* Suffix */ ".outlined",
/* ArgsInZeroAddressSpace */ true);
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 539e62a..f5d60a5 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -66,14 +66,17 @@ convertToScheduleKind(std::optional<omp::ClauseScheduleKind> schedKind) {
/// ModuleTranslation stack frame for OpenMP operations. This keeps track of the
/// insertion points for allocas.
-class OpenMPAllocaStackFrame
- : public StateStackFrameBase<OpenMPAllocaStackFrame> {
+class OpenMPAllocStackFrame
+ : public StateStackFrameBase<OpenMPAllocStackFrame> {
public:
- MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocaStackFrame)
-
- explicit OpenMPAllocaStackFrame(llvm::OpenMPIRBuilder::InsertPointTy allocaIP)
- : allocaInsertPoint(allocaIP) {}
- llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
+ MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(OpenMPAllocStackFrame)
+
+ explicit OpenMPAllocStackFrame(
+ llvm::OpenMPIRBuilder::InsertPointTy allocIP,
+ llvm::ArrayRef<llvm::OpenMPIRBuilder::InsertPointTy> deallocIPs)
+ : allocInsertPoint(allocIP), deallocInsertPoints(deallocIPs) {}
+ llvm::OpenMPIRBuilder::InsertPointTy allocInsertPoint;
+ llvm::SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> deallocInsertPoints;
};
/// Stack frame to hold a \see llvm::CanonicalLoopInfo representing the
@@ -482,26 +485,33 @@ static LogicalResult handleError(llvm::Expected<T> &result, Operation &op) {
/// Find the insertion point for allocas given the current insertion point for
/// normal operations in the builder.
-static llvm::OpenMPIRBuilder::InsertPointTy
-findAllocaInsertPoint(llvm::IRBuilderBase &builder,
- LLVM::ModuleTranslation &moduleTranslation) {
- // If there is an alloca insertion point on stack, i.e. we are in a nested
+static llvm::OpenMPIRBuilder::InsertPointTy findAllocInsertPoints(
+ llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
+ llvm::SmallVectorImpl<llvm::OpenMPIRBuilder::InsertPointTy> *deallocIPs =
+ nullptr) {
+ // If there is an allocation insertion point on stack, i.e. we are in a nested
// operation and a specific point was provided by some surrounding operation,
// use it.
- llvm::OpenMPIRBuilder::InsertPointTy allocaInsertPoint;
- WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocaStackFrame>(
- [&](OpenMPAllocaStackFrame &frame) {
- allocaInsertPoint = frame.allocaInsertPoint;
+ llvm::OpenMPIRBuilder::InsertPointTy allocInsertPoint;
+ llvm::ArrayRef<llvm::OpenMPIRBuilder::InsertPointTy> deallocInsertPoints;
+ WalkResult walkResult = moduleTranslation.stackWalk<OpenMPAllocStackFrame>(
+ [&](OpenMPAllocStackFrame &frame) {
+ allocInsertPoint = frame.allocInsertPoint;
+ deallocInsertPoints = frame.deallocInsertPoints;
return WalkResult::interrupt();
});
// In cases with multiple levels of outlining, the tree walk might find an
- // alloca insertion point that is inside the original function while the
- // builder insertion point is inside the outlined function. We need to make
- // sure that we do not use it in those cases.
+ // insertion point that is inside the original function while the builder
+ // insertion point is inside the outlined function. We need to make sure that
+ // we do not use it in those cases.
if (walkResult.wasInterrupted() &&
- allocaInsertPoint.getBlock()->getParent() ==
- builder.GetInsertBlock()->getParent())
- return allocaInsertPoint;
+ allocInsertPoint.getBlock()->getParent() ==
+ builder.GetInsertBlock()->getParent()) {
+ if (deallocIPs)
+ deallocIPs->insert(deallocIPs->end(), deallocInsertPoints.begin(),
+ deallocInsertPoints.end());
+ return allocInsertPoint;
+ }
// Otherwise, insert to the entry block of the surrounding function.
// If the current IRBuilder InsertPoint is the function's entry, it cannot
@@ -509,7 +519,7 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder,
// confusion. Create a new BasicBlock for the Builder and use the entry block
// for the allocs.
// TODO: Create a dedicated alloca BasicBlock at function creation such that
- // we do not need to move the current InertPoint here.
+ // we do not need to move the current InsertPoint here.
if (builder.GetInsertBlock() ==
&builder.GetInsertBlock()->getParent()->getEntryBlock()) {
assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
@@ -521,6 +531,16 @@ findAllocaInsertPoint(llvm::IRBuilderBase &builder,
builder.SetInsertPoint(entryBB);
}
+ // Collect the blocks that end in a return; in this case, explicit
+ // deallocations should be inserted right before each return instruction.
+ if (deallocIPs) {
+ for (llvm::BasicBlock &block : *builder.GetInsertBlock()->getParent()) {
+ llvm::Instruction *terminator = block.getTerminator();
+ if (isa_and_present<llvm::ReturnInst>(terminator))
+ deallocIPs->emplace_back(&block, terminator->getIterator());
+ }
+ }
+
llvm::BasicBlock &funcEntryBlock =
builder.GetInsertBlock()->getParent()->getEntryBlock();
return llvm::OpenMPIRBuilder::InsertPointTy(
@@ -708,7 +728,8 @@ convertOmpMasked(Operation &opInst, llvm::IRBuilderBase &builder,
if (failed(checkImplementationStatus(opInst)))
return failure();
- auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
+ auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP,
+ llvm::ArrayRef<InsertPointTy> deallocIPs) {
// MaskedOp has only one region associated with it.
auto &region = maskedOp.getRegion();
builder.restoreIP(codeGenIP);
@@ -752,7 +773,8 @@ convertOmpMaster(Operation &opInst, llvm::IRBuilderBase &builder,
if (failed(checkImplementationStatus(opInst)))
return failure();
- auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
+ auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP,
+ llvm::ArrayRef<InsertPointTy> deallocIPs) {
// MasterOp has only one region associated with it.
auto &region = masterOp.getRegion();
builder.restoreIP(codeGenIP);
@@ -787,7 +809,8 @@ convertOmpCritical(Operation &opInst, llvm::IRBuilderBase &builder,
if (failed(checkImplementationStatus(opInst)))
return failure();
- auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
+ auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP,
+ llvm::ArrayRef<InsertPointTy> deallocIPs) {
// CriticalOp has only one region associated with it.
auto &region = cast<omp::CriticalOp>(opInst).getRegion();
builder.restoreIP(codeGenIP);
@@ -1047,7 +1070,7 @@ convertOmpOrdered(Operation &opInst, llvm::IRBuilderBase &builder,
indexVecValues++;
}
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
+ findAllocInsertPoints(builder, moduleTranslation);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createOrderedDepend(
ompLoc, allocaIP, numLoops, storeValues, ".cnt.addr", isDependSource));
@@ -1066,7 +1089,8 @@ convertOmpOrderedRegion(Operation &opInst, llvm::IRBuilderBase &builder,
if (failed(checkImplementationStatus(opInst)))
return failure();
- auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
+ auto bodyGenCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP,
+ llvm::ArrayRef<InsertPointTy> deallocIPs) {
// OrderedOp has only one region associated with it.
auto &region = cast<omp::OrderedRegionOp>(opInst).getRegion();
builder.restoreIP(codeGenIP);
@@ -1862,7 +1886,7 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
SmallVector<omp::DeclareReductionOp> reductionDecls;
collectReductionDecls(sectionsOp, reductionDecls);
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
+ findAllocInsertPoints(builder, moduleTranslation);
SmallVector<llvm::Value *> privateReductionVariables(
sectionsOp.getNumReductionVars());
@@ -1886,7 +1910,8 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
Region &region = sectionOp.getRegion();
auto sectionCB = [&sectionsOp, &region, &builder, &moduleTranslation](
- InsertPointTy allocaIP, InsertPointTy codeGenIP) {
+ InsertPointTy allocIP, InsertPointTy codeGenIP,
+ ArrayRef<InsertPointTy> deallocIPs) {
builder.restoreIP(codeGenIP);
// map the omp.section reduction block argument to the omp.sections block
@@ -1931,7 +1956,7 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder,
// called for variables which have destructors/finalizers.
auto finiCB = [&](InsertPointTy codeGenIP) { return llvm::Error::success(); };
- allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
+ allocaIP = findAllocInsertPoints(builder, moduleTranslation);
bool isCancellable = constructIsCancellable(sectionsOp);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
@@ -1960,7 +1985,8 @@ convertOmpSingle(omp::SingleOp &singleOp, llvm::IRBuilderBase &builder,
if (failed(checkImplementationStatus(*singleOp)))
return failure();
- auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
+ auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP,
+ llvm::ArrayRef<InsertPointTy> deallocIPs) {
builder.restoreIP(codegenIP);
return convertOmpOpRegions(singleOp.getRegion(), "omp.single.region",
builder, moduleTranslation)
@@ -2043,7 +2069,7 @@ convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
SmallVector<llvm::Value *> privateReductionVariables(numReductionVars);
llvm::ArrayRef<bool> isByRef;
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
+ findAllocInsertPoints(builder, moduleTranslation);
// Only do teams reduction if there is no distribute op that captures the
// reduction instead.
@@ -2065,9 +2091,10 @@ convertOmpTeams(omp::TeamsOp op, llvm::IRBuilderBase &builder,
return failure();
}
- auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
- LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
- moduleTranslation, allocaIP);
+ auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP,
+ llvm::ArrayRef<InsertPointTy> deallocIPs) {
+ LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
+ moduleTranslation, allocIP, deallocIPs);
builder.restoreIP(codegenIP);
return convertOmpOpRegions(op.getRegion(), "omp.teams.region", builder,
moduleTranslation)
@@ -2324,9 +2351,9 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
// code outside of the outlined task region, which is what we want because
// this way the initialization and copy regions are executed immediately while
// the host variable data are still live.
-
- llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
+ llvm::SmallVector<InsertPointTy> deallocIPs;
+ InsertPointTy allocIP =
+ findAllocInsertPoints(builder, moduleTranslation, &deallocIPs);
// Not using splitBB() because that requires the current block to have a
// terminator.
@@ -2356,8 +2383,8 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
// Save the alloca insertion point on ModuleTranslation stack for use in
// nested regions.
- LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
- moduleTranslation, allocaIP);
+ LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
+ moduleTranslation, allocIP, deallocIPs);
// Allocate and initialize private variables
builder.SetInsertPoint(initBlock->getTerminator());
@@ -2421,12 +2448,12 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
// Set up for call to createTask()
builder.SetInsertPoint(taskStartBlock);
- auto bodyCB = [&](InsertPointTy allocaIP,
- InsertPointTy codegenIP) -> llvm::Error {
+ auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP,
+ llvm::ArrayRef<InsertPointTy> deallocIPs) -> llvm::Error {
// Save the alloca insertion point on ModuleTranslation stack for use in
// nested regions.
- LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
- moduleTranslation, allocaIP);
+ LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
+ moduleTranslation, allocIP, deallocIPs);
// translate the body of the task:
builder.restoreIP(codegenIP);
@@ -2444,7 +2471,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
llvm::IRBuilderBase::InsertPointGuard guard(builder);
llvm::Type *llvmAllocType =
moduleTranslation.convertType(privDecl.getType());
- builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
+ builder.SetInsertPoint(allocIP.getBlock()->getTerminator());
llvm::Value *llvmPrivateVar = builder.CreateAlloca(
llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
@@ -2518,7 +2545,7 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
moduleTranslation.getOpenMPBuilder()->createTask(
- ompLoc, allocaIP, bodyCB, !taskOp.getUntied(),
+ ompLoc, allocIP, deallocIPs, bodyCB, !taskOp.getUntied(),
moduleTranslation.lookupValue(taskOp.getFinal()),
moduleTranslation.lookupValue(taskOp.getIfExpr()), dds,
taskOp.getMergeable(),
@@ -2543,18 +2570,21 @@ convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder,
if (failed(checkImplementationStatus(*tgOp)))
return failure();
- auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codegenIP) {
+ auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codegenIP,
+ llvm::ArrayRef<InsertPointTy> deallocIPs) {
builder.restoreIP(codegenIP);
return convertOmpOpRegions(tgOp.getRegion(), "omp.taskgroup.region",
builder, moduleTranslation)
.takeError();
};
- InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
+ llvm::SmallVector<InsertPointTy> deallocIPs;
+ InsertPointTy allocIP =
+ findAllocInsertPoints(builder, moduleTranslation, &deallocIPs);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
- moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocaIP,
- bodyCB);
+ moduleTranslation.getOpenMPBuilder()->createTaskgroup(ompLoc, allocIP,
+ deallocIPs, bodyCB);
if (failed(handleError(afterIP, *tgOp)))
return failure();
@@ -2604,8 +2634,9 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
SmallVector<omp::DeclareReductionOp> reductionDecls;
collectReductionDecls(wsloopOp, reductionDecls);
+
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
+ findAllocInsertPoints(builder, moduleTranslation);
SmallVector<llvm::Value *> privateReductionVariables(
wsloopOp.getNumReductionVars());
@@ -2762,10 +2793,11 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
opInst.getNumReductionVars());
SmallVector<DeferredStore> deferredStores;
- auto bodyGenCB = [&](InsertPointTy allocaIP,
- InsertPointTy codeGenIP) -> llvm::Error {
+ auto bodyGenCB =
+ [&](InsertPointTy allocIP, InsertPointTy codeGenIP,
+ llvm::ArrayRef<InsertPointTy> deallocIPs) -> llvm::Error {
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
- opInst, builder, moduleTranslation, privateVarsInfo, allocaIP);
+ opInst, builder, moduleTranslation, privateVarsInfo, allocIP);
if (handleError(afterAllocas, *opInst).failed())
return llvm::make_error<PreviouslyReportedError>();
@@ -2775,12 +2807,11 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
MutableArrayRef<BlockArgument> reductionArgs =
cast<omp::BlockArgOpenMPOpInterface>(*opInst).getReductionBlockArgs();
- allocaIP =
- InsertPointTy(allocaIP.getBlock(),
- allocaIP.getBlock()->getTerminator()->getIterator());
+ allocIP = InsertPointTy(allocIP.getBlock(),
+ allocIP.getBlock()->getTerminator()->getIterator());
if (failed(allocReductionVars(
- opInst, reductionArgs, builder, moduleTranslation, allocaIP,
+ opInst, reductionArgs, builder, moduleTranslation, allocIP,
reductionDecls, privateReductionVariables, reductionVariableMap,
deferredStores, isByRef)))
return llvm::make_error<PreviouslyReportedError>();
@@ -2809,8 +2840,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
// Save the alloca insertion point on ModuleTranslation stack for use in
// nested regions.
- LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
- moduleTranslation, allocaIP);
+ LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
+ moduleTranslation, allocIP, deallocIPs);
// ParallelOp has only one region associated with it.
llvm::Expected<llvm::BasicBlock *> regionBlock = convertOmpOpRegions(
@@ -2837,7 +2868,7 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::InsertPointOrErrorTy contInsertPoint =
ompBuilder->createReductions(
- builder.saveIP(), allocaIP, reductionInfos, isByRef,
+ builder.saveIP(), allocIP, reductionInfos, isByRef,
/*IsNoWait=*/false, /*IsTeamsReduction=*/false);
if (!contInsertPoint)
return contInsertPoint.takeError();
@@ -2898,13 +2929,15 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
pbKind = getProcBindKind(*bind);
bool isCancellable = constructIsCancellable(opInst);
- llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
+ llvm::SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> deallocIPs;
+ llvm::OpenMPIRBuilder::InsertPointTy allocIP =
+ findAllocInsertPoints(builder, moduleTranslation, &deallocIPs);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
- ompBuilder->createParallel(ompLoc, allocaIP, bodyGenCB, privCB, finiCB,
- ifCond, numThreads, pbKind, isCancellable);
+ ompBuilder->createParallel(ompLoc, allocIP, deallocIPs, bodyGenCB, privCB,
+ finiCB, ifCond, numThreads, pbKind,
+ isCancellable);
if (failed(handleError(afterIP, *opInst)))
return failure();
@@ -2949,7 +2982,7 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
assert(isByRef.size() == simdOp.getNumReductionVars());
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
+ findAllocInsertPoints(builder, moduleTranslation);
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
simdOp, builder, moduleTranslation, privateVarsInfo, allocaIP);
@@ -3255,7 +3288,7 @@ convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
+ findAllocInsertPoints(builder, moduleTranslation);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
@@ -3282,7 +3315,7 @@ convertOmpAtomicWrite(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
+ findAllocInsertPoints(builder, moduleTranslation);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::AtomicOrdering ao = convertAtomicOrdering(writeOp.getMemoryOrder());
@@ -3399,7 +3432,7 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
extractAtomicControlFlags(opInst, isIgnoreDenormalMode, isFineGrainedMemory,
isRemoteMemory);
// Handle ambiguous alloca, if any.
- auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
+ auto allocaIP = findAllocInsertPoints(builder, moduleTranslation);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
@@ -3500,7 +3533,7 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
extractAtomicControlFlags(atomicUpdateOp, isIgnoreDenormalMode,
isFineGrainedMemory, isRemoteMemory);
// Handle ambiguous alloca, if any.
- auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
+ auto allocaIP = findAllocInsertPoints(builder, moduleTranslation);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
ompBuilder->createAtomicCapture(
@@ -4464,7 +4497,7 @@ createAlteredByCaptureMap(MapInfoData &mapData,
if (!isPtrTy) {
auto curInsert = builder.saveIP();
llvm::DebugLoc DbgLoc = builder.getCurrentDebugLocation();
- builder.restoreIP(findAllocaInsertPoint(builder, moduleTranslation));
+ builder.restoreIP(findAllocInsertPoints(builder, moduleTranslation));
auto *memTempAlloc =
builder.CreateAlloca(builder.getPtrTy(), nullptr, ".casted");
builder.SetCurrentDebugLocation(DbgLoc);
@@ -4842,18 +4875,21 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
};
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
- llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
+ llvm::SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> deallocIPs;
+ llvm::OpenMPIRBuilder::InsertPointTy allocIP =
+ findAllocInsertPoints(builder, moduleTranslation, &deallocIPs);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP = [&]() {
if (isa<omp::TargetDataOp>(op))
- return ompBuilder->createTargetData(ompLoc, allocaIP, builder.saveIP(),
+ return ompBuilder->createTargetData(ompLoc, allocIP, builder.saveIP(),
+ deallocIPs,
builder.getInt64(deviceID), ifCond,
info, genMapInfoCB, customMapperCB,
/*MapperFunc=*/nullptr, bodyGenCB,
/*DeviceAddrCB=*/nullptr);
- return ompBuilder->createTargetData(
- ompLoc, allocaIP, builder.saveIP(), builder.getInt64(deviceID), ifCond,
- info, genMapInfoCB, customMapperCB, &RTLFn);
+ return ompBuilder->createTargetData(ompLoc, allocIP, builder.saveIP(),
+ deallocIPs, builder.getInt64(deviceID),
+ ifCond, info, genMapInfoCB,
+ customMapperCB, &RTLFn);
}();
if (failed(handleError(afterIP, *op)))
@@ -4889,7 +4925,7 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
collectReductionDecls(teamsOp, reductionDecls);
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
+ findAllocInsertPoints(builder, moduleTranslation);
MutableArrayRef<BlockArgument> reductionArgs =
llvm::cast<omp::BlockArgOpenMPOpInterface>(*teamsOp)
@@ -4903,19 +4939,20 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
}
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
- auto bodyGenCB = [&](InsertPointTy allocaIP,
- InsertPointTy codeGenIP) -> llvm::Error {
+ auto bodyGenCB =
+ [&](InsertPointTy allocIP, InsertPointTy codeGenIP,
+ llvm::ArrayRef<InsertPointTy> deallocIPs) -> llvm::Error {
// Save the alloca insertion point on ModuleTranslation stack for use in
// nested regions.
- LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
- moduleTranslation, allocaIP);
+ LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
+ moduleTranslation, allocIP, deallocIPs);
// DistributeOp has only one region associated with it.
builder.restoreIP(codeGenIP);
PrivateVarsInfo privVarsInfo(distributeOp);
llvm::Expected<llvm::BasicBlock *> afterAllocas = allocatePrivateVars(
- distributeOp, builder, moduleTranslation, privVarsInfo, allocaIP);
+ distributeOp, builder, moduleTranslation, privVarsInfo, allocIP);
if (handleError(afterAllocas, opInst).failed())
return llvm::make_error<PreviouslyReportedError>();
@@ -4958,7 +4995,7 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
findCurrentLoopInfo(moduleTranslation);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
ompBuilder->applyWorkshareLoop(
- ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
+ ompLoc.DL, loopInfo, allocIP, loopNeedsBarrier,
convertToScheduleKind(schedule), chunk, isSimd,
scheduleMod == omp::ScheduleModifier::monotonic,
scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
@@ -4975,11 +5012,12 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
return llvm::Error::success();
};
- llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
+ llvm::SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> deallocIPs;
+ llvm::OpenMPIRBuilder::InsertPointTy allocIP =
+ findAllocInsertPoints(builder, moduleTranslation, &deallocIPs);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
- ompBuilder->createDistribute(ompLoc, allocaIP, bodyGenCB);
+ ompBuilder->createDistribute(ompLoc, allocIP, deallocIPs, bodyGenCB);
if (failed(handleError(afterIP, opInst)))
return failure();
@@ -4989,7 +5027,7 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
if (doDistributeReduction) {
// Process the reductions if required.
return createReductionsAndCleanup(
- teamsOp, builder, moduleTranslation, allocaIP, reductionDecls,
+ teamsOp, builder, moduleTranslation, allocIP, reductionDecls,
privateReductionVariables, isByRef,
/*isNoWait*/ false, /*isTeamsReduction*/ true);
}
@@ -5659,7 +5697,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
}
using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
- auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP)
+ auto bodyCB = [&](InsertPointTy allocIP, InsertPointTy codeGenIP,
+ ArrayRef<InsertPointTy> deallocIPs)
-> llvm::OpenMPIRBuilder::InsertPointOrErrorTy {
llvm::IRBuilderBase::InsertPointGuard guard(builder);
builder.SetCurrentDebugLocation(llvm::DebugLoc());
@@ -5701,7 +5740,7 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::Expected<llvm::BasicBlock *> afterAllocas =
allocatePrivateVars(targetOp, builder, moduleTranslation,
- privateVarsInfo, allocaIP, &mappedPrivateVars);
+ privateVarsInfo, allocIP, &mappedPrivateVars);
if (failed(handleError(afterAllocas, *targetOp)))
return llvm::make_error<PreviouslyReportedError>();
@@ -5726,6 +5765,8 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
return &privatizer.getDeallocRegion();
});
+ LLVM::ModuleTranslation::SaveStack<OpenMPAllocStackFrame> frame(
+ moduleTranslation, allocIP, deallocIPs);
llvm::Expected<llvm::BasicBlock *> exitBlock = convertOmpOpRegions(
targetRegion, "omp.target", builder, moduleTranslation);
@@ -5829,8 +5870,9 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
buildDependData(targetOp.getDependKinds(), targetOp.getDependVars(),
moduleTranslation, dds);
- llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
- findAllocaInsertPoint(builder, moduleTranslation);
+ llvm::SmallVector<llvm::OpenMPIRBuilder::InsertPointTy> deallocIPs;
+ llvm::OpenMPIRBuilder::InsertPointTy allocIP =
+ findAllocInsertPoints(builder, moduleTranslation, &deallocIPs);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::OpenMPIRBuilder::TargetDataInfo info(
@@ -5852,9 +5894,10 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
moduleTranslation.getOpenMPBuilder()->createTarget(
- ompLoc, isOffloadEntry, allocaIP, builder.saveIP(), info, entryInfo,
- defaultAttrs, runtimeAttrs, ifCond, kernelInput, genMapInfoCB, bodyCB,
- argAccessorCB, customMapperCB, dds, targetOp.getNowait());
+ ompLoc, isOffloadEntry, allocIP, builder.saveIP(), deallocIPs, info,
+ entryInfo, defaultAttrs, runtimeAttrs, ifCond, kernelInput,
+ genMapInfoCB, bodyCB, argAccessorCB, customMapperCB, dds,
+ targetOp.getNowait());
if (failed(handleError(afterIP, opInst)))
return failure();
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
index ca998b4..c3ce2f6 100644
--- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir
@@ -55,21 +55,21 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
// CHECK: define weak_odr protected amdgpu_kernel void @[[FUNC0:.*]](
// CHECK-SAME: ptr %[[TMP:.*]], ptr %[[TMP0:.*]]) #{{[0-9]+}} {
// CHECK: %[[TMP1:.*]] = alloca [1 x ptr], align 8, addrspace(5)
-// CHECK: %[[TMP2:.*]] = addrspacecast ptr addrspace(5) %[[TMP1]] to ptr
-// CHECK: %[[TMP3:.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK: %[[TMP4:.*]] = addrspacecast ptr addrspace(5) %[[TMP3]] to ptr
-// CHECK: store ptr %[[TMP0]], ptr %[[TMP4]], align 8
-// CHECK: %[[TMP5:.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{.*}} to ptr), ptr %[[TMP]])
-// CHECK: %[[EXEC_USER_CODE:.*]] = icmp eq i32 %[[TMP5]], -1
+// CHECK: %[[TMP2:.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK: %[[TMP3:.*]] = addrspacecast ptr addrspace(5) %[[TMP2]] to ptr
+// CHECK: store ptr %[[TMP0]], ptr %[[TMP3]], align 8
+// CHECK: %[[TMP4:.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{.*}} to ptr), ptr %[[TMP]])
+// CHECK: %[[EXEC_USER_CODE:.*]] = icmp eq i32 %[[TMP4]], -1
// CHECK: br i1 %[[EXEC_USER_CODE]], label %[[USER_CODE_ENTRY:.*]], label %[[WORKER_EXIT:.*]]
-// CHECK: %[[TMP6:.*]] = load ptr, ptr %[[TMP4]], align 8
+// CHECK: %[[TMP5:.*]] = addrspacecast ptr addrspace(5) %[[TMP1]] to ptr
// CHECK: %[[STRUCTARG:.*]] = call align 8 ptr @__kmpc_alloc_shared(i64 8)
+// CHECK: %[[TMP6:.*]] = load ptr, ptr %[[TMP3]], align 8
// CHECK: %[[OMP_GLOBAL_THREAD_NUM:.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr))
// CHECK: %[[GEP_:.*]] = getelementptr { ptr }, ptr %[[STRUCTARG]], i32 0, i32 0
// CHECK: store ptr %[[TMP6]], ptr %[[GEP_]], align 8
-// CHECK: %[[TMP7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[TMP2]], i64 0, i64 0
+// CHECK: %[[TMP7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[TMP5]], i64 0, i64 0
// CHECK: store ptr %[[STRUCTARG]], ptr %[[TMP7]], align 8
-// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr @[[FUNC1:.*]], ptr @[[FUNC1_WRAPPER:.*]], ptr %[[TMP2]], i64 1)
+// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 1, i32 -1, i32 -1, ptr @[[FUNC1:.*]], ptr @[[FUNC1_WRAPPER:.*]], ptr %[[TMP5]], i64 1)
// CHECK: call void @__kmpc_free_shared(ptr %[[STRUCTARG]], i64 8)
// CHECK: call void @__kmpc_target_deinit()
diff --git a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
index 5a76871..3ebb79f 100644
--- a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir
@@ -56,7 +56,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo
// CHECK: %[[B:.*]] = load i32, ptr %[[PTR_B]], align 4
// CHECK: %[[C:.*]] = add i32 %[[A]], %[[B]]
// CHECK: store i32 %[[C]], ptr %[[PTR_C]], align 4
-// CHECK: br label %[[LABEL_DEINIT:.*]]
+// CHECK: br label %[[LABEL_TARGET_EXIT:.*]]
+// CHECK: [[LABEL_TARGET_EXIT]]:
+// CHECK-NEXT: br label %[[LABEL_DEINIT:.*]]
// CHECK: [[LABEL_DEINIT]]:
// CHECK-NEXT: call void @__kmpc_target_deinit()
// CHECK-NEXT: ret void
diff --git a/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir b/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir
index 0ee9230..2aa11f3 100644
--- a/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir
@@ -70,4 +70,6 @@ llvm.func @_FortranAAssign(!llvm.ptr, !llvm.ptr, !llvm.ptr, i32) -> !llvm.struct
// CHECK: call void @dealloc_foo_1(ptr %[[DESC_TO_DEALLOC]])
// CHECK-NEXT: br label %[[CONT_BLOCK:.*]]
// CHECK: [[CONT_BLOCK]]:
+// CHECK-NEXT: br label %[[EXIT_BLOCK:.*]]
+// CHECK: [[EXIT_BLOCK]]:
// CHECK-NEXT: ret void