aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h219
-rw-r--r--llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp409
-rw-r--r--llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp139
3 files changed, 765 insertions, 2 deletions
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index b681ea8..f706591 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -31,6 +31,7 @@
namespace llvm {
class CanonicalLoopInfo;
+class ScanInfo;
struct TargetRegionEntryInfo;
class OffloadEntriesInfoManager;
class OpenMPIRBuilder;
@@ -707,6 +708,9 @@ public:
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(
const LocationDescription &Loc, omp::Directive CanceledDirective);
+ /// Creates a ScanInfo object, allocates and returns the pointer.
+ Expected<ScanInfo *> scanInfoInitialize();
+
/// Generator for '#omp parallel'
///
/// \param Loc The insert and source location description.
@@ -750,6 +754,42 @@ public:
LoopBodyGenCallbackTy BodyGenCB, Value *TripCount,
const Twine &Name = "loop");
+ /// Generator for the control flow structure of an OpenMP canonical loops if
+ /// the parent directive has an `inscan` modifier specified.
+ /// If the `inscan` modifier is specified, the region of the parent is
+ /// expected to have a `scan` directive. Based on the clauses in
+ /// scan directive, the body of the loop is split into two loops: Input loop
+ /// and Scan Loop. Input loop contains the code generated for input phase of
+ /// scan and Scan loop contains the code generated for scan phase of scan.
+ /// From the bodyGen callback of these loops, `createScan` would be called
+ /// when a scan directive is encountered from the loop body. `createScan`
+ /// based on whether 1. inclusive or exclusive scan is specified and, 2. input
+ /// loop or scan loop is generated, lowers the body of the for loop
+ /// accordingly.
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param BodyGenCB Callback that will generate the loop body code.
+ /// \param Start Value of the loop counter for the first iterations.
+ /// \param Stop Loop counter values past this will stop the loop.
+ /// \param Step Loop counter increment after each iteration; negative
+ /// means counting down.
+ /// \param IsSigned Whether Start, Stop and Step are signed integers.
+ /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
+ /// counter.
+ /// \param ComputeIP Insertion point for instructions computing the trip
+ /// count. Can be used to ensure the trip count is available
+ /// at the outermost loop of a loop nest. If not set,
+ /// defaults to the preheader of the generated loop.
+ /// \param Name Base name used to derive BB and instruction names.
+ /// \param ScanRedInfo Pointer to the ScanInfo objected created using
+ /// `ScanInfoInitialize`.
+ ///
+ /// \returns A vector containing Loop Info of Input Loop and Scan Loop.
+ Expected<SmallVector<llvm::CanonicalLoopInfo *>> createCanonicalScanLoops(
+ const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
+ Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
+ InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo);
+
/// Calculate the trip count of a canonical loop.
///
/// This allows specifying user-defined loop counter values using increment,
@@ -818,13 +858,17 @@ public:
/// at the outermost loop of a loop nest. If not set,
/// defaults to the preheader of the generated loop.
/// \param Name Base name used to derive BB and instruction names.
+ /// \param InScan Whether loop has a scan reduction specified.
+ /// \param ScanRedInfo Pointer to the ScanInfo objected created using
+ /// `ScanInfoInitialize`.
///
/// \returns An object representing the created control flow structure which
/// can be used for loop-associated directives.
LLVM_ABI Expected<CanonicalLoopInfo *> createCanonicalLoop(
const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
- InsertPointTy ComputeIP = {}, const Twine &Name = "loop");
+ InsertPointTy ComputeIP = {}, const Twine &Name = "loop",
+ bool InScan = false, ScanInfo *ScanRedInfo = nullptr);
/// Collapse a loop nest into a single loop.
///
@@ -1556,6 +1600,47 @@ private:
ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos,
Function *ReduceFn, AttributeList FuncAttrs);
+ /// Helper function for CreateCanonicalScanLoops to create InputLoop
+ /// in the firstGen and Scan Loop in the SecondGen
+ /// \param InputLoopGen Callback for generating the loop for input phase
+ /// \param ScanLoopGen Callback for generating the loop for scan phase
+ /// \param ScanRedInfo Pointer to the ScanInfo objected created using
+ /// `ScanInfoInitialize`.
+ ///
+ /// \return error if any produced, else return success.
+ Error emitScanBasedDirectiveIR(
+ llvm::function_ref<Error()> InputLoopGen,
+ llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen,
+ ScanInfo *ScanRedInfo);
+
+ /// Creates the basic blocks required for scan reduction.
+ /// \param ScanRedInfo Pointer to the ScanInfo objected created using
+ /// `ScanInfoInitialize`.
+ void createScanBBs(ScanInfo *ScanRedInfo);
+
+ /// Dynamically allocates the buffer needed for scan reduction.
+ /// \param AllocaIP The IP where possibly-shared pointer of buffer needs to
+ /// be declared.
+ /// \param ScanVars Scan Variables.
+ /// \param ScanRedInfo Pointer to the ScanInfo objected created using
+ /// `ScanInfoInitialize`.
+ ///
+ /// \return error if any produced, else return success.
+ Error emitScanBasedDirectiveDeclsIR(InsertPointTy AllocaIP,
+ ArrayRef<llvm::Value *> ScanVars,
+ ArrayRef<llvm::Type *> ScanVarsType,
+ ScanInfo *ScanRedInfo);
+
+ /// Copies the result back to the reduction variable.
+ /// \param ReductionInfos Array type containing the ReductionOps.
+ /// \param ScanRedInfo Pointer to the ScanInfo objected created using
+ /// `ScanInfoInitialize`.
+ ///
+ /// \return error if any produced, else return success.
+ Error emitScanBasedDirectiveFinalsIR(
+ ArrayRef<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos,
+ ScanInfo *ScanInfo);
+
/// This function emits a helper that gathers Reduce lists from the first
/// lane of every active warp to lanes in the first warp.
///
@@ -2184,6 +2269,9 @@ public:
/// free'd.
std::forward_list<CanonicalLoopInfo> LoopInfos;
+ /// Collection of owned ScanInfo objects that eventually need to be free'd.
+ std::forward_list<ScanInfo> ScanInfos;
+
/// Add a new region that will be outlined later.
void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
@@ -2639,6 +2727,48 @@ public:
FinalizeCallbackTy FiniCB,
Value *Filter);
+ /// This function performs the scan reduction of the values updated in
+ /// the input phase. The reduction logic needs to be emitted between input
+ /// and scan loop returned by `CreateCanonicalScanLoops`. The following
+ /// is the code that is generated, `buffer` and `span` are expected to be
+ /// populated before executing the generated code.
+ /// \code{c}
+ /// for (int k = 0; k != ceil(log2(span)); ++k) {
+ /// i=pow(2,k)
+ /// for (size cnt = last_iter; cnt >= i; --cnt)
+ /// buffer[cnt] op= buffer[cnt-i];
+ /// }
+ /// \endcode
+ /// \param Loc The insert and source location description.
+ /// \param ReductionInfos Array type containing the ReductionOps.
+ /// \param ScanRedInfo Pointer to the ScanInfo objected created using
+ /// `ScanInfoInitialize`.
+ ///
+ /// \returns The insertion position *after* the masked.
+ InsertPointOrErrorTy emitScanReduction(
+ const LocationDescription &Loc,
+ ArrayRef<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos,
+ ScanInfo *ScanRedInfo);
+
+ /// This directive split and directs the control flow to input phase
+ /// blocks or scan phase blocks based on 1. whether input loop or scan loop
+ /// is executed, 2. whether exclusive or inclusive scan is used.
+ ///
+ /// \param Loc The insert and source location description.
+ /// \param AllocaIP The IP where the temporary buffer for scan reduction
+ // needs to be allocated.
+ /// \param ScanVars Scan Variables.
+ /// \param IsInclusive Whether it is an inclusive or exclusive scan.
+ /// \param ScanRedInfo Pointer to the ScanInfo objected created using
+ /// `ScanInfoInitialize`.
+ ///
+ /// \returns The insertion position *after* the scan.
+ InsertPointOrErrorTy createScan(const LocationDescription &Loc,
+ InsertPointTy AllocaIP,
+ ArrayRef<llvm::Value *> ScanVars,
+ ArrayRef<llvm::Type *> ScanVarsType,
+ bool IsInclusive, ScanInfo *ScanRedInfo);
+
/// Generator for '#omp critical'
///
/// \param Loc The insert and source location description.
@@ -3779,6 +3909,93 @@ public:
LLVM_ABI void invalidate();
};
+/// ScanInfo holds the information to assist in lowering of Scan reduction.
+/// Before lowering, the body of the for loop specifying scan reduction is
+/// expected to have the following structure
+///
+/// Loop Body Entry
+/// |
+/// Code before the scan directive
+/// |
+/// Scan Directive
+/// |
+/// Code after the scan directive
+/// |
+/// Loop Body Exit
+/// When `createCanonicalScanLoops` is executed, the bodyGen callback of it
+/// transforms the body to:
+///
+/// Loop Body Entry
+/// |
+/// OMPScanDispatch
+///
+/// OMPBeforeScanBlock
+/// |
+/// OMPScanLoopExit
+/// |
+/// Loop Body Exit
+///
+/// The insert point is updated to the first insert point of OMPBeforeScanBlock.
+/// It dominates the control flow of code generated until
+/// scan directive is encountered and OMPAfterScanBlock dominates the
+/// control flow of code generated after scan is encountered. The successor
+/// of OMPScanDispatch can be OMPBeforeScanBlock or OMPAfterScanBlock based
+/// on 1.whether it is in Input phase or Scan Phase , 2. whether it is an
+/// exclusive or inclusive scan. This jump is added when `createScan` is
+/// executed. If input loop is being generated, if it is inclusive scan,
+/// `OMPAfterScanBlock` succeeds `OMPScanDispatch` , if exclusive,
+/// `OMPBeforeScanBlock` succeeds `OMPDispatch` and vice versa for scan loop. At
+/// the end of the input loop, temporary buffer is populated and at the
+/// beginning of the scan loop, temporary buffer is read. After scan directive
+/// is encountered, insertion point is updated to `OMPAfterScanBlock` as it is
+/// expected to dominate the code after the scan directive. Both Before and
+/// After scan blocks are succeeded by `OMPScanLoopExit`.
+/// Temporary buffer allocations are done in `ScanLoopInit` block before the
+/// lowering of for-loop. The results are copied back to reduction variable in
+/// `ScanLoopFinish` block.
+class ScanInfo {
+public:
+ /// Dominates the body of the loop before scan directive
+ llvm::BasicBlock *OMPBeforeScanBlock = nullptr;
+
+ /// Dominates the body of the loop before scan directive
+ llvm::BasicBlock *OMPAfterScanBlock = nullptr;
+
+ /// Controls the flow to before or after scan blocks
+ llvm::BasicBlock *OMPScanDispatch = nullptr;
+
+ /// Exit block of loop body
+ llvm::BasicBlock *OMPScanLoopExit = nullptr;
+
+ /// Block before loop body where scan initializations are done
+ llvm::BasicBlock *OMPScanInit = nullptr;
+
+ /// Block after loop body where scan finalizations are done
+ llvm::BasicBlock *OMPScanFinish = nullptr;
+
+ /// If true, it indicates Input phase is lowered; else it indicates
+ /// ScanPhase is lowered
+ bool OMPFirstScanLoop = false;
+
+ /// Maps the private reduction variable to the pointer of the temporary
+ /// buffer
+ llvm::SmallDenseMap<llvm::Value *, llvm::Value *> *ScanBuffPtrs;
+
+ /// Keeps track of value of iteration variable for input/scan loop to be
+ /// used for Scan directive lowering
+ llvm::Value *IV;
+
+ /// Stores the span of canonical loop being lowered to be used for temporary
+ /// buffer allocation or Finalization.
+ llvm::Value *Span;
+
+ ScanInfo() {
+ ScanBuffPtrs = new llvm::SmallDenseMap<llvm::Value *, llvm::Value *>();
+ }
+
+ ~ScanInfo() { delete (ScanBuffPtrs); }
+};
+
} // end namespace llvm
#endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 260d3c2..ea027e4 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4014,6 +4014,340 @@ OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
/*Conditional*/ true, /*hasFinalize*/ true);
}
+static llvm::CallInst *emitNoUnwindRuntimeCall(IRBuilder<> &Builder,
+ llvm::FunctionCallee Callee,
+ ArrayRef<llvm::Value *> Args,
+ const llvm::Twine &Name) {
+ llvm::CallInst *Call = Builder.CreateCall(
+ Callee, Args, SmallVector<llvm::OperandBundleDef, 1>(), Name);
+ Call->setDoesNotThrow();
+ return Call;
+}
+
+// Expects input basic block is dominated by BeforeScanBB.
+// Once Scan directive is encountered, the code after scan directive should be
+// dominated by AfterScanBB. Scan directive splits the code sequence to
+// scan and input phase. Based on whether inclusive or exclusive
+// clause is used in the scan directive and whether input loop or scan loop
+// is lowered, it adds jumps to input and scan phase. First Scan loop is the
+// input loop and second is the scan loop. The code generated handles only
+// inclusive scans now.
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
+ const LocationDescription &Loc, InsertPointTy AllocaIP,
+ ArrayRef<llvm::Value *> ScanVars, ArrayRef<llvm::Type *> ScanVarsType,
+ bool IsInclusive, ScanInfo *ScanRedInfo) {
+ if (ScanRedInfo->OMPFirstScanLoop) {
+ llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
+ ScanVarsType, ScanRedInfo);
+ if (Err)
+ return Err;
+ }
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ llvm::Value *IV = ScanRedInfo->IV;
+
+ if (ScanRedInfo->OMPFirstScanLoop) {
+ // Emit buffer[i] = red; at the end of the input phase.
+ for (size_t i = 0; i < ScanVars.size(); i++) {
+ Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
+ Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
+ Type *DestTy = ScanVarsType[i];
+ Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
+ Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
+
+ Builder.CreateStore(Src, Val);
+ }
+ }
+ Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
+ emitBlock(ScanRedInfo->OMPScanDispatch,
+ Builder.GetInsertBlock()->getParent());
+
+ if (!ScanRedInfo->OMPFirstScanLoop) {
+ IV = ScanRedInfo->IV;
+ // Emit red = buffer[i]; at the entrance to the scan phase.
+ // TODO: if exclusive scan, the red = buffer[i-1] needs to be updated.
+ for (size_t i = 0; i < ScanVars.size(); i++) {
+ Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
+ Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
+ Type *DestTy = ScanVarsType[i];
+ Value *SrcPtr =
+ Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
+ Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
+ Builder.CreateStore(Src, ScanVars[i]);
+ }
+ }
+
+ // TODO: Update it to CreateBr and remove dead blocks
+ llvm::Value *CmpI = Builder.getInt1(true);
+ if (ScanRedInfo->OMPFirstScanLoop == IsInclusive) {
+ Builder.CreateCondBr(CmpI, ScanRedInfo->OMPBeforeScanBlock,
+ ScanRedInfo->OMPAfterScanBlock);
+ } else {
+ Builder.CreateCondBr(CmpI, ScanRedInfo->OMPAfterScanBlock,
+ ScanRedInfo->OMPBeforeScanBlock);
+ }
+ emitBlock(ScanRedInfo->OMPAfterScanBlock,
+ Builder.GetInsertBlock()->getParent());
+ Builder.SetInsertPoint(ScanRedInfo->OMPAfterScanBlock);
+ return Builder.saveIP();
+}
+
+Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
+ InsertPointTy AllocaIP, ArrayRef<Value *> ScanVars,
+ ArrayRef<Type *> ScanVarsType, ScanInfo *ScanRedInfo) {
+
+ Builder.restoreIP(AllocaIP);
+ // Create the shared pointer at alloca IP.
+ for (size_t i = 0; i < ScanVars.size(); i++) {
+ llvm::Value *BuffPtr =
+ Builder.CreateAlloca(Builder.getPtrTy(), nullptr, "vla");
+ (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]] = BuffPtr;
+ }
+
+ // Allocate temporary buffer by master thread
+ auto BodyGenCB = [&](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP) -> Error {
+ Builder.restoreIP(CodeGenIP);
+ Value *AllocSpan =
+ Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
+ for (size_t i = 0; i < ScanVars.size(); i++) {
+ Type *IntPtrTy = Builder.getInt32Ty();
+ Constant *Allocsize = ConstantExpr::getSizeOf(ScanVarsType[i]);
+ Allocsize = ConstantExpr::getTruncOrBitCast(Allocsize, IntPtrTy);
+ Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
+ AllocSpan, nullptr, "arr");
+ Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
+ }
+ return Error::success();
+ };
+ // TODO: Perform finalization actions for variables. This has to be
+ // called for variables which have destructors/finalizers.
+ auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); };
+
+ Builder.SetInsertPoint(ScanRedInfo->OMPScanInit->getTerminator());
+ llvm::Value *FilterVal = Builder.getInt32(0);
+ llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+ createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
+
+ if (!AfterIP)
+ return AfterIP.takeError();
+ Builder.restoreIP(*AfterIP);
+ BasicBlock *InputBB = Builder.GetInsertBlock();
+ if (InputBB->getTerminator())
+ Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
+ AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
+ if (!AfterIP)
+ return AfterIP.takeError();
+ Builder.restoreIP(*AfterIP);
+
+ return Error::success();
+}
+
+Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
+ ArrayRef<ReductionInfo> ReductionInfos, ScanInfo *ScanRedInfo) {
+ auto BodyGenCB = [&](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP) -> Error {
+ Builder.restoreIP(CodeGenIP);
+ for (ReductionInfo RedInfo : ReductionInfos) {
+ Value *PrivateVar = RedInfo.PrivateVariable;
+ Value *OrigVar = RedInfo.Variable;
+ Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[PrivateVar];
+ Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
+
+ Type *SrcTy = RedInfo.ElementType;
+ Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, ScanRedInfo->Span,
+ "arrayOffset");
+ Value *Src = Builder.CreateLoad(SrcTy, Val);
+
+ Builder.CreateStore(Src, OrigVar);
+ Builder.CreateFree(Buff);
+ }
+ return Error::success();
+ };
+ // TODO: Perform finalization actions for variables. This has to be
+ // called for variables which have destructors/finalizers.
+ auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); };
+
+ if (ScanRedInfo->OMPScanFinish->getTerminator())
+ Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish->getTerminator());
+ else
+ Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish);
+
+ llvm::Value *FilterVal = Builder.getInt32(0);
+ llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+ createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
+
+ if (!AfterIP)
+ return AfterIP.takeError();
+ Builder.restoreIP(*AfterIP);
+ BasicBlock *InputBB = Builder.GetInsertBlock();
+ if (InputBB->getTerminator())
+ Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
+ AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
+ if (!AfterIP)
+ return AfterIP.takeError();
+ Builder.restoreIP(*AfterIP);
+ return Error::success();
+}
+
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
+ const LocationDescription &Loc,
+ ArrayRef<llvm::OpenMPIRBuilder::ReductionInfo> ReductionInfos,
+ ScanInfo *ScanRedInfo) {
+
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+ auto BodyGenCB = [&](InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP) -> Error {
+ Builder.restoreIP(CodeGenIP);
+ Function *CurFn = Builder.GetInsertBlock()->getParent();
+ // for (int k = 0; k <= ceil(log2(n)); ++k)
+ llvm::BasicBlock *LoopBB =
+ BasicBlock::Create(CurFn->getContext(), "omp.outer.log.scan.body");
+ llvm::BasicBlock *ExitBB =
+ splitBB(Builder, false, "omp.outer.log.scan.exit");
+ llvm::Function *F = llvm::Intrinsic::getOrInsertDeclaration(
+ Builder.GetInsertBlock()->getModule(),
+ (llvm::Intrinsic::ID)llvm::Intrinsic::log2, Builder.getDoubleTy());
+ llvm::BasicBlock *InputBB = Builder.GetInsertBlock();
+ llvm::Value *Arg =
+ Builder.CreateUIToFP(ScanRedInfo->Span, Builder.getDoubleTy());
+ llvm::Value *LogVal = emitNoUnwindRuntimeCall(Builder, F, Arg, "");
+ F = llvm::Intrinsic::getOrInsertDeclaration(
+ Builder.GetInsertBlock()->getModule(),
+ (llvm::Intrinsic::ID)llvm::Intrinsic::ceil, Builder.getDoubleTy());
+ LogVal = emitNoUnwindRuntimeCall(Builder, F, LogVal, "");
+ LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
+ llvm::Value *NMin1 = Builder.CreateNUWSub(
+ ScanRedInfo->Span,
+ llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
+ Builder.SetInsertPoint(InputBB);
+ Builder.CreateBr(LoopBB);
+ emitBlock(LoopBB, CurFn);
+ Builder.SetInsertPoint(LoopBB);
+
+ PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+ // size pow2k = 1;
+ PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+ Counter->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
+ InputBB);
+ Pow2K->addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
+ InputBB);
+ // for (size i = n - 1; i >= 2 ^ k; --i)
+ // tmp[i] op= tmp[i-pow2k];
+ llvm::BasicBlock *InnerLoopBB =
+ BasicBlock::Create(CurFn->getContext(), "omp.inner.log.scan.body");
+ llvm::BasicBlock *InnerExitBB =
+ BasicBlock::Create(CurFn->getContext(), "omp.inner.log.scan.exit");
+ llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
+ Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+ emitBlock(InnerLoopBB, CurFn);
+ Builder.SetInsertPoint(InnerLoopBB);
+ PHINode *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
+ IVal->addIncoming(NMin1, LoopBB);
+ for (ReductionInfo RedInfo : ReductionInfos) {
+ Value *ReductionVal = RedInfo.PrivateVariable;
+ Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ReductionVal];
+ Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
+ Type *DestTy = RedInfo.ElementType;
+ Value *IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
+ Value *LHSPtr =
+ Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
+ Value *OffsetIval = Builder.CreateNUWSub(IV, Pow2K);
+ Value *RHSPtr =
+ Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval, "arrayOffset");
+ Value *LHS = Builder.CreateLoad(DestTy, LHSPtr);
+ Value *RHS = Builder.CreateLoad(DestTy, RHSPtr);
+ llvm::Value *Result;
+ InsertPointOrErrorTy AfterIP =
+ RedInfo.ReductionGen(Builder.saveIP(), LHS, RHS, Result);
+ if (!AfterIP)
+ return AfterIP.takeError();
+ Builder.CreateStore(Result, LHSPtr);
+ }
+ llvm::Value *NextIVal = Builder.CreateNUWSub(
+ IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
+ IVal->addIncoming(NextIVal, Builder.GetInsertBlock());
+ CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
+ Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
+ emitBlock(InnerExitBB, CurFn);
+ llvm::Value *Next = Builder.CreateNUWAdd(
+ Counter, llvm::ConstantInt::get(Counter->getType(), 1));
+ Counter->addIncoming(Next, Builder.GetInsertBlock());
+ // pow2k <<= 1;
+ llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1, "", /*HasNUW=*/true);
+ Pow2K->addIncoming(NextPow2K, Builder.GetInsertBlock());
+ llvm::Value *Cmp = Builder.CreateICmpNE(Next, LogVal);
+ Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
+ Builder.SetInsertPoint(ExitBB->getFirstInsertionPt());
+ return Error::success();
+ };
+
+ // TODO: Perform finalization actions for variables. This has to be
+ // called for variables which have destructors/finalizers.
+ auto FiniCB = [&](InsertPointTy CodeGenIP) { return llvm::Error::success(); };
+
+ llvm::Value *FilterVal = Builder.getInt32(0);
+ llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
+ createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
+
+ if (!AfterIP)
+ return AfterIP.takeError();
+ Builder.restoreIP(*AfterIP);
+ AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
+
+ if (!AfterIP)
+ return AfterIP.takeError();
+ Builder.restoreIP(*AfterIP);
+ Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
+ if (Err)
+ return Err;
+
+ return AfterIP;
+}
+
+Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
+ llvm::function_ref<Error()> InputLoopGen,
+ llvm::function_ref<Error(LocationDescription Loc)> ScanLoopGen,
+ ScanInfo *ScanRedInfo) {
+
+ {
+ // Emit loop with input phase:
+ // for (i: 0..<num_iters>) {
+ // <input phase>;
+ // buffer[i] = red;
+ // }
+ ScanRedInfo->OMPFirstScanLoop = true;
+ Error Err = InputLoopGen();
+ if (Err)
+ return Err;
+ }
+ {
+ // Emit loop with scan phase:
+ // for (i: 0..<num_iters>) {
+ // red = buffer[i];
+ // <scan phase>;
+ // }
+ ScanRedInfo->OMPFirstScanLoop = false;
+ Error Err = ScanLoopGen(Builder.saveIP());
+ if (Err)
+ return Err;
+ }
+ return Error::success();
+}
+
+void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
+ Function *Fun = Builder.GetInsertBlock()->getParent();
+ ScanRedInfo->OMPScanDispatch =
+ BasicBlock::Create(Fun->getContext(), "omp.inscan.dispatch");
+ ScanRedInfo->OMPAfterScanBlock =
+ BasicBlock::Create(Fun->getContext(), "omp.after.scan.bb");
+ ScanRedInfo->OMPBeforeScanBlock =
+ BasicBlock::Create(Fun->getContext(), "omp.before.scan.bb");
+ ScanRedInfo->OMPScanLoopExit =
+ BasicBlock::Create(Fun->getContext(), "omp.scan.loop.exit");
+}
CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
BasicBlock *PostInsertBefore, const Twine &Name) {
@@ -4111,6 +4445,76 @@ OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
return CL;
}
+Expected<ScanInfo *> OpenMPIRBuilder::scanInfoInitialize() {
+ ScanInfos.emplace_front();
+ ScanInfo *Result = &ScanInfos.front();
+ return Result;
+}
+
+Expected<SmallVector<llvm::CanonicalLoopInfo *>>
+OpenMPIRBuilder::createCanonicalScanLoops(
+ const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
+ Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
+ InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo) {
+ LocationDescription ComputeLoc =
+ ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
+ updateToLocation(ComputeLoc);
+
+ SmallVector<CanonicalLoopInfo *> Result;
+
+ Value *TripCount = calculateCanonicalLoopTripCount(
+ ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
+ ScanRedInfo->Span = TripCount;
+ ScanRedInfo->OMPScanInit = splitBB(Builder, true, "scan.init");
+ Builder.SetInsertPoint(ScanRedInfo->OMPScanInit);
+
+ auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
+ Builder.restoreIP(CodeGenIP);
+ ScanRedInfo->IV = IV;
+ createScanBBs(ScanRedInfo);
+ BasicBlock *InputBlock = Builder.GetInsertBlock();
+ Instruction *Terminator = InputBlock->getTerminator();
+ assert(Terminator->getNumSuccessors() == 1);
+ BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
+ Terminator->setSuccessor(0, ScanRedInfo->OMPScanDispatch);
+ emitBlock(ScanRedInfo->OMPBeforeScanBlock,
+ Builder.GetInsertBlock()->getParent());
+ Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
+ emitBlock(ScanRedInfo->OMPScanLoopExit,
+ Builder.GetInsertBlock()->getParent());
+ Builder.CreateBr(ContinueBlock);
+ Builder.SetInsertPoint(
+ ScanRedInfo->OMPBeforeScanBlock->getFirstInsertionPt());
+ return BodyGenCB(Builder.saveIP(), IV);
+ };
+
+ const auto &&InputLoopGen = [&]() -> Error {
+ Expected<CanonicalLoopInfo *> LoopInfo = createCanonicalLoop(
+ Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
+ ComputeIP, Name, true, ScanRedInfo);
+ if (!LoopInfo)
+ return LoopInfo.takeError();
+ Result.push_back(*LoopInfo);
+ Builder.restoreIP((*LoopInfo)->getAfterIP());
+ return Error::success();
+ };
+ const auto &&ScanLoopGen = [&](LocationDescription Loc) -> Error {
+ Expected<CanonicalLoopInfo *> LoopInfo =
+ createCanonicalLoop(Loc, BodyGen, Start, Stop, Step, IsSigned,
+ InclusiveStop, ComputeIP, Name, true, ScanRedInfo);
+ if (!LoopInfo)
+ return LoopInfo.takeError();
+ Result.push_back(*LoopInfo);
+ Builder.restoreIP((*LoopInfo)->getAfterIP());
+ ScanRedInfo->OMPScanFinish = Builder.GetInsertBlock();
+ return Error::success();
+ };
+ Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
+ if (Err)
+ return Err;
+ return Result;
+}
+
Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step,
bool IsSigned, bool InclusiveStop, const Twine &Name) {
@@ -4174,7 +4578,8 @@ Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
- InsertPointTy ComputeIP, const Twine &Name) {
+ InsertPointTy ComputeIP, const Twine &Name, bool InScan,
+ ScanInfo *ScanRedInfo) {
LocationDescription ComputeLoc =
ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
@@ -4185,6 +4590,8 @@ Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
Builder.restoreIP(CodeGenIP);
Value *Span = Builder.CreateMul(IV, Step);
Value *IndVar = Builder.CreateAdd(Span, Start);
+ if (InScan)
+ ScanRedInfo->IV = IndVar;
return BodyGenCB(Builder.saveIP(), IndVar);
};
LocationDescription LoopLoc =
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index d6b578a..b7a060b 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -23,6 +23,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "gmock/gmock.h"
#include "gtest/gtest.h"
+#include <cstdlib>
#include <optional>
using namespace llvm;
@@ -5360,6 +5361,144 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
EXPECT_TRUE(findGEPZeroOne(ReductionFn->getArg(1), FirstRHS, SecondRHS));
}
+static void createScan(llvm::Value *scanVar, llvm::Type *scanType,
+ OpenMPIRBuilder &OMPBuilder, IRBuilder<> &Builder,
+ OpenMPIRBuilder::LocationDescription Loc,
+ OpenMPIRBuilder::InsertPointTy &allocaIP,
+ ScanInfo *&ScanRedInfo) {
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+ ASSERT_EXPECTED_INIT(InsertPointTy, retIp,
+ OMPBuilder.createScan(Loc, allocaIP, {scanVar},
+ {scanType}, true, ScanRedInfo));
+ Builder.restoreIP(retIp);
+}
+/*
+ Following is the pseudocode of the code generated by the test case
+ <declare pointer to buffer> ptr
+ size num_iters = 100
+ // temp buffer allocation
+ omp masked {
+ buff = malloc(num_iters*scanvarstype)
+ *ptr = buff
+ }
+ barrier;
+ // input phase loop
+ for (i: 0..<num_iters>) {
+ <input phase>;
+ buffer = *ptr;
+ buffer[i] = red;
+ }
+ // scan reduction
+ omp masked
+ {
+ for (int k = 0; k != ceil(log2(num_iters)); ++k) {
+ i=pow(2,k)
+ for (size cnt = last_iter; cnt >= i; --cnt) {
+ buffer = *ptr;
+ buffer[cnt] op= buffer[cnt-i];
+ }
+ }
+ }
+ barrier;
+ // scan phase loop
+ for (0..<num_iters>) {
+ buffer = *ptr;
+ red = buffer[i] ;
+ <scan phase>;
+ }
+ // temp buffer deletion
+ omp masked {
+ free(*ptr)
+ }
+ barrier;
+*/
+TEST_F(OpenMPIRBuilderTest, ScanReduction) {
+ using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
+ OpenMPIRBuilder OMPBuilder(*M);
+ OMPBuilder.initialize();
+ IRBuilder<> Builder(BB);
+ OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
+ Value *TripCount = F->getArg(0);
+ Type *LCTy = TripCount->getType();
+ Value *StartVal = ConstantInt::get(LCTy, 1);
+ Value *StopVal = ConstantInt::get(LCTy, 100);
+ Value *Step = ConstantInt::get(LCTy, 1);
+ auto AllocaIP = Builder.saveIP();
+
+ llvm::Value *ScanVar = Builder.CreateAlloca(Builder.getFloatTy());
+ llvm::Value *OrigVar = Builder.CreateAlloca(Builder.getFloatTy());
+ unsigned NumBodiesGenerated = 0;
+ ScanInfo *ScanRedInfo;
+ ASSERT_EXPECTED_INIT(ScanInfo *, ScanInformation,
+ OMPBuilder.scanInfoInitialize());
+ ScanRedInfo = ScanInformation;
+ auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, llvm::Value *LC) {
+ NumBodiesGenerated += 1;
+ Builder.restoreIP(CodeGenIP);
+ createScan(ScanVar, Builder.getFloatTy(), OMPBuilder, Builder, Loc,
+ AllocaIP, ScanRedInfo);
+ return Error::success();
+ };
+ llvm::SmallVector<CanonicalLoopInfo *> loops;
+ ASSERT_EXPECTED_INIT(llvm::SmallVector<CanonicalLoopInfo *>, loopvec,
+ OMPBuilder.createCanonicalScanLoops(
+ Loc, LoopBodyGenCB, StartVal, StopVal, Step, false,
+ false, Builder.saveIP(), "scan", ScanRedInfo));
+ loops = loopvec;
+ CanonicalLoopInfo *InputLoop = loops.front();
+ CanonicalLoopInfo *ScanLoop = loops.back();
+ Builder.restoreIP(ScanLoop->getAfterIP());
+ InputLoop->assertOK();
+ ScanLoop->assertOK();
+
+ EXPECT_EQ(ScanLoop->getAfter(), Builder.GetInsertBlock());
+ EXPECT_EQ(NumBodiesGenerated, 2U);
+ SmallVector<OpenMPIRBuilder::ReductionInfo> ReductionInfos = {
+ {Builder.getFloatTy(), OrigVar, ScanVar,
+ /*EvaluationKind=*/OpenMPIRBuilder::EvalKind::Scalar, sumReduction,
+ /*ReductionGenClang=*/nullptr, sumAtomicReduction}};
+ OpenMPIRBuilder::LocationDescription RedLoc({InputLoop->getAfterIP(), DL});
+ llvm::BasicBlock *Cont = splitBB(Builder, false, "omp.scan.loop.cont");
+ ASSERT_EXPECTED_INIT(
+ InsertPointTy, retIp,
+ OMPBuilder.emitScanReduction(RedLoc, ReductionInfos, ScanRedInfo));
+ Builder.restoreIP(retIp);
+ Builder.CreateBr(Cont);
+ Builder.SetInsertPoint(Cont);
+ unsigned NumMallocs = 0;
+ unsigned NumFrees = 0;
+ unsigned NumMasked = 0;
+ unsigned NumEndMasked = 0;
+ unsigned NumLog = 0;
+ unsigned NumCeil = 0;
+ for (Instruction &I : instructions(F)) {
+ if (!isa<CallInst>(I))
+ continue;
+ CallInst *Call = dyn_cast<CallInst>(&I);
+ StringRef Name = Call->getCalledFunction()->getName();
+ if (Name.equals_insensitive("malloc")) {
+ NumMallocs += 1;
+ } else if (Name.equals_insensitive("free")) {
+ NumFrees += 1;
+ } else if (Name.equals_insensitive("__kmpc_masked")) {
+ NumMasked += 1;
+ } else if (Name.equals_insensitive("__kmpc_end_masked")) {
+ NumEndMasked += 1;
+ } else if (Name.equals_insensitive("llvm.log2.f64")) {
+ NumLog += 1;
+ } else if (Name.equals_insensitive("llvm.ceil.f64")) {
+ NumCeil += 1;
+ }
+ }
+ EXPECT_EQ(NumBodiesGenerated, 2U);
+ EXPECT_EQ(NumMasked, 3U);
+ EXPECT_EQ(NumEndMasked, 3U);
+ EXPECT_EQ(NumMallocs, 1U);
+ EXPECT_EQ(NumFrees, 1U);
+ EXPECT_EQ(NumLog, 1U);
+ EXPECT_EQ(NumCeil, 1U);
+}
+
TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
OpenMPIRBuilder OMPBuilder(*M);