diff options
author | Michael Kruse <llvm-project@meinersbur.de> | 2022-02-25 17:44:14 -0600 |
---|---|---|
committer | Michael Kruse <llvm-project@meinersbur.de> | 2022-02-28 18:18:33 -0600 |
commit | a66f7769a3df711ff96f3832f5c71899ac671218 (patch) | |
tree | 8ff13f1ec38b0cc3003abe70b0d79c97dbc9f23f /llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp | |
parent | 73b193aec21d5203bde76c76bd0a29c6f77daf36 (diff) | |
download | llvm-a66f7769a3df711ff96f3832f5c71899ac671218.zip llvm-a66f7769a3df711ff96f3832f5c71899ac671218.tar.gz llvm-a66f7769a3df711ff96f3832f5c71899ac671218.tar.bz2 |
[OpenMPIRBuilder] Implement static-chunked workshare-loop schedules.
Add applyStaticChunkedWorkshareLoop method implementing static schedule when chunk-size is specified. Unlike a static schedule without chunk-size (where chunk-size is chosen by the runtime such that each thread receives one chunk), we need two nested loops: one for looping over the iterations of a chunk, and a second for looping over all chunks assigned to the threads.
This patch includes the following related changes:
* Adapt applyWorkshareLoop to triage between the schedule types, which is now possible since all schedules have been implemented. The default schedule is assumed to be non-chunked static, matching the behavior without OpenMPIRBuilder.
* Remove the chunk parameter from applyStaticWorkshareLoop, since it is ignored by the runtime. Change the value passed to the init function to 0, as is done without OpenMPIRBuilder.
* Refactor CanonicalLoopInfo::setTripCount and CanonicalLoopInfo::mapIndVar, as they are used by both applyStaticWorkshareLoop and applyStaticChunkedWorkshareLoop.
* Enable Clang to use the OpenMPIRBuilder in the presence of the schedule clause.
Differential Revision: https://reviews.llvm.org/D114413
Diffstat (limited to 'llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp')
-rw-r--r-- | llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp | 143 |
1 files changed, 120 insertions, 23 deletions
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index 48f720b..54f7972 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -113,6 +113,33 @@ verifyListOrder(Function *F, ArrayRef<BasicBlock *> RefOrder) { << " in function " << F->getName(); } +/// Populate Calls with call instructions calling the function with the given +/// FnID from the given function F. +static void findCalls(Function *F, omp::RuntimeFunction FnID, + OpenMPIRBuilder &OMPBuilder, + SmallVectorImpl<CallInst *> &Calls) { + Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID); + for (BasicBlock &BB : *F) { + for (Instruction &I : BB) { + auto *Call = dyn_cast<CallInst>(&I); + if (Call && Call->getCalledFunction() == Fn) + Calls.push_back(Call); + } + } +} + +/// Assuming \p F contains only one call to the function with the given \p FnID, +/// return that call. +static CallInst *findSingleCall(Function *F, omp::RuntimeFunction FnID, + OpenMPIRBuilder &OMPBuilder) { + SmallVector<CallInst *, 1> Calls; + findCalls(F, FnID, OMPBuilder, Calls); + EXPECT_EQ(1u, Calls.size()); + if (Calls.size() != 1) + return nullptr; + return Calls.front(); +} + class OpenMPIRBuilderTest : public testing::Test { protected: void SetUp() override { @@ -147,7 +174,8 @@ protected: /// loop counter for use with tests that need a CanonicalLoopInfo object. 
CanonicalLoopInfo *buildSingleLoopFunction(DebugLoc DL, OpenMPIRBuilder &OMPBuilder, - Instruction **Call = nullptr, + int UseIVBits, + CallInst **Call = nullptr, BasicBlock **BodyCode = nullptr) { OMPBuilder.initialize(); F->setName("func"); @@ -156,6 +184,10 @@ protected: OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); Value *TripCount = F->getArg(0); + Type *IVType = Type::getIntNTy(Builder.getContext(), UseIVBits); + Value *CastedTripCount = + Builder.CreateZExtOrTrunc(TripCount, IVType, "tripcount"); + auto LoopBodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *LC) { Builder.restoreIP(CodeGenIP); @@ -168,7 +200,7 @@ protected: *Call = CallInst; }; CanonicalLoopInfo *Loop = - OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, TripCount); + OMPBuilder.createCanonicalLoop(Loc, LoopBodyGenCB, CastedTripCount); // Finalize the function. Builder.restoreIP(Loop->getAfterIP()); @@ -188,6 +220,10 @@ class OpenMPIRBuilderTestWithParams : public OpenMPIRBuilderTest, public ::testing::WithParamInterface<omp::OMPScheduleType> {}; +class OpenMPIRBuilderTestWithIVBits + : public OpenMPIRBuilderTest, + public ::testing::WithParamInterface<int> {}; + // Returns the value stored in the given allocation. Returns null if the given // value is not a result of an InstTy instruction, if no value is stored or if // there is more than one store. 
@@ -1387,10 +1423,10 @@ TEST_F(OpenMPIRBuilderTest, CollapseNestedLoops) { TEST_F(OpenMPIRBuilderTest, TileSingleLoop) { OpenMPIRBuilder OMPBuilder(*M); - Instruction *Call; + CallInst *Call; BasicBlock *BodyCode; CanonicalLoopInfo *Loop = - buildSingleLoopFunction(DL, OMPBuilder, &Call, &BodyCode); + buildSingleLoopFunction(DL, OMPBuilder, 32, &Call, &BodyCode); Instruction *OrigIndVar = Loop->getIndVar(); EXPECT_EQ(Call->getOperand(1), OrigIndVar); @@ -1730,7 +1766,7 @@ TEST_F(OpenMPIRBuilderTest, TileSingleLoopCounts) { TEST_F(OpenMPIRBuilderTest, ApplySimd) { OpenMPIRBuilder OMPBuilder(*M); - CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder); + CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Simd-ize the loop. OMPBuilder.applySimd(DL, CLI); @@ -1761,7 +1797,7 @@ TEST_F(OpenMPIRBuilderTest, ApplySimd) { TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) { OpenMPIRBuilder OMPBuilder(*M); - CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder); + CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Unroll the loop. OMPBuilder.unrollLoopFull(DL, CLI); @@ -1784,7 +1820,7 @@ TEST_F(OpenMPIRBuilderTest, UnrollLoopFull) { TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) { OpenMPIRBuilder OMPBuilder(*M); - CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder); + CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Unroll the loop. CanonicalLoopInfo *UnrolledLoop = nullptr; @@ -1818,7 +1854,7 @@ TEST_F(OpenMPIRBuilderTest, UnrollLoopPartial) { TEST_F(OpenMPIRBuilderTest, UnrollLoopHeuristic) { OpenMPIRBuilder OMPBuilder(*M); - CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder); + CanonicalLoopInfo *CLI = buildSingleLoopFunction(DL, OMPBuilder, 32); // Unroll the loop. 
OMPBuilder.unrollLoopHeuristic(DL, CLI); @@ -1935,6 +1971,82 @@ TEST_F(OpenMPIRBuilderTest, StaticWorkShareLoop) { EXPECT_EQ(NumCallsInExitBlock, 3u); } +TEST_P(OpenMPIRBuilderTestWithIVBits, StaticChunkedWorkshareLoop) { + int IVBits = GetParam(); + + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + OpenMPIRBuilder OMPBuilder(*M); + + BasicBlock *Body; + CallInst *Call; + CanonicalLoopInfo *CLI = + buildSingleLoopFunction(DL, OMPBuilder, IVBits, &Call, &Body); + + Instruction *OrigIndVar = CLI->getIndVar(); + EXPECT_EQ(Call->getOperand(1), OrigIndVar); + + Type *LCTy = Type::getInt32Ty(Ctx); + Value *ChunkSize = ConstantInt::get(LCTy, 5); + InsertPointTy AllocaIP{&F->getEntryBlock(), + F->getEntryBlock().getFirstInsertionPt()}; + OMPBuilder.applyStaticChunkedWorkshareLoop(DL, CLI, AllocaIP, + /*NeedsBarrier=*/true, ChunkSize); + + OMPBuilder.finalize(); + EXPECT_FALSE(verifyModule(*M, &errs())); + + BasicBlock *Entry = &F->getEntryBlock(); + BasicBlock *Preheader = Entry->getSingleSuccessor(); + + BasicBlock *DispatchPreheader = Preheader->getSingleSuccessor(); + BasicBlock *DispatchHeader = DispatchPreheader->getSingleSuccessor(); + BasicBlock *DispatchCond = DispatchHeader->getSingleSuccessor(); + BasicBlock *DispatchBody = succ_begin(DispatchCond)[0]; + BasicBlock *DispatchExit = succ_begin(DispatchCond)[1]; + BasicBlock *DispatchAfter = DispatchExit->getSingleSuccessor(); + BasicBlock *Return = DispatchAfter->getSingleSuccessor(); + + BasicBlock *ChunkPreheader = DispatchBody->getSingleSuccessor(); + BasicBlock *ChunkHeader = ChunkPreheader->getSingleSuccessor(); + BasicBlock *ChunkCond = ChunkHeader->getSingleSuccessor(); + BasicBlock *ChunkBody = succ_begin(ChunkCond)[0]; + BasicBlock *ChunkExit = succ_begin(ChunkCond)[1]; + BasicBlock *ChunkInc = ChunkBody->getSingleSuccessor(); + BasicBlock *ChunkAfter = ChunkExit->getSingleSuccessor(); + + BasicBlock *DispatchInc = ChunkAfter; + + EXPECT_EQ(ChunkBody, Body); + 
EXPECT_EQ(ChunkInc->getSingleSuccessor(), ChunkHeader); + EXPECT_EQ(DispatchInc->getSingleSuccessor(), DispatchHeader); + + EXPECT_TRUE(isa<ReturnInst>(Return->front())); + + Value *NewIV = Call->getOperand(1); + EXPECT_EQ(NewIV->getType()->getScalarSizeInBits(), IVBits); + + CallInst *InitCall = findSingleCall( + F, + (IVBits > 32) ? omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u + : omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u, + OMPBuilder); + EXPECT_EQ(InitCall->getParent(), Preheader); + EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(2))->getSExtValue(), 33); + EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(7))->getSExtValue(), 1); + EXPECT_EQ(cast<ConstantInt>(InitCall->getArgOperand(8))->getSExtValue(), 5); + + CallInst *FiniCall = findSingleCall( + F, omp::RuntimeFunction::OMPRTL___kmpc_for_static_fini, OMPBuilder); + EXPECT_EQ(FiniCall->getParent(), DispatchExit); + + CallInst *BarrierCall = findSingleCall( + F, omp::RuntimeFunction::OMPRTL___kmpc_barrier, OMPBuilder); + EXPECT_EQ(BarrierCall->getParent(), DispatchExit); +} + +INSTANTIATE_TEST_SUITE_P(IVBits, OpenMPIRBuilderTestWithIVBits, + ::testing::Values(8, 16, 32, 64)); + TEST_P(OpenMPIRBuilderTestWithParams, DynamicWorkShareLoop) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); @@ -3283,21 +3395,6 @@ xorAtomicReduction(OpenMPIRBuilder::InsertPointTy IP, Type *Ty, Value *LHS, return Builder.saveIP(); } -/// Populate Calls with call instructions calling the function with the given -/// FnID from the given function F. 
-static void findCalls(Function *F, omp::RuntimeFunction FnID, - OpenMPIRBuilder &OMPBuilder, - SmallVectorImpl<CallInst *> &Calls) { - Function *Fn = OMPBuilder.getOrCreateRuntimeFunctionPtr(FnID); - for (BasicBlock &BB : *F) { - for (Instruction &I : BB) { - auto *Call = dyn_cast<CallInst>(&I); - if (Call && Call->getCalledFunction() == Fn) - Calls.push_back(Call); - } - } -} - TEST_F(OpenMPIRBuilderTest, CreateReductions) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); |