diff options
| author | witstorm95 <witstorm@163.com> | 2023-08-09 11:21:45 +0800 |
|---|---|---|
| committer | Chuanqi Xu <chuaniq.xcq@alibaba-inc.com> | 2023-08-09 11:21:57 +0800 |
| commit | 132bb5cc5fd5f91e6a25e8004ac5d6516518f31a (patch) | |
| tree | 62b58b5c73aeb355c990fff2f0da7e6a1ff51715 | |
| parent | 201c10537a3c1ad4a86b840834c204575d10d589 (diff) | |
| download | llvm-132bb5cc5fd5f91e6a25e8004ac5d6516518f31a.zip llvm-132bb5cc5fd5f91e6a25e8004ac5d6516518f31a.tar.gz llvm-132bb5cc5fd5f91e6a25e8004ac5d6516518f31a.tar.bz2 | |
[NFC][Coroutines] Use a reverse post-order to guide the computation about cross suspend infomation to reach a fixed point faster.
Fixed https://github.com/llvm/llvm-project/issues/62348
Propagate cross suspend point information along reverse post-order.
It does not modify the original function, just selects a better
traversal order.
Before the patch:
```
n: 20000
4.31user 0.11system 0:04.44elapsed 99%CPU (0avgtext+0avgdata
552352maxresident)k
0inputs+8848outputs (0major+126254minor)pagefaults 0swaps
n: 40000
11.24user 0.40system 0:11.66elapsed 99%CPU (0avgtext+0avgdata
1788404maxresident)k
0inputs+17600outputs (0major+431105minor)pagefaults 0swaps
n: 60000
21.65user 0.96system 0:22.62elapsed 99%CPU (0avgtext+0avgdata
3809836maxresident)k
0inputs+26352outputs (0major+934749minor)pagefaults 0swaps
n: 80000
37.05user 1.53system 0:38.58elapsed 99%CPU (0avgtext+0avgdata
6602396maxresident)k
0inputs+35096outputs (0major+1622584minor)pagefaults 0swaps
n: 100000
51.87user 2.67system 0:54.54elapsed 99%CPU (0avgtext+0avgdata
10210736maxresident)k
0inputs+43848outputs (0major+2518945minor)pagefaults 0swaps
```
After the patch:
```
n: 20000
3.08user 0.12system 0:03.21elapsed 99%CPU (0avgtext+0avgdata
551012maxresident)k
0inputs+8848outputs (0major+129349minor)pagefaults 0swaps
n: 40000
5.88user 0.33system 0:06.22elapsed 99%CPU (0avgtext+0avgdata
1789248maxresident)k
0inputs+17600outputs (0major+435096minor)pagefaults 0swaps
n: 60000
8.84user 0.77system 0:09.63elapsed 99%CPU (0avgtext+0avgdata
3807800maxresident)k
0inputs+26352outputs (0major+939119minor)pagefaults 0swaps
n: 80000
11.64user 1.58system 0:13.23elapsed 99%CPU (0avgtext+0avgdata
6604708maxresident)k
0inputs+35096outputs (0major+1629566minor)pagefaults 0swaps
n: 100000
15.21user 2.56system 0:17.79elapsed 99%CPU (0avgtext+0avgdata
10208828maxresident)k
8inputs+43848outputs (0major+2526611minor)pagefaults 0swaps
```
Reviewed By: MatzeB, ChuanqiXu
Differential Revision: https://reviews.llvm.org/D156850
| -rw-r--r-- | llvm/lib/Transforms/Coroutines/CoroFrame.cpp | 34 |
1 files changed, 18 insertions, 16 deletions
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 1f37327..58115fa 100644 --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -63,7 +63,7 @@ public: llvm::sort(V); } - size_t blockToIndex(BasicBlock *BB) const { + size_t blockToIndex(BasicBlock const *BB) const { auto *I = llvm::lower_bound(V, BB); assert(I != V.end() && *I == BB && "BasicBlockNumberng: Unknown block"); return I - V.begin(); @@ -112,10 +112,11 @@ class SuspendCrossingInfo { } /// Compute the BlockData for the current function in one iteration. - /// Returns whether the BlockData changes in this iteration. /// Initialize - Whether this is the first iteration, we can optimize /// the initial case a little bit by manual loop switch. - template <bool Initialize = false> bool computeBlockData(); + /// Returns whether the BlockData changes in this iteration. + template <bool Initialize = false> + bool computeBlockData(const ReversePostOrderTraversal<Function *> &RPOT); public: #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -223,12 +224,14 @@ LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const { } #endif -template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() { - const size_t N = Mapping.size(); +template <bool Initialize> +bool SuspendCrossingInfo::computeBlockData( + const ReversePostOrderTraversal<Function *> &RPOT) { bool Changed = false; - for (size_t I = 0; I < N; ++I) { - auto &B = Block[I]; + for (const BasicBlock *BB : RPOT) { + auto BBNo = Mapping.blockToIndex(BB); + auto &B = Block[BBNo]; // We don't need to count the predecessors when initialization. if constexpr (!Initialize) @@ -261,7 +264,7 @@ template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() { } if (B.Suspend) { - // If block S is a suspend block, it should kill all of the blocks it + // If block B is a suspend block, it should kill all of the blocks it // consumes. B.Kills |= B.Consumes; } else if (B.End) { @@ -273,8 +276,8 @@ template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() { } else { // This is reached when B block it not Suspend nor coro.end and it // need to make sure that it is not in the kill set. - B.KillLoop |= B.Kills[I]; - B.Kills.reset(I); + B.KillLoop |= B.Kills[BBNo]; + B.Kills.reset(BBNo); } if constexpr (!Initialize) { @@ -283,9 +286,6 @@ template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() { } } - if constexpr (Initialize) - return true; - return Changed; } @@ -325,9 +325,11 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape) markSuspendBlock(Save); } - computeBlockData</*Initialize=*/true>(); - - while (computeBlockData()) + // It is considered to be faster to use RPO traversal for forward-edges + // dataflow analysis. + ReversePostOrderTraversal<Function *> RPOT(&F); + computeBlockData</*Initialize=*/true>(RPOT); + while (computeBlockData</*Initialize*/ false>(RPOT)) ; LLVM_DEBUG(dump()); |
