aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorwitstorm95 <witstorm@163.com>2023-08-09 11:21:45 +0800
committerChuanqi Xu <chuaniq.xcq@alibaba-inc.com>2023-08-09 11:21:57 +0800
commit132bb5cc5fd5f91e6a25e8004ac5d6516518f31a (patch)
tree62b58b5c73aeb355c990fff2f0da7e6a1ff51715
parent201c10537a3c1ad4a86b840834c204575d10d589 (diff)
downloadllvm-132bb5cc5fd5f91e6a25e8004ac5d6516518f31a.zip
llvm-132bb5cc5fd5f91e6a25e8004ac5d6516518f31a.tar.gz
llvm-132bb5cc5fd5f91e6a25e8004ac5d6516518f31a.tar.bz2
[NFC][Coroutines] Use a reverse post-order to guide the computation about cross suspend infomation to reach a fixed point faster.
Fixed https://github.com/llvm/llvm-project/issues/62348 Propagate cross suspend point information along reverse post-order. It does not modify the original function, just selects a better traversal order. Before the patch: ``` n: 20000 4.31user 0.11system 0:04.44elapsed 99%CPU (0avgtext+0avgdata 552352maxresident)k 0inputs+8848outputs (0major+126254minor)pagefaults 0swaps n: 40000 11.24user 0.40system 0:11.66elapsed 99%CPU (0avgtext+0avgdata 1788404maxresident)k 0inputs+17600outputs (0major+431105minor)pagefaults 0swaps n: 60000 21.65user 0.96system 0:22.62elapsed 99%CPU (0avgtext+0avgdata 3809836maxresident)k 0inputs+26352outputs (0major+934749minor)pagefaults 0swaps n: 80000 37.05user 1.53system 0:38.58elapsed 99%CPU (0avgtext+0avgdata 6602396maxresident)k 0inputs+35096outputs (0major+1622584minor)pagefaults 0swaps n: 100000 51.87user 2.67system 0:54.54elapsed 99%CPU (0avgtext+0avgdata 10210736maxresident)k 0inputs+43848outputs (0major+2518945minor)pagefaults 0swaps ``` After the patch: ``` n: 20000 3.08user 0.12system 0:03.21elapsed 99%CPU (0avgtext+0avgdata 551012maxresident)k 0inputs+8848outputs (0major+129349minor)pagefaults 0swaps n: 40000 5.88user 0.33system 0:06.22elapsed 99%CPU (0avgtext+0avgdata 1789248maxresident)k 0inputs+17600outputs (0major+435096minor)pagefaults 0swaps n: 60000 8.84user 0.77system 0:09.63elapsed 99%CPU (0avgtext+0avgdata 3807800maxresident)k 0inputs+26352outputs (0major+939119minor)pagefaults 0swaps n: 80000 11.64user 1.58system 0:13.23elapsed 99%CPU (0avgtext+0avgdata 6604708maxresident)k 0inputs+35096outputs (0major+1629566minor)pagefaults 0swaps n: 100000 15.21user 2.56system 0:17.79elapsed 99%CPU (0avgtext+0avgdata 10208828maxresident)k 8inputs+43848outputs (0major+2526611minor)pagefaults 0swaps ``` Reviewed By: MatzeB, ChuanqiXu Differential Revision: https://reviews.llvm.org/D156850
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroFrame.cpp34
1 files changed, 18 insertions, 16 deletions
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 1f37327..58115fa 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -63,7 +63,7 @@ public:
llvm::sort(V);
}
- size_t blockToIndex(BasicBlock *BB) const {
+ size_t blockToIndex(BasicBlock const *BB) const {
auto *I = llvm::lower_bound(V, BB);
assert(I != V.end() && *I == BB && "BasicBlockNumberng: Unknown block");
return I - V.begin();
@@ -112,10 +112,11 @@ class SuspendCrossingInfo {
}
/// Compute the BlockData for the current function in one iteration.
- /// Returns whether the BlockData changes in this iteration.
/// Initialize - Whether this is the first iteration, we can optimize
/// the initial case a little bit by manual loop switch.
- template <bool Initialize = false> bool computeBlockData();
+ /// Returns whether the BlockData changes in this iteration.
+ template <bool Initialize = false>
+ bool computeBlockData(const ReversePostOrderTraversal<Function *> &RPOT);
public:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -223,12 +224,14 @@ LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
}
#endif
-template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
- const size_t N = Mapping.size();
+template <bool Initialize>
+bool SuspendCrossingInfo::computeBlockData(
+ const ReversePostOrderTraversal<Function *> &RPOT) {
bool Changed = false;
- for (size_t I = 0; I < N; ++I) {
- auto &B = Block[I];
+ for (const BasicBlock *BB : RPOT) {
+ auto BBNo = Mapping.blockToIndex(BB);
+ auto &B = Block[BBNo];
// We don't need to count the predecessors when initialization.
if constexpr (!Initialize)
@@ -261,7 +264,7 @@ template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
}
if (B.Suspend) {
- // If block S is a suspend block, it should kill all of the blocks it
+ // If block B is a suspend block, it should kill all of the blocks it
// consumes.
B.Kills |= B.Consumes;
} else if (B.End) {
@@ -273,8 +276,8 @@ template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
} else {
// This is reached when B block it not Suspend nor coro.end and it
// need to make sure that it is not in the kill set.
- B.KillLoop |= B.Kills[I];
- B.Kills.reset(I);
+ B.KillLoop |= B.Kills[BBNo];
+ B.Kills.reset(BBNo);
}
if constexpr (!Initialize) {
@@ -283,9 +286,6 @@ template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
}
}
- if constexpr (Initialize)
- return true;
-
return Changed;
}
@@ -325,9 +325,11 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
markSuspendBlock(Save);
}
- computeBlockData</*Initialize=*/true>();
-
- while (computeBlockData())
+ // It is considered to be faster to use RPO traversal for forward-edges
+ // dataflow analysis.
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+ computeBlockData</*Initialize=*/true>(RPOT);
+ while (computeBlockData</*Initialize*/ false>(RPOT))
;
LLVM_DEBUG(dump());