aboutsummaryrefslogtreecommitdiff
path: root/bolt/lib/Passes/LoopInversionPass.cpp
blob: 250a971d204c0a9f3ee0c72b223771f52c7b81a0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
//===- bolt/Passes/LoopInversionPass.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the LoopInversionPass class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/LoopInversionPass.h"
#include "bolt/Core/ParallelUtilities.h"

using namespace llvm;

namespace opts {
extern cl::OptionCategory BoltCategory;

extern cl::opt<bolt::ReorderBasicBlocks::LayoutType> ReorderBlocks;

static cl::opt<bool> LoopReorder(
    "loop-inversion-opt",
    cl::desc("reorder unconditional jump instructions in loops optimization"),
    cl::init(true), cl::cat(BoltCategory), cl::ReallyHidden);
} // namespace opts

namespace llvm {
namespace bolt {

bool LoopInversionPass::runOnFunction(BinaryFunction &BF) {
  bool IsChanged = false;
  if (BF.getLayout().block_size() < 3 || !BF.hasValidProfile())
    return false;

  BF.getLayout().updateLayoutIndices();
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    if (BB->succ_size() != 1 || BB->pred_size() != 1)
      continue;

    BinaryBasicBlock *SuccBB = *BB->succ_begin();
    BinaryBasicBlock *PredBB = *BB->pred_begin();
    const unsigned BBIndex = BB->getLayoutIndex();
    const unsigned SuccBBIndex = SuccBB->getLayoutIndex();
    if (SuccBB == PredBB && BB != SuccBB && BBIndex != 0 && SuccBBIndex != 0 &&
        SuccBB->succ_size() == 2 &&
        BB->getFragmentNum() == SuccBB->getFragmentNum()) {
      // Get the second successor (after loop BB)
      BinaryBasicBlock *SecondSucc = nullptr;
      for (BinaryBasicBlock *Succ : SuccBB->successors()) {
        if (Succ != &*BB) {
          SecondSucc = Succ;
          break;
        }
      }

      assert(SecondSucc != nullptr && "Unable to find a second BB successor");
      const uint64_t LoopCount = SuccBB->getBranchInfo(*BB).Count;
      const uint64_t ExitCount = SuccBB->getBranchInfo(*SecondSucc).Count;

      if (LoopCount < ExitCount) {
        if (BBIndex > SuccBBIndex)
          continue;
      } else if (BBIndex < SuccBBIndex) {
        continue;
      }

      IsChanged = true;
      BB->setLayoutIndex(SuccBBIndex);
      SuccBB->setLayoutIndex(BBIndex);
    }
  }

  if (IsChanged) {
    BinaryFunction::BasicBlockOrderType NewOrder(BF.getLayout().block_begin(),
                                                 BF.getLayout().block_end());
    llvm::sort(NewOrder, [&](BinaryBasicBlock *BB1, BinaryBasicBlock *BB2) {
      return BB1->getLayoutIndex() < BB2->getLayoutIndex();
    });
    BF.getLayout().update(NewOrder);
  }

  return IsChanged;
}

Error LoopInversionPass::runOnFunctions(BinaryContext &BC) {
  std::atomic<uint64_t> ModifiedFuncCount{0};
  if (opts::ReorderBlocks == ReorderBasicBlocks::LT_NONE ||
      opts::LoopReorder == false)
    return Error::success();

  ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
    if (runOnFunction(BF))
      ++ModifiedFuncCount;
  };

  ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
    return !shouldOptimize(BF);
  };

  ParallelUtilities::runOnEachFunction(
      BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc,
      "LoopInversionPass");

  BC.outs() << "BOLT-INFO: " << ModifiedFuncCount
            << " Functions were reordered by LoopInversionPass\n";
  return Error::success();
}

} // end namespace bolt
} // end namespace llvm