//===- AMDGPURewriteUndefForPHI.cpp ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file implements rewriting of undef incoming operands for certain PHIs
// in a structurized CFG. This pass only works on IR that has gone through the
// StructurizeCFG pass, and it has an additional limitation that requires it to
// run after SIAnnotateControlFlow.
//
// To achieve optimal code generation for AMDGPU, we assume that uniformity
// analysis reports a PHI in the join block of a divergent branch as uniform if
// it has one unique uniform incoming value plus additional undefined/poisoned
// incoming values. That is to say, the later compiler pipeline must ensure that
// such a PHI always returns a uniform value and works correctly. Let's look at
// two typical patterns in a structurized CFG that need to be handled. (In both
// patterns, block %if terminates with a divergent branch.)
//
//  Pattern A: Block with undefined incoming value dominates defined predecessor
//  %if
//  | \
//  | %then
//  | /
//  %endif: %phi = phi [%undef, %if], [%uniform, %then]
//
//  Pattern B: Block with defined incoming value dominates undefined predecessor
//  %if
//  | \
//  | %then
//  | /
//  %endif: %phi = phi [%uniform, %if], [%undef, %then]
//
// For pattern A, by reporting %phi as uniform, the later pipeline needs to make
// sure it is handled correctly. The backend usually allocates a scalar
// register, and if any thread in a wave takes the %then path, the scalar
// register will get the %uniform value.
//
// For pattern B, this pass replaces the undef operand with the other defined
// value, so the scalar register allocated for such a PHI gets the correct
// liveness. Without this transformation, the scalar register may be overwritten
// in the %then block.
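//
// For illustration, a minimal hypothetical IR sketch of pattern B (the name
// %divergent.cond is made up for the example; the rest follows the diagram
// above):
//
//   if:
//     %uniform = ...
//     br i1 %divergent.cond, label %then, label %endif
//   then:
//     br label %endif
//   endif:
//     %phi = phi i32 [ %uniform, %if ], [ undef, %then ]
//
// Since %if ends in a divergent branch and dominates both %then and %endif,
// this pass replaces all uses of %phi with %uniform and erases the PHI.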
//
// Limitation note:
// If the join block of divergent threads is a loop header, the pass cannot
// handle it correctly right now. In the case below, the undef in %phi should
// also be rewritten. Currently we depend on SIAnnotateControlFlow to split the
// %header block to get a separate join block, after which we can rewrite the
// undef correctly.
//     %if
//     | \
//     | %then
//     | /
// -> %header: %phi = phi [%uniform, %if], [%undef, %then], [%uniform2, %header]
// |   |
// \---

#include "AMDGPU.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

#define DEBUG_TYPE "amdgpu-rewrite-undef-for-phi"

namespace {

class AMDGPURewriteUndefForPHILegacy : public FunctionPass {
public:
  static char ID;
  AMDGPURewriteUndefForPHILegacy() : FunctionPass(ID) {}
  bool runOnFunction(Function &F) override;
  StringRef getPassName() const override {
    return "AMDGPU Rewrite Undef for PHI";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<UniformityInfoWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();

    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace
char AMDGPURewriteUndefForPHILegacy::ID = 0;

INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
                      "Rewrite undef for PHI", false, false)
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(AMDGPURewriteUndefForPHILegacy, DEBUG_TYPE,
                    "Rewrite undef for PHI", false, false)

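// Replace non-divergent PHIs that merge one unique defined incoming value with
// undef/poison incoming values produced along divergent control flow. If the
// incoming block carrying the defined value ends in a divergent branch and
// dominates both the PHI's parent block and every predecessor supplying an
// undef, all uses of the PHI are replaced with the defined value and the PHI
// is erased.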
bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT) {
  bool Changed = false;
  SmallVector<PHINode *> ToBeDeleted;
  for (auto &BB : F) {
    for (auto &PHI : BB.phis()) {
      if (UA.isDivergent(&PHI))
        continue;

      // The unique defined (non-undef/poison) incoming value for the PHI node.
      Value *UniqueDefinedIncoming = nullptr;
      // The divergent block with a defined incoming value that dominates all
      // other blocks with the same incoming value.
      BasicBlock *DominateBB = nullptr;
      // Predecessors with undefined incoming value (excluding loop backedge).
      SmallVector<BasicBlock *> Undefs;

      for (unsigned i = 0; i < PHI.getNumIncomingValues(); i++) {
        Value *Incoming = PHI.getIncomingValue(i);
        BasicBlock *IncomingBB = PHI.getIncomingBlock(i);

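        // A PHI may list itself as an incoming value through a loop backedge;
        // ignore such self references.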
        if (Incoming == &PHI)
          continue;

        if (isa<UndefValue>(Incoming)) {
          // An undef coming from a loop backedge will not be replaced.
          if (!DT->dominates(&BB, IncomingBB))
            Undefs.push_back(IncomingBB);
          continue;
        }

        if (!UniqueDefinedIncoming) {
          UniqueDefinedIncoming = Incoming;
          DominateBB = IncomingBB;
        } else if (Incoming == UniqueDefinedIncoming) {
          // Update DominateBB if necessary.
          if (DT->dominates(IncomingBB, DominateBB))
            DominateBB = IncomingBB;
        } else {
          UniqueDefinedIncoming = nullptr;
          break;
        }
      }
      // We only need to replace the undef for a PHI that merges
      // defined/undefined values from divergent threads.
      // TODO: We should still be able to replace undef value if the unique
      // value is a Constant.
      if (!UniqueDefinedIncoming || Undefs.empty() ||
          !UA.isDivergent(DominateBB->getTerminator()))
        continue;

      // We only replace the undef when DominateBB truly dominates all the
      // other predecessors with undefined incoming value. Make sure DominateBB
      // dominates BB so that UniqueDefinedIncoming is available in BB and
      // afterwards.
      if (DT->dominates(DominateBB, &BB) && all_of(Undefs, [&](BasicBlock *UD) {
            return DT->dominates(DominateBB, UD);
          })) {
        PHI.replaceAllUsesWith(UniqueDefinedIncoming);
        ToBeDeleted.push_back(&PHI);
        Changed = true;
      }
    }
  }

  for (auto *PHI : ToBeDeleted)
    PHI->eraseFromParent();

  return Changed;
}

bool AMDGPURewriteUndefForPHILegacy::runOnFunction(Function &F) {
  UniformityInfo &UA =
      getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  return rewritePHIs(F, UA, DT);
}

PreservedAnalyses
AMDGPURewriteUndefForPHIPass::run(Function &F, FunctionAnalysisManager &AM) {
  UniformityInfo &UA = AM.getResult<UniformityInfoAnalysis>(F);
  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  bool Changed = rewritePHIs(F, UA, DT);
  if (Changed) {
    PreservedAnalyses PA;
    PA.preserveSet<CFGAnalyses>();
    return PA;
  }

  return PreservedAnalyses::all();
}

FunctionPass *llvm::createAMDGPURewriteUndefForPHILegacyPass() {
  return new AMDGPURewriteUndefForPHILegacy();
}