aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp
blob: 493b7541cdd8147dc3782f95ea75faea522b8b5f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
//===-- AMDGPURegBankSelect.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Assign register banks to all register operands of G_ instructions using
/// machine uniformity analysis.
/// Sgpr - uniform values and some lane masks
/// Vgpr - divergent, non-S1 values
/// Vcc  - divergent S1 values (lane masks)
/// However, in some cases G_ instructions with this register bank assignment
/// can't be inst-selected. This is solved in AMDGPURegBankLegalize.
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUGlobalISelUtils.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/InitializePasses.h"

#define DEBUG_TYPE "amdgpu-regbankselect"

using namespace llvm;
using namespace AMDGPU;

namespace {

/// Machine function pass that assigns register banks to the virtual register
/// operands of generic (G_) instructions and of COPYs.
class AMDGPURegBankSelect : public MachineFunctionPass {
public:
  /// Pass identifier handed to the MachineFunctionPass constructor.
  static char ID;

  AMDGPURegBankSelect() : MachineFunctionPass(ID) {}

  /// Name under which this pass is reported.
  StringRef getPassName() const override {
    return "AMDGPU Register Bank Select";
  }

  /// Analyses this pass needs: the target pass configuration, the GlobalISel
  /// CSE wrapper and machine uniformity analysis, followed by whatever the
  /// base class requests.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
    AU.addRequired<GISelCSEAnalysisWrapperPass>();
    AU.addRequired<MachineUniformityAnalysisPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  /// Once this pass has run, all virtual registers have a register bank;
  /// later passes are expected to keep it that way, so advertise the
  /// RegBankSelected property.
  MachineFunctionProperties getSetProperties() const override {
    MachineFunctionProperties Props;
    Props.setRegBankSelected();
    return Props;
  }

  /// Per-function entry point; defined out of line.
  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // End anonymous namespace.

// Pass registration under the DEBUG_TYPE name. The dependencies listed between
// BEGIN and END are the same three analyses that getAnalysisUsage() requires.
INITIALIZE_PASS_BEGIN(AMDGPURegBankSelect, DEBUG_TYPE,
                      "AMDGPU Register Bank Select", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE,
                    "AMDGPU Register Bank Select", false, false)

// Storage for the pass identifier that the constructor hands to
// MachineFunctionPass.
char AMDGPURegBankSelect::ID = 0;

// Definition of llvm::AMDGPURegBankSelectID, bound to this pass's ID so the
// pass can be referred to without access to the file-local class.
char &llvm::AMDGPURegBankSelectID = AMDGPURegBankSelect::ID;

/// Factory for the AMDGPU register bank select pass.
/// \returns a newly heap-allocated pass instance; the caller is presumably
/// responsible for its lifetime (typically the pass manager - TODO confirm).
FunctionPass *llvm::createAMDGPURegBankSelectPass() {
  auto *Pass = new AMDGPURegBankSelect();
  return Pass;
}

namespace {

/// Helper used by AMDGPURegBankSelect to decide which register bank a virtual
/// register gets and to insert the copies needed when a register already
/// carries a register class.
///
/// The class is only used inside this file, so it lives in an anonymous
/// namespace (internal linkage), matching the pass class above.
class RegBankSelectHelper {
  /// Builder used to insert the COPY instructions.
  MachineIRBuilder &B;
  /// Register info of the function being processed (taken from the builder).
  MachineRegisterInfo &MRI;
  /// Queried to recognise S32/S64 values that are lane masks.
  AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA;
  /// Queried for uniformity of a register.
  const MachineUniformityInfo &MUI;
  /// Provides the exec register for the temporal-divergence-copy check.
  const SIRegisterInfo &TRI;
  const RegisterBank *SgprRB;
  const RegisterBank *VgprRB;
  const RegisterBank *VccRB;

public:
  /// Caches the sgpr, vgpr and vcc register banks from \p RBI and keeps
  /// references to the builder and analyses for later queries.
  RegBankSelectHelper(MachineIRBuilder &B,
                      AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA,
                      const MachineUniformityInfo &MUI,
                      const SIRegisterInfo &TRI, const RegisterBankInfo &RBI)
      : B(B), MRI(*B.getMRI()), ILMA(ILMA), MUI(MUI), TRI(TRI),
        SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
        VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
        VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}

  // Temporal divergence copy: COPY to vgpr with implicit use of $exec inside of
  // the cycle
  // Note: uniformity analysis does not consider that registers with vgpr def
  // are divergent (you can have uniform value in vgpr).
  // - TODO: implicit use of $exec could be implemented as indicator that
  //   instruction is divergent
  //
  /// \returns true if the instruction defining \p Reg is a COPY whose single
  /// implicit operand is the exec register.
  /// \p Reg must have a defining instruction; the result of getVRegDef is
  /// dereferenced without a null check.
  bool isTemporalDivergenceCopy(Register Reg) {
    MachineInstr *MI = MRI.getVRegDef(Reg);
    if (!MI->isCopy() || MI->getNumImplicitOperands() != 1)
      return false;

    return MI->implicit_operands().begin()->getReg() == TRI.getExec();
  }

  /// Picks the register bank for \p Reg:
  /// - sgpr when \p Reg is not a temporal divergence copy and is either
  ///   uniform or an S32/S64 lane mask,
  /// - otherwise vcc when its type is s1,
  /// - otherwise vgpr.
  const RegisterBank *getRegBankToAssign(Register Reg) {
    if (!isTemporalDivergenceCopy(Reg) &&
        (MUI.isUniform(Reg) || ILMA.isS32S64LaneMask(Reg)))
      return SgprRB;
    if (MRI.getType(Reg) == LLT::scalar(1))
      return VccRB;
    return VgprRB;
  }

  // %rc:RegClass(s32) = G_ ...
  // ...
  // %a = G_ ..., %rc
  // ->
  // %rb:RegBank(s32) = G_ ...
  // %rc:RegClass(s32) = COPY %rb
  // ...
  // %a = G_ ..., %rb
  //
  /// Gives the def operand \p DefOP of \p MI a fresh register on bank \p RB,
  /// copies it back into the original (register class) register right after
  /// \p MI, and redirects generic users of the original register to the new
  /// one.
  void reAssignRegBankOnDef(MachineInstr &MI, MachineOperand &DefOP,
                            const RegisterBank *RB) {
    // Register that already has Register class got it during pre-inst selection
    // of another instruction. Maybe cross bank copy was required so we insert a
    // copy that can be removed later. This simplifies post regbanklegalize
    // combiner and avoids need to special case some patterns.
    Register Reg = DefOP.getReg();
    LLT Ty = MRI.getType(Reg);
    Register NewReg = MRI.createVirtualRegister({RB, Ty});
    DefOP.setReg(NewReg);

    // Insert the copy after MI, but never in the middle of a PHI/label run.
    auto &MBB = *MI.getParent();
    B.setInsertPt(MBB, MBB.SkipPHIsAndLabels(std::next(MI.getIterator())));
    B.buildCopy(Reg, NewReg);

    // The problem was discovered for uniform S1 that was used as both
    // lane mask(vcc) and regular sgpr S1.
    // - lane-mask(vcc) use was by si_if, this use is divergent and requires
    //   non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
    //   sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
    // - the regular sgpr S1(uniform) instruction is now broken since
    //   it uses sreg_64_xexec(S1) which is divergent.

    // Replace virtual registers with register class on generic instructions
    // uses with virtual registers with register bank.
    // Early-inc iteration: rewriting an operand removes it from Reg's use
    // list while that list is being walked.
    for (auto &UseMI : make_early_inc_range(MRI.use_instructions(Reg))) {
      if (UseMI.isPreISelOpcode()) {
        for (MachineOperand &Op : UseMI.operands()) {
          if (Op.isReg() && Op.getReg() == Reg)
            Op.setReg(NewReg);
        }
      }
    }
  }

  // %a = G_ ..., %rc
  // ->
  // %rb:RegBank(s32) = COPY %rc
  // %a = G_ ..., %rb
  //
  /// Makes the use operand \p UseOP of \p MI read a fresh register on bank
  /// \p RB that is a COPY of the original (register class) register.
  /// For a PHI the copy is placed after the original register's definition
  /// (past any PHIs and labels in that block); otherwise the builder is
  /// positioned at \p MI.
  void constrainRegBankUse(MachineInstr &MI, MachineOperand &UseOP,
                           const RegisterBank *RB) {
    Register Reg = UseOP.getReg();

    LLT Ty = MRI.getType(Reg);
    Register NewReg = MRI.createVirtualRegister({RB, Ty});
    UseOP.setReg(NewReg);

    if (MI.isPHI()) {
      auto DefMI = MRI.getVRegDef(Reg)->getIterator();
      MachineBasicBlock *DefMBB = DefMI->getParent();
      B.setInsertPt(*DefMBB, DefMBB->SkipPHIsAndLabels(std::next(DefMI)));
    } else {
      B.setInstr(MI);
    }

    B.buildCopy(NewReg, Reg);
  }
};

} // End anonymous namespace.

/// \returns the register held by \p Op when \p Op is a register operand that
/// names a virtual register; otherwise a default-constructed (invalid)
/// Register.
static Register getVReg(MachineOperand &Op) {
  // Operands of COPY and G_SI_CALL can be physical registers, so a register
  // operand alone is not enough; it also has to be virtual.
  if (Op.isReg() && Op.getReg().isVirtual())
    return Op.getReg();

  return Register();
}

/// Assigns a register bank to every virtual register operand of COPY and G_
/// instructions in \p MF.
/// \returns false (without touching \p MF) when the function is marked as
/// having failed instruction selection, true otherwise.
bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasFailedISel())
    return false;

  // Build a CSE-aware instruction builder and hook the CSE info up as an
  // observer so it sees the changes made below.
  const TargetPassConfig &PassConfig = getAnalysis<TargetPassConfig>();
  GISelCSEAnalysisWrapper &CSEWrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  GISelCSEInfo &CSEInfo = CSEWrapper.get(PassConfig.getCSEConfig());
  GISelObserverWrapper Observer;
  Observer.addObserver(&CSEInfo);

  CSEMIRBuilder B(MF);
  B.setCSEInfo(&CSEInfo);
  B.setChangeObserver(Observer);

  RAIIDelegateInstaller DelegateInstaller(MF, &Observer);
  RAIIMFObserverInstaller MFObserverInstaller(MF, Observer);

  IntrinsicLaneMaskAnalyzer ILMA(MF);
  MachineUniformityInfo &Uniformity =
      getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
  MachineRegisterInfo &MRI = *B.getMRI();
  const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
  RegBankSelectHelper Helper(B, ILMA, Uniformity, *Subtarget.getRegisterInfo(),
                             *Subtarget.getRegBankInfo());

  // On entry no virtual register has a register bank. The only virtual
  // registers that already carry a register class are those appearing as def
  // or use operands of instructions that were inst-selected early.

  for (MachineBasicBlock &Block : MF) {
    for (MachineInstr &Inst : Block) {
      // COPY: the destination vreg may already have a register class or a
      // bank. Only when it has neither does it get a bank here.
      if (Inst.isCopy()) {
        Register Dst = getVReg(Inst.getOperand(0));
        if (Dst.isValid() && !MRI.getRegClassOrNull(Dst)) {
          assert(!MRI.getRegBankOrNull(Dst));
          MRI.setRegBank(Dst, *Helper.getRegBankToAssign(Dst));
        }
        continue;
      }

      // Everything else that is not a G_ instruction is left alone.
      if (!Inst.isPreISelOpcode())
        continue;

      // Every vreg in a def or use operand of a G_ instruction must end up
      // with a register bank. The situations that can occur are:
      // - (1) vreg in a def or use operand with neither class nor bank
      // - (2) vreg in a def operand that has a register class
      // - (3) vreg in a use operand, defined by a G_ instruction
      // - (4) vreg in a use operand, defined by a pre-inst-selected
      //       instruction

      // Walking the defs of all G_ instructions covers (1), (2) and (3):
      // (1) is a plain setRegBank, (2) and (3) go through
      // reAssignRegBankOnDef.
      for (MachineOperand &Def : Inst.defs()) {
        Register Dst = getVReg(Def);
        if (!Dst.isValid())
          continue;

        const RegisterBank *Bank = Helper.getRegBankToAssign(Dst);
        if (!MRI.getRegClassOrNull(Dst)) {
          assert(!MRI.getRegBankOrNull(Dst));
          MRI.setRegBank(Dst, *Bank);
          continue;
        }

        Helper.reAssignRegBankOnDef(Inst, Def, Bank);
      }

      // Pre-inst-selected instructions are never rewritten by this pass, so
      // case (4) is resolved on the use side by inserting a copy through
      // constrainRegBankUse.
      for (MachineOperand &Use : Inst.uses()) {
        Register Src = getVReg(Use);
        if (!Src.isValid())
          continue;

        // Not case (4): either there is no register class on the vreg, or its
        // definition is a G_ instruction (case (3), handled above).
        const bool HasRegClass = MRI.getRegClassOrNull(Src) != nullptr;
        if (!HasRegClass || MRI.getVRegDef(Src)->isPreISelOpcode())
          continue;

        // Register-class vreg produced by a pre-inst-selected instruction.
        const RegisterBank *Bank = Helper.getRegBankToAssign(Src);
        Helper.constrainRegBankUse(Inst, Use, Bank);
      }
    }
  }

  return true;
}