1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
|
//===-- SILowerWWMCopies.cpp - Lower Copies after regalloc ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Lowering the WWM_COPY instructions for various register classes.
/// AMDGPU target generates WWM_COPY instruction to differentiate WWM
/// copy from COPY. This pass generates the necessary exec mask manipulation
/// instructions to replicate 'Whole Wave Mode' and lowers WWM_COPY back to
/// COPY.
//
//===----------------------------------------------------------------------===//
#include "SILowerWWMCopies.h"
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/InitializePasses.h"
using namespace llvm;
#define DEBUG_TYPE "si-lower-wwm-copies"
namespace {
class SILowerWWMCopies {
public:
SILowerWWMCopies(LiveIntervals *LIS, SlotIndexes *SI, VirtRegMap *VRM)
: LIS(LIS), Indexes(SI), VRM(VRM) {}
bool run(MachineFunction &MF);
private:
bool isSCCLiveAtMI(const MachineInstr &MI);
void addToWWMSpills(MachineFunction &MF, Register Reg);
LiveIntervals *LIS;
SlotIndexes *Indexes;
VirtRegMap *VRM;
const SIRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
SIMachineFunctionInfo *MFI;
};
class SILowerWWMCopiesLegacy : public MachineFunctionPass {
public:
static char ID;
SILowerWWMCopiesLegacy() : MachineFunctionPass(ID) {
initializeSILowerWWMCopiesLegacyPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override { return "SI Lower WWM Copies"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
AU.addUsedIfAvailable<SlotIndexesWrapperPass>();
AU.addUsedIfAvailable<VirtRegMapWrapperLegacy>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
} // End anonymous namespace.
INITIALIZE_PASS_BEGIN(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies",
false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(VirtRegMapWrapperLegacy)
INITIALIZE_PASS_END(SILowerWWMCopiesLegacy, DEBUG_TYPE, "SI Lower WWM Copies",
false, false)
char SILowerWWMCopiesLegacy::ID = 0;
char &llvm::SILowerWWMCopiesLegacyID = SILowerWWMCopiesLegacy::ID;
bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) {
// We can't determine the liveness info if LIS isn't available. Early return
// in that case and always assume SCC is live.
if (!LIS)
return true;
LiveRange &LR =
LIS->getRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI));
SlotIndex Idx = LIS->getInstructionIndex(MI);
return LR.liveAt(Idx);
}
// If \p Reg is assigned with a physical VGPR, add the latter into wwm-spills
// for preserving its entire lanes at function prolog/epilog.
void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) {
if (Reg.isPhysical())
return;
// FIXME: VRM may be null here.
MCRegister PhysReg = VRM->getPhys(Reg);
assert(PhysReg && "should have allocated a physical register");
MFI->allocateWWMSpill(MF, PhysReg);
}
bool SILowerWWMCopiesLegacy::runOnMachineFunction(MachineFunction &MF) {
auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
auto *LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
auto *SIWrapper = getAnalysisIfAvailable<SlotIndexesWrapperPass>();
auto *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
auto *VRMWrapper = getAnalysisIfAvailable<VirtRegMapWrapperLegacy>();
auto *VRM = VRMWrapper ? &VRMWrapper->getVRM() : nullptr;
SILowerWWMCopies Impl(LIS, Indexes, VRM);
return Impl.run(MF);
}
PreservedAnalyses
SILowerWWMCopiesPass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(MF);
auto *VRM = MFAM.getCachedResult<VirtRegMapAnalysis>(MF);
SILowerWWMCopies Impl(LIS, Indexes, VRM);
Impl.run(MF);
return PreservedAnalyses::all();
}
bool SILowerWWMCopies::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
MFI = MF.getInfo<SIMachineFunctionInfo>();
TRI = ST.getRegisterInfo();
MRI = &MF.getRegInfo();
if (!MFI->hasVRegFlags())
return false;
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
if (MI.getOpcode() != AMDGPU::WWM_COPY)
continue;
// TODO: Club adjacent WWM ops between same exec save/restore
assert(TII->isVGPRCopy(MI));
// For WWM vector copies, manipulate the exec mask around the copy
// instruction.
const DebugLoc &DL = MI.getDebugLoc();
MachineBasicBlock::iterator InsertPt = MI.getIterator();
Register RegForExecCopy = MFI->getSGPRForEXECCopy();
TII->insertScratchExecCopy(MF, MBB, InsertPt, DL, RegForExecCopy,
isSCCLiveAtMI(MI), Indexes);
TII->restoreExec(MF, MBB, ++InsertPt, DL, RegForExecCopy, Indexes);
addToWWMSpills(MF, MI.getOperand(0).getReg());
LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI);
// Lower WWM_COPY back to COPY
MI.setDesc(TII->get(AMDGPU::COPY));
Changed |= true;
}
}
return Changed;
}
|