1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
|
//===-- RISCVInsertWriteVXRM.cpp - Insert Write of RISC-V VXRM CSR --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass inserts writes to the VXRM CSR as needed by vector instructions.
// Each instruction that uses VXRM carries an operand that contains its required
// VXRM value. This pass tries to optimize placement to avoid redundant writes
// to VXRM.
//
// This is done using 2 dataflow algorithms. The first is a forward data flow
// to calculate where a VXRM value is available. The second is a backwards
// dataflow to determine where a VXRM value is anticipated.
//
// Finally, we use the results of these two dataflows to insert VXRM writes
// where a value is anticipated, but not available.
//
// FIXME: This pass does not split critical edges, so there can still be some
// redundancy.
//
// FIXME: If we are willing to have writes that aren't always needed, we could
// reduce the number of VXRM writes in some cases.
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;
#define DEBUG_TYPE "riscv-insert-write-vxrm"
#define RISCV_INSERT_WRITE_VXRM_NAME "RISC-V Insert Write VXRM Pass"
namespace {
class VXRMInfo {
uint8_t VXRMImm = 0;
enum : uint8_t {
Uninitialized,
Static,
Unknown,
} State = Uninitialized;
public:
VXRMInfo() {}
static VXRMInfo getUnknown() {
VXRMInfo Info;
Info.setUnknown();
return Info;
}
bool isValid() const { return State != Uninitialized; }
void setUnknown() { State = Unknown; }
bool isUnknown() const { return State == Unknown; }
bool isStatic() const { return State == Static; }
void setVXRMImm(unsigned Imm) {
assert(Imm <= 3 && "Unexpected VXRM value");
VXRMImm = Imm;
State = Static;
}
unsigned getVXRMImm() const {
assert(isStatic() && VXRMImm <= 3 && "Unexpected state");
return VXRMImm;
}
bool operator==(const VXRMInfo &Other) const {
// Uninitialized is only equal to another Uninitialized.
if (State != Other.State)
return false;
if (isStatic())
return VXRMImm == Other.VXRMImm;
assert((isValid() || isUnknown()) && "Unexpected state");
return true;
}
bool operator!=(const VXRMInfo &Other) const { return !(*this == Other); }
// Calculate the VXRMInfo visible to a block assuming this and Other are
// both predecessors.
VXRMInfo intersect(const VXRMInfo &Other) const {
// If the new value isn't valid, ignore it.
if (!Other.isValid())
return *this;
// If this value isn't valid, this must be the first predecessor, use it.
if (!isValid())
return Other;
// If either is unknown, the result is unknown.
if (isUnknown() || Other.isUnknown())
return VXRMInfo::getUnknown();
// If we have an exact match, return this.
if (*this == Other)
return *this;
// Otherwise the result is unknown.
return VXRMInfo::getUnknown();
}
// Calculate the VXRMInfo visible to a block assuming this and Other
// are both predecessors. To allow speculatively running WriteVXRM
// we will ignore Unknowns if one of this and Other have valid
// WriteVXRM. Rationale: WriteVXRM causes a pipeline flush in some
// uarchs and moving it outside loops is very important for some
// workloads.
VXRMInfo intersectAnticipated(const VXRMInfo &Other) const {
// If the new value isn't valid, ignore it.
if (!Other.isValid())
return *this;
// If this value isn't valid, this must be the first predecessor, use it.
if (!isValid())
return Other;
// If either is unknown, the result is the other one.
if (isUnknown())
return Other;
if (Other.isUnknown())
return *this;
// If we have an exact match, return this.
if (*this == Other)
return *this;
// Otherwise the result is unknown.
return VXRMInfo::getUnknown();
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Support for debugging, callable in GDB: V->dump()
LLVM_DUMP_METHOD void dump() const {
print(dbgs());
dbgs() << "\n";
}
void print(raw_ostream &OS) const {
OS << '{';
if (!isValid())
OS << "Uninitialized";
else if (isUnknown())
OS << "Unknown";
else
OS << getVXRMImm();
OS << '}';
}
#endif
};
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VXRMInfo &V) {
V.print(OS);
return OS;
}
#endif
struct BlockData {
// Indicates if the block uses VXRM. Uninitialized means no use.
VXRMInfo VXRMUse;
// Indicates the VXRM output from the block. Uninitialized means transparent.
VXRMInfo VXRMOut;
// Keeps track of the available VXRM value at the start of the basic block.
VXRMInfo AvailableIn;
// Keeps track of the available VXRM value at the end of the basic block.
VXRMInfo AvailableOut;
// Keeps track of what VXRM is anticipated at the start of the basic block.
VXRMInfo AnticipatedIn;
// Keeps track of what VXRM is anticipated at the end of the basic block.
VXRMInfo AnticipatedOut;
// Keeps track of whether the block is already in the queue.
bool InQueue;
BlockData() = default;
};
class RISCVInsertWriteVXRM : public MachineFunctionPass {
const TargetInstrInfo *TII;
std::vector<BlockData> BlockInfo;
std::queue<const MachineBasicBlock *> WorkList;
public:
static char ID;
RISCVInsertWriteVXRM() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
StringRef getPassName() const override {
return RISCV_INSERT_WRITE_VXRM_NAME;
}
private:
bool computeVXRMChanges(const MachineBasicBlock &MBB);
void computeAvailable(const MachineBasicBlock &MBB);
void computeAnticipated(const MachineFunction &MF, const MachineBasicBlock &MBB);
void emitWriteVXRM(MachineBasicBlock &MBB);
};
} // end anonymous namespace
char RISCVInsertWriteVXRM::ID = 0;
INITIALIZE_PASS(RISCVInsertWriteVXRM, DEBUG_TYPE, RISCV_INSERT_WRITE_VXRM_NAME,
false, false)
bool RISCVInsertWriteVXRM::computeVXRMChanges(const MachineBasicBlock &MBB) {
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
bool NeedVXRMWrite = false;
for (const MachineInstr &MI : MBB) {
int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
if (VXRMIdx >= 0 && !RISCVInstrInfo::ignoresVXRM(MI)) {
unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
if (!BBInfo.VXRMUse.isValid())
BBInfo.VXRMUse.setVXRMImm(NewVXRMImm);
BBInfo.VXRMOut.setVXRMImm(NewVXRMImm);
NeedVXRMWrite = true;
continue;
}
if (MI.isCall() || MI.isInlineAsm() ||
MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr)) {
if (!BBInfo.VXRMUse.isValid())
BBInfo.VXRMUse.setUnknown();
BBInfo.VXRMOut.setUnknown();
}
}
return NeedVXRMWrite;
}
void RISCVInsertWriteVXRM::computeAvailable(const MachineBasicBlock &MBB) {
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
BBInfo.InQueue = false;
VXRMInfo Available;
if (MBB.pred_empty()) {
Available.setUnknown();
} else {
for (const MachineBasicBlock *P : MBB.predecessors())
Available = Available.intersect(BlockInfo[P->getNumber()].AvailableOut);
}
// If we don't have any valid available info, wait until we do.
if (!Available.isValid())
return;
if (Available != BBInfo.AvailableIn) {
BBInfo.AvailableIn = Available;
LLVM_DEBUG(dbgs() << "AvailableIn state of " << printMBBReference(MBB)
<< " changed to " << BBInfo.AvailableIn << "\n");
}
if (BBInfo.VXRMOut.isValid())
Available = BBInfo.VXRMOut;
if (Available == BBInfo.AvailableOut)
return;
BBInfo.AvailableOut = Available;
LLVM_DEBUG(dbgs() << "AvailableOut state of " << printMBBReference(MBB)
<< " changed to " << BBInfo.AvailableOut << "\n");
// Add the successors to the work list so that we can propagate.
for (MachineBasicBlock *S : MBB.successors()) {
if (!BlockInfo[S->getNumber()].InQueue) {
BlockInfo[S->getNumber()].InQueue = true;
WorkList.push(S);
}
}
}
void RISCVInsertWriteVXRM::computeAnticipated(const MachineFunction &MF, const MachineBasicBlock &MBB) {
BlockData &BBInfo = BlockInfo[MBB.getNumber()];
const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
BBInfo.InQueue = false;
VXRMInfo Anticipated;
if (MBB.succ_empty()) {
Anticipated.setUnknown();
} else {
for (const MachineBasicBlock *S : MBB.successors())
if (ST.hasVXRMPipelineFlush())
Anticipated =
Anticipated.intersectAnticipated(BlockInfo[S->getNumber()].AnticipatedIn);
else
Anticipated =
Anticipated.intersect(BlockInfo[S->getNumber()].AnticipatedIn);
}
// If we don't have any valid anticipated info, wait until we do.
if (!Anticipated.isValid())
return;
if (Anticipated != BBInfo.AnticipatedOut) {
BBInfo.AnticipatedOut = Anticipated;
LLVM_DEBUG(dbgs() << "AnticipatedOut state of " << printMBBReference(MBB)
<< " changed to " << BBInfo.AnticipatedOut << "\n");
}
// If this block reads VXRM, copy it.
if (BBInfo.VXRMUse.isValid())
Anticipated = BBInfo.VXRMUse;
if (Anticipated == BBInfo.AnticipatedIn)
return;
BBInfo.AnticipatedIn = Anticipated;
LLVM_DEBUG(dbgs() << "AnticipatedIn state of " << printMBBReference(MBB)
<< " changed to " << BBInfo.AnticipatedIn << "\n");
// Add the predecessors to the work list so that we can propagate.
for (MachineBasicBlock *P : MBB.predecessors()) {
if (!BlockInfo[P->getNumber()].InQueue) {
BlockInfo[P->getNumber()].InQueue = true;
WorkList.push(P);
}
}
}
void RISCVInsertWriteVXRM::emitWriteVXRM(MachineBasicBlock &MBB) {
const BlockData &BBInfo = BlockInfo[MBB.getNumber()];
VXRMInfo Info = BBInfo.AvailableIn;
// Flag to indicates we need to insert a VXRM write. We want to delay it as
// late as possible in this block.
bool PendingInsert = false;
// Insert VXRM write if anticipated and not available.
if (BBInfo.AnticipatedIn.isStatic()) {
// If this is the entry block and the value is anticipated, insert.
if (MBB.isEntryBlock()) {
PendingInsert = true;
} else {
// Search for any predecessors that wouldn't satisfy our requirement and
// insert a write VXRM if needed.
// NOTE: If one predecessor is able to provide the requirement, but
// another isn't, it means we have a critical edge. The better placement
// would be to split the critical edge.
for (MachineBasicBlock *P : MBB.predecessors()) {
const BlockData &PInfo = BlockInfo[P->getNumber()];
// If it's available out of the predecessor, then we're ok.
if (PInfo.AvailableOut.isStatic() &&
PInfo.AvailableOut.getVXRMImm() ==
BBInfo.AnticipatedIn.getVXRMImm())
continue;
// If the predecessor anticipates this value for all its successors,
// then a write to VXRM would have already occurred before this block is
// executed.
if (PInfo.AnticipatedOut.isStatic() &&
PInfo.AnticipatedOut.getVXRMImm() ==
BBInfo.AnticipatedIn.getVXRMImm())
continue;
PendingInsert = true;
break;
}
}
Info = BBInfo.AnticipatedIn;
}
for (MachineInstr &MI : MBB) {
int VXRMIdx = RISCVII::getVXRMOpNum(MI.getDesc());
if (VXRMIdx >= 0 && !RISCVInstrInfo::ignoresVXRM(MI)) {
unsigned NewVXRMImm = MI.getOperand(VXRMIdx).getImm();
if (PendingInsert || !Info.isStatic() ||
Info.getVXRMImm() != NewVXRMImm) {
assert((!PendingInsert ||
(Info.isStatic() && Info.getVXRMImm() == NewVXRMImm)) &&
"Pending VXRM insertion mismatch");
LLVM_DEBUG(dbgs() << "Inserting before "; MI.print(dbgs()));
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm))
.addImm(NewVXRMImm);
PendingInsert = false;
}
MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false,
/*IsImp*/ true));
Info.setVXRMImm(NewVXRMImm);
continue;
}
if (MI.isCall() || MI.isInlineAsm() ||
MI.modifiesRegister(RISCV::VXRM, /*TRI=*/nullptr))
Info.setUnknown();
}
// If all our successors anticipate a value, do the insert.
// NOTE: It's possible that not all predecessors of our successor provide the
// correct value. This can occur on critical edges. If we don't split the
// critical edge we'll also have a write vxrm in the successor that is
// redundant with this one.
if (PendingInsert ||
(BBInfo.AnticipatedOut.isStatic() &&
(!Info.isStatic() ||
Info.getVXRMImm() != BBInfo.AnticipatedOut.getVXRMImm()))) {
assert((!PendingInsert ||
(Info.isStatic() && BBInfo.AnticipatedOut.isStatic() &&
Info.getVXRMImm() == BBInfo.AnticipatedOut.getVXRMImm())) &&
"Pending VXRM insertion mismatch");
LLVM_DEBUG(dbgs() << "Inserting at end of " << printMBBReference(MBB)
<< " changing to " << BBInfo.AnticipatedOut << "\n");
BuildMI(MBB, MBB.getFirstTerminator(), DebugLoc(),
TII->get(RISCV::WriteVXRMImm))
.addImm(BBInfo.AnticipatedOut.getVXRMImm());
}
}
bool RISCVInsertWriteVXRM::runOnMachineFunction(MachineFunction &MF) {
// Skip if the vector extension is not enabled.
const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
if (!ST.hasVInstructions())
return false;
TII = ST.getInstrInfo();
assert(BlockInfo.empty() && "Expect empty block infos");
BlockInfo.resize(MF.getNumBlockIDs());
// Phase 1 - collect block information.
bool NeedVXRMChange = false;
for (const MachineBasicBlock &MBB : MF)
NeedVXRMChange |= computeVXRMChanges(MBB);
if (!NeedVXRMChange) {
BlockInfo.clear();
return false;
}
// Phase 2 - Compute available VXRM using a forward walk.
for (const MachineBasicBlock &MBB : MF) {
WorkList.push(&MBB);
BlockInfo[MBB.getNumber()].InQueue = true;
}
while (!WorkList.empty()) {
const MachineBasicBlock &MBB = *WorkList.front();
WorkList.pop();
computeAvailable(MBB);
}
// Phase 3 - Compute anticipated VXRM using a backwards walk.
for (const MachineBasicBlock &MBB : llvm::reverse(MF)) {
WorkList.push(&MBB);
BlockInfo[MBB.getNumber()].InQueue = true;
}
while (!WorkList.empty()) {
const MachineBasicBlock &MBB = *WorkList.front();
WorkList.pop();
computeAnticipated(MF, MBB);
}
// Phase 4 - Emit VXRM writes at the earliest place possible.
for (MachineBasicBlock &MBB : MF)
emitWriteVXRM(MBB);
BlockInfo.clear();
return true;
}
FunctionPass *llvm::createRISCVInsertWriteVXRMPass() {
return new RISCVInsertWriteVXRM();
}
|