aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
diff options
context:
space:
mode:
authorNicolai Haehnle <nhaehnle@gmail.com>2016-03-21 20:28:33 +0000
committerNicolai Haehnle <nhaehnle@gmail.com>2016-03-21 20:28:33 +0000
commit213e87f2ee2f5f5fa5704346d2f73df1d61a2f02 (patch)
tree8a56086de35da4d164e198a0b26e93122b564380 /llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
parentb14f4fd0defd496c0c5ac6fc36aa15e9c7f450ce (diff)
downloadllvm-213e87f2ee2f5f5fa5704346d2f73df1d61a2f02.zip
llvm-213e87f2ee2f5f5fa5704346d2f73df1d61a2f02.tar.gz
llvm-213e87f2ee2f5f5fa5704346d2f73df1d61a2f02.tar.bz2
AMDGPU: Add SIWholeQuadMode pass
Summary: Whole quad mode is already enabled for pixel shaders that compute derivatives, but it must be suspended for instructions that cause a shader to have side effects (i.e. stores and atomics). This pass addresses the issue by storing the real (initial) live mask in a register, masking EXEC before instructions that require exact execution and (re-)enabling WQM where required. This pass is run before register coalescing so that we can use machine SSA for analysis. The changes in this patch expose a problem with the second machine scheduling pass: target independent instructions like COPY implicitly use EXEC when they operate on VGPRs, but this fact is not encoded in the MIR. This can lead to miscompilation because instructions are moved past changes to EXEC. This patch fixes the problem by adding use-implicit operands to target independent instructions. Some general codegen passes are relaxed to work with such implicit use operands. Reviewers: arsenm, tstellarAMD, mareko Subscribers: MatzeB, arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D18162 llvm-svn: 263982
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp33
1 files changed, 21 insertions, 12 deletions
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index a804a5e..c4a06ef 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -78,7 +78,7 @@ private:
void SkipIfDead(MachineInstr &MI);
void If(MachineInstr &MI);
- void Else(MachineInstr &MI);
+ void Else(MachineInstr &MI, bool ExecModified);
void Break(MachineInstr &MI);
void IfBreak(MachineInstr &MI);
void ElseBreak(MachineInstr &MI);
@@ -215,7 +215,7 @@ void SILowerControlFlow::If(MachineInstr &MI) {
MI.eraseFromParent();
}
-void SILowerControlFlow::Else(MachineInstr &MI) {
+void SILowerControlFlow::Else(MachineInstr &MI, bool ExecModified) {
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MI.getDebugLoc();
unsigned Dst = MI.getOperand(0).getReg();
@@ -225,6 +225,15 @@ void SILowerControlFlow::Else(MachineInstr &MI) {
TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
.addReg(Src); // Saved EXEC
+ if (ExecModified) {
+ // Adjust the saved exec to account for the modifications during the flow
+ // block that contains the ELSE. This can happen when WQM mode is switched
+ // off.
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64), Dst)
+ .addReg(AMDGPU::EXEC)
+ .addReg(Dst);
+ }
+
BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
.addReg(AMDGPU::EXEC)
.addReg(Dst);
@@ -488,7 +497,6 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
bool HaveKill = false;
- bool NeedWQM = false;
bool NeedFlat = false;
unsigned Depth = 0;
@@ -498,17 +506,24 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *EmptyMBBAtEnd = NULL;
MachineBasicBlock &MBB = *BI;
MachineBasicBlock::iterator I, Next;
+ bool ExecModified = false;
+
for (I = MBB.begin(); I != MBB.end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
- if (TII->isWQM(MI) || TII->isDS(MI))
- NeedWQM = true;
// Flat uses m0 in case it needs to access LDS.
if (TII->isFLAT(MI))
NeedFlat = true;
+ for (const auto &Def : I->defs()) {
+ if (Def.isReg() && Def.isDef() && Def.getReg() == AMDGPU::EXEC) {
+ ExecModified = true;
+ break;
+ }
+ }
+
switch (MI.getOpcode()) {
default: break;
case AMDGPU::SI_IF:
@@ -517,7 +532,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
break;
case AMDGPU::SI_ELSE:
- Else(MI);
+ Else(MI, ExecModified);
break;
case AMDGPU::SI_BREAK:
@@ -599,12 +614,6 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
}
}
- if (NeedWQM && MFI->getShaderType() == ShaderType::PIXEL) {
- MachineBasicBlock &MBB = MF.front();
- BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
- AMDGPU::EXEC).addReg(AMDGPU::EXEC);
- }
-
if (NeedFlat && MFI->IsKernel) {
// TODO: What to use with function calls?
// We will need to Initialize the flat scratch register pair.