diff options
author | Tom Stellard <thomas.stellard@amd.com> | 2014-01-22 21:55:46 +0000 |
---|---|---|
committer | Tom Stellard <thomas.stellard@amd.com> | 2014-01-22 21:55:46 +0000 |
commit | e89373e0622724f99194276d3a97ad6052db41a8 (patch) | |
tree | 46631c02c31871551acf7ed1a631ad6f0e850ef8 /llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp | |
parent | 59ed4794c4fff1456c7afc970a36266ba505ae48 (diff) | |
download | llvm-e89373e0622724f99194276d3a97ad6052db41a8.zip llvm-e89373e0622724f99194276d3a97ad6052db41a8.tar.gz llvm-e89373e0622724f99194276d3a97ad6052db41a8.tar.bz2 |
R600: Add work-around for the CF stack entry HW bug
The CF stack can be corrupted if you use CF_ALU_PUSH_BEFORE,
CF_ALU_ELSE_AFTER, CF_ALU_BREAK, or CF_ALU_CONTINUE when the number of
sub-entries on the stack is greater than or equal to the stack entry
size and sub-entries modulo 4 is either 0 or 3 (on cedar the bug is
present when the number of sub-entries modulo 8 is either 7 or 0).
We choose to be conservative and always apply the work-around when the
number of sub-entries is greater than or equal to the stack entry size,
so that we can safely over-allocate the stack when we are unsure of the
stack allocation rules.
reviewed-by: Vincent Lejeune <vljn at ovi.com>
llvm-svn: 199842
Diffstat (limited to 'llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp')
-rw-r--r-- | llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp | 43 |
1 files changed, 42 insertions, 1 deletions
diff --git a/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp index 6b42a7a..470ff2e 100644 --- a/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/llvm/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -73,6 +73,44 @@ bool CFStack::branchStackContains(CFStack::StackItem Item) { return false; } +bool CFStack::requiresWorkAroundForInst(unsigned Opcode) { + if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST.hasCaymanISA() && + getLoopDepth() > 1) + return true; + + if (!ST.hasCFAluBug()) + return false; + + switch(Opcode) { + default: return false; + case AMDGPU::CF_ALU_PUSH_BEFORE: + case AMDGPU::CF_ALU_ELSE_AFTER: + case AMDGPU::CF_ALU_BREAK: + case AMDGPU::CF_ALU_CONTINUE: + if (CurrentSubEntries == 0) + return false; + if (ST.getWavefrontSize() == 64) { + // We are being conservative here. We only require this work-around if + // CurrentSubEntries > 3 && + // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0) + // + // We have to be conservative, because we don't know for certain that + // our stack allocation algorithm for Evergreen/NI is correct. Applying this + // work-around when CurrentSubEntries > 3 allows us to over-allocate stack + // resources without any problems. + return CurrentSubEntries > 3; + } else { + assert(ST.getWavefrontSize() == 32); + // We are being conservative here. We only require the work-around if + // CurrentSubEntries > 7 && + // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0) + // See the comment on the wavefront size == 64 case for why we are + // being conservative. 
+ return CurrentSubEntries > 7; + } + } +} + unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) { switch(Item) { default: @@ -472,9 +510,12 @@ public: if (MI->getOpcode() == AMDGPU::CF_ALU) LastAlu.back() = MI; I++; + bool RequiresWorkAround = + CFStack.requiresWorkAroundForInst(MI->getOpcode()); switch (MI->getOpcode()) { case AMDGPU::CF_ALU_PUSH_BEFORE: - if (ST.hasCaymanISA() && CFStack.getLoopDepth() > 1) { + if (RequiresWorkAround) { + DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n"); BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG)) .addImm(CfCount + 1) .addImm(1); |