aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2017-06-20 18:56:32 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2017-06-20 18:56:32 +0000
commitff3f912e74473ddae39fa261eeeae2ac6777c392 (patch)
tree196d1db3be825d3a55b0e26c8c6a1011d64dcf7e
parentf10ca93f34d3c88dfed98bfef28f45c8088d4ace (diff)
downloadllvm-ff3f912e74473ddae39fa261eeeae2ac6777c392.zip
llvm-ff3f912e74473ddae39fa261eeeae2ac6777c392.tar.gz
llvm-ff3f912e74473ddae39fa261eeeae2ac6777c392.tar.bz2
AMDGPU: Do operand folding in program order
Before it was possible to partially fold use instructions before the defs. After the xor is folded into a copy, the same mov can end up in the fold list twice, so on the second attempt it will fail expecting to see a register to fold. llvm-svn: 305821
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp8
-rw-r--r--llvm/test/CodeGen/AMDGPU/fold-operands-order.mir47
2 files changed, 50 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 92d5956..f391f67 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -13,6 +13,7 @@
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -923,12 +924,9 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
// level.
bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath();
- for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
- BI != BE; ++BI) {
-
- MachineBasicBlock &MBB = *BI;
+ for (MachineBasicBlock *MBB : depth_first(&MF)) {
MachineBasicBlock::iterator I, Next;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ for (I = MBB->begin(); I != MBB->end(); I = Next) {
Next = std::next(I);
MachineInstr &MI = *I;
diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-order.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-order.mir
new file mode 100644
index 0000000..afde89d6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-order.mir
@@ -0,0 +1,47 @@
+# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs -run-pass si-fold-operands -o - %s | FileCheck -check-prefix=GCN %s
+
+--- |
+ define amdgpu_kernel void @mov_in_use_list_2x() {
+ unreachable
+ }
+
+...
+---
+
+# Blocks should be processed in program order to make sure folds
+# aren't made in users before the def is seen.
+
+# GCN-LABEL: name: mov_in_use_list_2x{{$}}
+# GCN: %2 = V_MOV_B32_e32 0, implicit %exec
+# GCN-NEXT: %3 = COPY undef %0
+
+# GCN: %1 = V_MOV_B32_e32 0, implicit %exec
+
+
+name: mov_in_use_list_2x
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vgpr_32, preferred-register: '' }
+ - { id: 1, class: vgpr_32, preferred-register: '' }
+ - { id: 2, class: vgpr_32, preferred-register: '' }
+ - { id: 3, class: vgpr_32, preferred-register: '' }
+liveins:
+body: |
+ bb.0:
+ successors: %bb.2
+
+ S_BRANCH %bb.2
+
+ bb.1:
+ successors: %bb.2
+
+ %2 = COPY %1
+ %3 = V_XOR_B32_e64 killed %2, undef %0, implicit %exec
+
+ bb.2:
+ successors: %bb.1
+
+ %1 = V_MOV_B32_e32 0, implicit %exec
+ S_BRANCH %bb.1
+
+...