diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-06-20 18:56:32 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-06-20 18:56:32 +0000 |
commit | ff3f912e74473ddae39fa261eeeae2ac6777c392 (patch) | |
tree | 196d1db3be825d3a55b0e26c8c6a1011d64dcf7e | |
parent | f10ca93f34d3c88dfed98bfef28f45c8088d4ace (diff) | |
download | llvm-ff3f912e74473ddae39fa261eeeae2ac6777c392.zip llvm-ff3f912e74473ddae39fa261eeeae2ac6777c392.tar.gz llvm-ff3f912e74473ddae39fa261eeeae2ac6777c392.tar.bz2 |
AMDGPU: Do operand folding in program order
Previously, it was possible to partially fold use instructions
before their defs had been visited. After the xor is folded into a copy,
the same mov can end up in the fold list twice, so the second fold
attempt fails because it expects to find a register to fold.
llvm-svn: 305821
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/fold-operands-order.mir | 47 |
2 files changed, 50 insertions, 5 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 92d5956..f391f67 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -13,6 +13,7 @@ #include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -923,12 +924,9 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) { // level. bool IsIEEEMode = ST->enableIEEEBit(MF) || !MFI->hasNoSignedZerosFPMath(); - for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); - BI != BE; ++BI) { - - MachineBasicBlock &MBB = *BI; + for (MachineBasicBlock *MBB : depth_first(&MF)) { MachineBasicBlock::iterator I, Next; - for (I = MBB.begin(); I != MBB.end(); I = Next) { + for (I = MBB->begin(); I != MBB->end(); I = Next) { Next = std::next(I); MachineInstr &MI = *I; diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-order.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-order.mir new file mode 100644 index 0000000..afde89d6 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-operands-order.mir @@ -0,0 +1,47 @@ +# RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs -run-pass si-fold-operands -o - %s | FileCheck -check-prefix=GCN %s + +--- | + define amdgpu_kernel void @mov_in_use_list_2x() { + unreachable + } + +... +--- + +# Blocks should be processed in program order to make sure folds +# aren't made in users before the def is seen. 
+ +# GCN-LABEL: name: mov_in_use_list_2x{{$}} +# GCN: %2 = V_MOV_B32_e32 0, implicit %exec +# GCN-NEXT: %3 = COPY undef %0 + +# GCN: %1 = V_MOV_B32_e32 0, implicit %exec + + +name: mov_in_use_list_2x +tracksRegLiveness: true +registers: + - { id: 0, class: vgpr_32, preferred-register: '' } + - { id: 1, class: vgpr_32, preferred-register: '' } + - { id: 2, class: vgpr_32, preferred-register: '' } + - { id: 3, class: vgpr_32, preferred-register: '' } +liveins: +body: | + bb.0: + successors: %bb.2 + + S_BRANCH %bb.2 + + bb.1: + successors: %bb.2 + + %2 = COPY %1 + %3 = V_XOR_B32_e64 killed %2, undef %0, implicit %exec + + bb.2: + successors: %bb.1 + + %1 = V_MOV_B32_e32 0, implicit %exec + S_BRANCH %bb.1 + +... |