| author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2021-07-20 13:53:14 -0700 |
|---|---|---|
| committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2021-07-21 15:19:55 -0700 |
| commit | c54c76037b9d7e611fc3c2955b2b9a7934fbb957 | |
| tree | cf9101a0795f5f380081232ba9a2caa3804cade8 | |
| parent | a664c14001fa2359604527084c91d0864aa131a4 | |
Prevent dead uses in register coalescer after rematerialization
The coalescer does not check whether the registers used by an instruction
are still available, i.e. live with the same value, at the point of
rematerialization. If it attempts to rematerialize an instruction whose
uses are not available there, it can end up with a use without a def.
LiveRangeEdit already performs such a check during rematerialization, so
just call LiveRangeEdit::allUsesAvailableAt() to avoid the problem.
Differential Revision: https://reviews.llvm.org/D106396
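The reused check is conceptually simple: every register read by the instruction being rematerialized must carry the same value at the original definition point and at the point where the rematerialized copy would be inserted. The sketch below is a minimal illustration of that idea, not the actual LiveRangeEdit implementation: the helper name `usesHaveSameValueAt` is invented for this example, and the real `LiveRangeEdit::allUsesAvailableAt()` additionally handles physical-register uses, same-instruction rematerialization, and a few other corner cases.

```cpp
// Simplified sketch of the availability check the coalescer now reuses from
// LiveRangeEdit::allUsesAvailableAt(). Helper name is hypothetical; the real
// implementation lives in llvm/lib/CodeGen/LiveRangeEdit.cpp.
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/SlotIndexes.h"

using namespace llvm;

// Return true if every virtual register read by OrigMI carries the same
// value (VNInfo) at OrigIdx and at UseIdx, i.e. rematerializing OrigMI at
// UseIdx would not read a dead or redefined register.
static bool usesHaveSameValueAt(const MachineInstr &OrigMI, SlotIndex OrigIdx,
                                SlotIndex UseIdx, const LiveIntervals &LIS) {
  // Normalize both indices to the register slot (simplified here).
  OrigIdx = OrigIdx.getRegSlot(true);
  UseIdx = UseIdx.getRegSlot(true);
  for (const MachineOperand &MO : OrigMI.operands()) {
    if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
      continue;
    // Keep the sketch to virtual registers; uses like implicit $exec need
    // the extra handling present in the real code.
    if (!MO.getReg().isVirtual())
      continue;
    const LiveInterval &LI = LIS.getInterval(MO.getReg());
    const VNInfo *OVNI = LI.getVNInfoAt(OrigIdx);
    if (!OVNI)
      continue;
    // The value read at the original point must still be the value that is
    // live at the point where the rematerialized copy would go.
    if (OVNI != LI.getVNInfoAt(UseIdx))
      return false;
  }
  return true;
}
```

In the diff below, the coalescer wraps the real check in a small RegisterCoalescer::allUsesAvailableAt() helper and bails out of reMaterializeTrivialDef() whenever it fails.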
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/include/llvm/CodeGen/LiveRangeEdit.h | 10 |
| -rw-r--r-- | llvm/lib/CodeGen/RegisterCoalescer.cpp | 16 |
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/coalescer-remat-dead-use.mir | 94 |

3 files changed, 115 insertions, 5 deletions
```diff
diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
index 5b26a44..fa4e801 100644
--- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
@@ -97,11 +97,6 @@ private:
   /// scanRemattable - Identify the Parent values that may rematerialize.
   void scanRemattable(AAResults *aa);
 
-  /// allUsesAvailableAt - Return true if all registers used by OrigMI at
-  /// OrigIdx are also available with the same value at UseIdx.
-  bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
-                          SlotIndex UseIdx) const;
-
   /// foldAsLoad - If LI has a single use and a single def that can be folded as
   /// a load, eliminate the register by folding the def into the use.
   bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr *> &Dead);
@@ -207,6 +202,11 @@ public:
     explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI) {}
   };
 
+  /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+  /// OrigIdx are also available with the same value at UseIdx.
+  bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+                          SlotIndex UseIdx) const;
+
   /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
   /// UseIdx. It is assumed that parent_.getVNINfoAt(UseIdx) == ParentVNI.
   /// When cheapAsAMove is set, only cheap remats are allowed.
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index ce5fa2c..7daa677 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -201,6 +201,11 @@ namespace {
     /// Recursively eliminate dead defs in DeadDefs.
     void eliminateDeadDefs();
 
+    /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+    /// OrigIdx are also available with the same value at UseIdx.
+    bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+                            SlotIndex UseIdx);
+
     /// LiveRangeEdit callback for eliminateDeadDefs().
     void LRE_WillEraseInstruction(MachineInstr *MI) override;
 
@@ -604,6 +609,14 @@ void RegisterCoalescer::eliminateDeadDefs() {
                 nullptr, this).eliminateDeadDefs(DeadDefs);
 }
 
+bool RegisterCoalescer::allUsesAvailableAt(const MachineInstr *OrigMI,
+                                           SlotIndex OrigIdx,
+                                           SlotIndex UseIdx) {
+  SmallVector<Register, 8> NewRegs;
+  return LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, nullptr, this)
+      .allUsesAvailableAt(OrigMI, OrigIdx, UseIdx);
+}
+
 void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
   // MI may be in WorkList. Make sure we don't visit it.
   ErasedInstrs.insert(MI);
@@ -1343,6 +1356,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
     }
   }
 
+  if (!allUsesAvailableAt(DefMI, ValNo->def, CopyIdx))
+    return false;
+
   DebugLoc DL = CopyMI->getDebugLoc();
   MachineBasicBlock *MBB = CopyMI->getParent();
   MachineBasicBlock::iterator MII =
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-remat-dead-use.mir b/llvm/test/CodeGen/AMDGPU/coalescer-remat-dead-use.mir
new file mode 100644
index 0000000..d6f1d89
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-remat-dead-use.mir
@@ -0,0 +1,94 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -o - -verify-coalescing -run-pass=simple-register-coalescing %s | FileCheck -check-prefix=GCN %s
+
+---
+# Do not rematerialize V_MOV_B32 at COPY because source register %1 is killed.
+
+name: no_remat_killed_src_in_inst
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: no_remat_killed_src_in_inst
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]]
+    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e32 killed %1, implicit $exec
+    $vgpr0 = COPY killed %2
+    SI_RETURN_TO_EPILOG killed $vgpr0
+...
+---
+# Do not rematerialize V_MOV_B32 at COPY because source register %1 is killed
+# after the MOV but before the COPY.
+
+name: no_remat_killed_src_after_inst
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: no_remat_killed_src_after_inst
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+    ; GCN: KILL [[V_ADD_U32_e32_]]
+    ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]]
+    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e32 %1, implicit $exec
+    KILL %1
+    $vgpr0 = COPY killed %2
+    SI_RETURN_TO_EPILOG killed $vgpr0
+...
+---
+# Even if %1 is not killed do not rematerialize V_MOV_B32 so that we do not
+# extend %1 liverange.
+
+name: no_remat_alive_src_in_inst_unused
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: no_remat_alive_src_in_inst_unused
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+    ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]]
+    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e32 %1, implicit $exec
+    $vgpr0 = COPY killed %2
+    SI_RETURN_TO_EPILOG killed $vgpr0
+...
+---
+# Rematerialize V_MOV_B32 since %1 is available at COPY and still alive.
+
+name: remat_alive_src_in_inst_used_and_available
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: remat_alive_src_in_inst_used_and_available
+    ; GCN: liveins: $vgpr0
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+    ; GCN: $vgpr0 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e32 %1, implicit $exec
+    $vgpr0 = COPY killed %2, implicit %1
+    SI_RETURN_TO_EPILOG killed $vgpr0
+...
```
