aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2021-07-20 13:53:14 -0700
committer: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> 2021-07-21 15:19:55 -0700
commit: c54c76037b9d7e611fc3c2955b2b9a7934fbb957 (patch)
tree: cf9101a0795f5f380081232ba9a2caa3804cade8
parent: a664c14001fa2359604527084c91d0864aa131a4 (diff)
download: llvm-c54c76037b9d7e611fc3c2955b2b9a7934fbb957.zip
llvm-c54c76037b9d7e611fc3c2955b2b9a7934fbb957.tar.gz
llvm-c54c76037b9d7e611fc3c2955b2b9a7934fbb957.tar.bz2
Prevent dead uses in register coalescer after rematerialization
The coalescer does not check whether the registers used by an instruction are available at the point of rematerialization. If it attempts to rematerialize an instruction whose uses are not available there, it can end up with a use without a def. LiveRangeEdit performs this check during rematerialization, so just call LiveRangeEdit::allUsesAvailableAt() to avoid the problem. Differential Revision: https://reviews.llvm.org/D106396
-rw-r--r-- llvm/include/llvm/CodeGen/LiveRangeEdit.h 10
-rw-r--r-- llvm/lib/CodeGen/RegisterCoalescer.cpp 16
-rw-r--r-- llvm/test/CodeGen/AMDGPU/coalescer-remat-dead-use.mir 94
3 files changed, 115 insertions, 5 deletions
diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
index 5b26a44..fa4e801 100644
--- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
@@ -97,11 +97,6 @@ private:
/// scanRemattable - Identify the Parent values that may rematerialize.
void scanRemattable(AAResults *aa);
- /// allUsesAvailableAt - Return true if all registers used by OrigMI at
- /// OrigIdx are also available with the same value at UseIdx.
- bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
- SlotIndex UseIdx) const;
-
/// foldAsLoad - If LI has a single use and a single def that can be folded as
/// a load, eliminate the register by folding the def into the use.
bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr *> &Dead);
@@ -207,6 +202,11 @@ public:
explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI) {}
};
+ /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+ /// OrigIdx are also available with the same value at UseIdx.
+ bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+ SlotIndex UseIdx) const;
+
/// canRematerializeAt - Determine if ParentVNI can be rematerialized at
/// UseIdx. It is assumed that parent_.getVNINfoAt(UseIdx) == ParentVNI.
/// When cheapAsAMove is set, only cheap remats are allowed.
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index ce5fa2c..7daa677 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -201,6 +201,11 @@ namespace {
/// Recursively eliminate dead defs in DeadDefs.
void eliminateDeadDefs();
+ /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+ /// OrigIdx are also available with the same value at UseIdx.
+ bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+ SlotIndex UseIdx);
+
/// LiveRangeEdit callback for eliminateDeadDefs().
void LRE_WillEraseInstruction(MachineInstr *MI) override;
@@ -604,6 +609,14 @@ void RegisterCoalescer::eliminateDeadDefs() {
nullptr, this).eliminateDeadDefs(DeadDefs);
}
+bool RegisterCoalescer::allUsesAvailableAt(const MachineInstr *OrigMI,
+ SlotIndex OrigIdx,
+ SlotIndex UseIdx) {
+ SmallVector<Register, 8> NewRegs;
+ return LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, nullptr, this)
+ .allUsesAvailableAt(OrigMI, OrigIdx, UseIdx);
+}
+
void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
// MI may be in WorkList. Make sure we don't visit it.
ErasedInstrs.insert(MI);
@@ -1343,6 +1356,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
}
}
+ if (!allUsesAvailableAt(DefMI, ValNo->def, CopyIdx))
+ return false;
+
DebugLoc DL = CopyMI->getDebugLoc();
MachineBasicBlock *MBB = CopyMI->getParent();
MachineBasicBlock::iterator MII =
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-remat-dead-use.mir b/llvm/test/CodeGen/AMDGPU/coalescer-remat-dead-use.mir
new file mode 100644
index 0000000..d6f1d89
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/coalescer-remat-dead-use.mir
@@ -0,0 +1,94 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -o - -verify-coalescing -run-pass=simple-register-coalescing %s | FileCheck -check-prefix=GCN %s
+
+---
+# Do not rematerialize V_MOV_B32 at COPY because source register %1 is killed.
+
+name: no_remat_killed_src_in_inst
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: no_remat_killed_src_in_inst
+ ; GCN: liveins: $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+ ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]]
+ ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_e32 killed %1, implicit $exec
+ $vgpr0 = COPY killed %2
+ SI_RETURN_TO_EPILOG killed $vgpr0
+...
+---
+# Do not rematerialize V_MOV_B32 at COPY because source register %1 is killed
+# after the MOV but before the COPY.
+
+name: no_remat_killed_src_after_inst
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: no_remat_killed_src_after_inst
+ ; GCN: liveins: $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+ ; GCN: KILL [[V_ADD_U32_e32_]]
+ ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]]
+ ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_e32 %1, implicit $exec
+ KILL %1
+ $vgpr0 = COPY killed %2
+ SI_RETURN_TO_EPILOG killed $vgpr0
+...
+---
+# Even if %1 is not killed do not rematerialize V_MOV_B32 so that we do not
+# extend %1 liverange.
+
+name: no_remat_alive_src_in_inst_unused
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: no_remat_alive_src_in_inst_unused
+ ; GCN: liveins: $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+ ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+ ; GCN: $vgpr0 = COPY [[V_MOV_B32_e32_]]
+ ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_e32 %1, implicit $exec
+ $vgpr0 = COPY killed %2
+ SI_RETURN_TO_EPILOG killed $vgpr0
+...
+---
+# Rematerialize V_MOV_B32 since %1 is available at COPY and still alive.
+
+name: remat_alive_src_in_inst_used_and_available
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: remat_alive_src_in_inst_used_and_available
+ ; GCN: liveins: $vgpr0
+ ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 1, [[COPY]], implicit $exec
+ ; GCN: $vgpr0 = V_MOV_B32_e32 [[V_ADD_U32_e32_]], implicit $exec
+ ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = V_ADD_U32_e32 1, %0, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_e32 %1, implicit $exec
+ $vgpr0 = COPY killed %2, implicit %1
+ SI_RETURN_TO_EPILOG killed $vgpr0
+...