aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/CodeGen/SplitKit.cpp
diff options
context:
space:
mode:
authorRuiling Song <ruiling.song@amd.com>2021-08-10 15:59:44 +0800
committerRuiling Song <ruiling.song@amd.com>2021-08-13 07:36:38 +0800
commite1beebbac5da99b2451f25c2028531c12a70860a (patch)
tree53dc2907b324fb22ed5e4aaa90fefe2d61ccf59b /llvm/lib/CodeGen/SplitKit.cpp
parent1b194ef1ab3b856afb8458fa9c58408360d292cb (diff)
downloadllvm-e1beebbac5da99b2451f25c2028531c12a70860a.zip
llvm-e1beebbac5da99b2451f25c2028531c12a70860a.tar.gz
llvm-e1beebbac5da99b2451f25c2028531c12a70860a.tar.bz2
SplitKit: Don't further split subrange mask in buildCopy
We may use several COPY instructions to copy the needed sub-registers during a split. However, the way the lanes are divided among these COPYs may differ from the subranges of the old register. This causes a failure when we extend the subranges of the new register, because the LaneMasks of the new register's subranges do not exactly match those of the old register's subranges. Since we are bundling the COPYs, I think there is no need to further refine the subranges of the new register based on the set of LaneMasks of the inserted COPYs. I am not sure whether there will be further breaking cases. But since the subranges of the new register are created based on the LaneMasks of the subranges of the old register, it is highly likely that we will always find an exact LaneMask match. We can think about how to make extendPHIKillRanges() work for the subrange mask mismatch case if we meet more such cases in the future. The test case is from D105065 by @arsenm. Differential Revision: https://reviews.llvm.org/D107829
Diffstat (limited to 'llvm/lib/CodeGen/SplitKit.cpp')
-rw-r--r--llvm/lib/CodeGen/SplitKit.cpp23
1 files changed, 12 insertions, 11 deletions
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index c70620f..38493ed 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -529,19 +529,12 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg,
| getInternalReadRegState(!FirstCopy), SubIdx)
.addReg(FromReg, 0, SubIdx);
- BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
SlotIndexes &Indexes = *LIS.getSlotIndexes();
if (FirstCopy) {
Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
} else {
CopyMI->bundleWithPred();
}
- LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx);
- DestLI.refineSubRanges(Allocator, LaneMask,
- [Def, &Allocator](LiveInterval::SubRange &SR) {
- SR.createDeadDef(Def, Allocator);
- },
- Indexes, TRI);
return Def;
}
@@ -549,11 +542,11 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
LaneBitmask LaneMask, MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
// The full vreg is copied.
MachineInstr *CopyMI =
BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg);
- SlotIndexes &Indexes = *LIS.getSlotIndexes();
return Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
}
@@ -567,18 +560,26 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
const TargetRegisterClass *RC = MRI.getRegClass(FromReg);
assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class");
- SmallVector<unsigned, 8> Indexes;
+ SmallVector<unsigned, 8> SubIndexes;
// Abort if we cannot possibly implement the COPY with the given indexes.
- if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, Indexes))
+ if (!TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, SubIndexes))
report_fatal_error("Impossible to implement partial COPY");
SlotIndex Def;
- for (unsigned BestIdx : Indexes) {
+ for (unsigned BestIdx : SubIndexes) {
Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
DestLI, Late, Def);
}
+ BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ DestLI.refineSubRanges(
+ Allocator, LaneMask,
+ [Def, &Allocator](LiveInterval::SubRange &SR) {
+ SR.createDeadDef(Def, Allocator);
+ },
+ Indexes, TRI);
+
return Def;
}