about | summary | refs | log | tree | commit | diff
path: root/llvm/lib/CodeGen/SplitKit.cpp
diff options
context:
space:
mode:
author Jay Foad <jay.foad@amd.com> 2020-09-16 11:13:45 +0100
committer Jay Foad <jay.foad@amd.com> 2020-09-17 09:26:11 +0100
commit 6f6d389da5c37e5e9a900902f03dc649d57919b7 (patch)
tree 57d4711e580beefb7dba4e1de6e5c95ee2aa19d7 /llvm/lib/CodeGen/SplitKit.cpp
parent d49707cf4b288e8d3cad00a78cfa45ec4c376496 (diff)
download llvm-6f6d389da5c37e5e9a900902f03dc649d57919b7.zip
llvm-6f6d389da5c37e5e9a900902f03dc649d57919b7.tar.gz
llvm-6f6d389da5c37e5e9a900902f03dc649d57919b7.tar.bz2
[SplitKit] Only copy live lanes
When splitting a live interval with subranges, only insert copies for
the lanes that are live at the point of the split. This avoids some
unnecessary copies and fixes a problem where copying dead lanes was
generating MIR that failed verification. The test case for this is
test/CodeGen/AMDGPU/splitkit-copy-live-lanes.mir.

Without this fix, some earlier live range splitting would create %430:

%430 [256r,848r:0)[848r,2584r:1) 0@256r 1@848r L0000000000000003 [848r,2584r:0) 0@848r L0000000000000030 [256r,2584r:0) 0@256r weight:1.480938e-03
...
256B    undef %430.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %20.sub1:vreg_128, implicit $exec
...
848B    %430.sub0:vreg_128 = V_AND_B32_e32 %92:sreg_32, %20.sub1:vreg_128, implicit $exec
...
2584B   %431:vreg_128 = COPY %430:vreg_128

Then RAGreedy::tryLocalSplit would split %430 into %432 and %433 just
before 848B giving:

%432 [256r,844r:0) 0@256r L0000000000000030 [256r,844r:0) 0@256r weight:3.066802e-03
%433 [844r,848r:0)[848r,2584r:1) 0@844r 1@848r L0000000000000030 [844r,2584r:0) 0@844r L0000000000000003 [844r,844d:0)[848r,2584r:1) 0@844r 1@848r weight:2.831776e-03
...
256B    undef %432.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %20.sub1:vreg_128, implicit $exec
...
844B    undef %433.sub0:vreg_128 = COPY %432.sub0:vreg_128 {
          internal %433.sub2:vreg_128 = COPY %432.sub2:vreg_128
848B    }
        %433.sub0:vreg_128 = V_AND_B32_e32 %92:sreg_32, %20.sub1:vreg_128, implicit $exec
...
2584B   %431:vreg_128 = COPY %433:vreg_128

Note that the copy from %432 to %433 at 844B is a curious
bundle-without-a-BUNDLE-instruction that SplitKit creates deliberately,
and it includes a copy of .sub0 which is not live at this point, and
that causes it to fail verification:

*** Bad machine code: No live subrange at use ***
- function:    zextload_global_v64i16_to_v64i64
- basic block: %bb.0 (0x7faed48) [0B;2848B)
- instruction: 844B undef %433.sub0:vreg_128 = COPY %432.sub0:vreg_128
- operand 1:   %432.sub0:vreg_128
- interval:    %432 [256r,844r:0) 0@256r L0000000000000030 [256r,844r:0) 0@256r weight:3.066802e-03
- at:          844B

Using real bundles with a BUNDLE instruction might also fix this
problem, but the current fix is less invasive and also avoids some
unnecessary copies.

https://bugs.llvm.org/show_bug.cgi?id=47492

Differential Revision: https://reviews.llvm.org/D87757
Diffstat (limited to 'llvm/lib/CodeGen/SplitKit.cpp')
-rw-r--r-- llvm/lib/CodeGen/SplitKit.cpp 9
1 files changed, 6 insertions, 3 deletions
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index 372c7f8..4029c85 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -649,10 +649,13 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
}
if (!DidRemat) {
LaneBitmask LaneMask;
- if (LI->hasSubRanges()) {
+ if (OrigLI.hasSubRanges()) {
LaneMask = LaneBitmask::getNone();
- for (LiveInterval::SubRange &S : LI->subranges())
- LaneMask |= S.LaneMask;
+ for (LiveInterval::SubRange &S : OrigLI.subranges()) {
+ if (S.liveAt(UseIdx))
+ LaneMask |= S.LaneMask;
+ }
+ assert(LaneMask.any() && "Interval has no live subranges");
} else {
LaneMask = LaneBitmask::getAll();
}