about summary refs log tree commit diff
diff options
context:
space:
mode:
author Valery Pykhtin <valery.pykhtin@gmail.com> 2023-12-08 11:27:08 +0100
committer GitHub <noreply@github.com> 2023-12-08 11:27:08 +0100
commit 901c5be524a52ea3a156abacff84d08190b48140 (patch)
tree b248472194d8880b8b856bea282beb0dcdd26375
parent cf47af493b1288b453a77a4b66959c6ee7a85c34 (diff)
download llvm-901c5be524a52ea3a156abacff84d08190b48140.zip
llvm-901c5be524a52ea3a156abacff84d08190b48140.tar.gz
llvm-901c5be524a52ea3a156abacff84d08190b48140.tar.bz2
[AMDGPU] Fix GCNUpwardRPTracker: max register pressure on defs. (#74422)
Treat a defined register as fully live "at" the instruction and update maximum pressure accordingly. Fixes #3786.
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNRegPressure.cpp 50
-rw-r--r-- llvm/lib/Target/AMDGPU/GCNRegPressure.h 26
-rw-r--r-- llvm/test/CodeGen/AMDGPU/regpressure_printer.mir 171
3 files changed, 112 insertions, 135 deletions
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 5ebf834..fd8f0be 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -274,32 +274,42 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
if (MI.isDebugInstr())
return;
- auto DecrementDef = [this](const MachineOperand &MO) {
+ // Kill all defs.
+ GCNRegPressure DefPressure, ECDefPressure;
+ bool HasECDefs = false;
+ for (const MachineOperand &MO : MI.all_defs()) {
+ if (!MO.getReg().isVirtual())
+ continue;
+
Register Reg = MO.getReg();
+ LaneBitmask DefMask = getDefRegMask(MO, *MRI);
+
+ // Treat a def as fully live at the moment of definition: keep a record.
+ if (MO.isEarlyClobber()) {
+ ECDefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
+ HasECDefs = true;
+ } else
+ DefPressure.inc(Reg, LaneBitmask::getNone(), DefMask, *MRI);
+
auto I = LiveRegs.find(Reg);
if (I == LiveRegs.end())
- return;
+ continue;
LaneBitmask &LiveMask = I->second;
LaneBitmask PrevMask = LiveMask;
- LiveMask &= ~getDefRegMask(MO, *MRI);
+ LiveMask &= ~DefMask;
CurPressure.inc(Reg, PrevMask, LiveMask, *MRI);
if (LiveMask.none())
LiveRegs.erase(I);
- };
-
- // Decrement non-early-clobber defs.
- SmallVector<const MachineOperand *, 2> EarlyClobberDefs;
- for (const MachineOperand &MO : MI.all_defs()) {
- if (!MO.getReg().isVirtual())
- continue;
- if (!MO.isEarlyClobber())
- DecrementDef(MO);
- else
- EarlyClobberDefs.push_back(&MO);
}
- // Increment uses.
+ // Update MaxPressure with defs pressure.
+ DefPressure += CurPressure;
+ if (HasECDefs)
+ DefPressure += ECDefPressure;
+ MaxPressure = max(DefPressure, MaxPressure);
+
+ // Make uses alive.
SmallVector<RegisterMaskPair, 8> RegUses;
collectVirtualRegUses(RegUses, MI, LIS, *MRI);
for (const RegisterMaskPair &U : RegUses) {
@@ -309,13 +319,9 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
CurPressure.inc(U.RegUnit, PrevMask, LiveMask, *MRI);
}
- // Point of maximum pressure: non-early-clobber defs are decremented and uses
- // are incremented.
- MaxPressure = max(CurPressure, MaxPressure);
-
- // Now decrement early clobber defs.
- for (const MachineOperand *MO : EarlyClobberDefs)
- DecrementDef(*MO);
+ // Update MaxPressure with uses plus early-clobber defs pressure.
+ MaxPressure = HasECDefs ? max(CurPressure + ECDefPressure, MaxPressure)
+ : max(CurPressure, MaxPressure);
assert(CurPressure == getRegPressure(*MRI, LiveRegs));
}
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index e21bf10..4100970 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -85,6 +85,18 @@ struct GCNRegPressure {
return !(*this == O);
}
+ GCNRegPressure &operator+=(const GCNRegPressure &RHS) {
+ for (unsigned I = 0; I < TOTAL_KINDS; ++I)
+ Value[I] += RHS.Value[I];
+ return *this;
+ }
+
+ GCNRegPressure &operator-=(const GCNRegPressure &RHS) {
+ for (unsigned I = 0; I < TOTAL_KINDS; ++I)
+ Value[I] -= RHS.Value[I];
+ return *this;
+ }
+
void dump() const;
private:
@@ -105,6 +117,20 @@ inline GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2) {
return Res;
}
+inline GCNRegPressure operator+(const GCNRegPressure &P1,
+ const GCNRegPressure &P2) {
+ GCNRegPressure Sum = P1;
+ Sum += P2;
+ return Sum;
+}
+
+inline GCNRegPressure operator-(const GCNRegPressure &P1,
+ const GCNRegPressure &P2) {
+ GCNRegPressure Diff = P1;
+ Diff -= P2;
+ return Diff;
+}
+
class GCNRPTracker {
public:
using LiveRegSet = DenseMap<unsigned, LaneBitmask>;
diff --git a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
index 83e85cc..f0c5ba4 100644
--- a/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
+++ b/llvm/test/CodeGen/AMDGPU/regpressure_printer.mir
@@ -47,87 +47,46 @@ body: |
name: live_through_test
tracksRegLiveness: true
body: |
- ; RPU-LABEL: name: live_through_test
- ; RPU: bb.0:
- ; RPU-NEXT: Live-in:
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: 3 0 %0:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: Live-out: %0:00000000000000F3
- ; RPU-NEXT: Live-thr:
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: bb.1:
- ; RPU-NEXT: Live-in: %0:00000000000000F3
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
- ; RPU-NEXT: 3 0
- ; RPU-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: Live-out: %0:00000000000000C3
- ; RPU-NEXT: Live-thr: %0:00000000000000C0
- ; RPU-NEXT: 1 0
- ; RPU-NEXT: bb.2:
- ; RPU-NEXT: Live-in: %0:00000000000000C3
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 2 0
- ; RPU-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: Live-out:
- ; RPU-NEXT: Live-thr:
- ; RPU-NEXT: 0 0
- ;
- ; RPD-LABEL: name: live_through_test
- ; RPD: bb.0:
- ; RPD-NEXT: Live-in:
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: Live-out: %0:00000000000000F3
- ; RPD-NEXT: Live-thr:
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: bb.1:
- ; RPD-NEXT: Live-in: %0:00000000000000F3
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
- ; RPD-NEXT: 3 0
- ; RPD-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: Live-out: %0:00000000000000C3
- ; RPD-NEXT: Live-thr: %0:00000000000000C0
- ; RPD-NEXT: 1 0
- ; RPD-NEXT: bb.2:
- ; RPD-NEXT: Live-in: %0:00000000000000C3
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 2 0
- ; RPD-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: Live-out:
- ; RPD-NEXT: Live-thr:
- ; RPD-NEXT: 0 0
+ ; RP-LABEL: name: live_through_test
+ ; RP: bb.0:
+ ; RP-NEXT: Live-in:
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 4 0 %0:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: Live-out: %0:00000000000000F3
+ ; RP-NEXT: Live-thr:
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: bb.1:
+ ; RP-NEXT: Live-in: %0:00000000000000F3
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub0:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 3 0 %0.sub0:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 4 0 %0.sub1:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 3 0 %0.sub2:sgpr_128 = IMPLICIT_DEF
+ ; RP-NEXT: 3 0
+ ; RP-NEXT: 3 0 S_NOP 0, implicit %0.sub2:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: Live-out: %0:00000000000000C3
+ ; RP-NEXT: Live-thr: %0:00000000000000C0
+ ; RP-NEXT: 1 0
+ ; RP-NEXT: bb.2:
+ ; RP-NEXT: Live-in: %0:00000000000000C3
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 2 0
+ ; RP-NEXT: 2 0 S_NOP 0, implicit %0.sub3:sgpr_128, implicit %0.sub0:sgpr_128
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
+ ; RP-NEXT: Live-thr:
+ ; RP-NEXT: 0 0
bb.0:
%0:sgpr_128 = IMPLICIT_DEF
bb.1:
@@ -223,7 +182,7 @@ body: |
; RPU-NEXT: 0 7
; RPU-NEXT: 0 7 %7:vgpr_32 = GLOBAL_LOAD_DWORD %5:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 6
- ; RPU-NEXT: 0 7 %8:vreg_64 = IMPLICIT_DEF
+ ; RPU-NEXT: 0 8 %8:vreg_64 = IMPLICIT_DEF
; RPU-NEXT: 0 7
; RPU-NEXT: 0 9 %9:vreg_64 = IMPLICIT_DEF
; RPU-NEXT: 0 9
@@ -262,7 +221,7 @@ body: |
; RPU-NEXT: 0 12
; RPU-NEXT: 0 12 dead %21:vgpr_32 = GLOBAL_LOAD_DWORD %14:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 10
- ; RPU-NEXT: 0 10 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
+ ; RPU-NEXT: 0 11 dead %22:vgpr_32 = GLOBAL_LOAD_DWORD %15:vreg_64, 0, 0, implicit $exec
; RPU-NEXT: 0 10
; RPU-NEXT: 0 10 %23:vreg_64 = V_LSHLREV_B64_e64 2, %8:vreg_64, implicit $exec
; RPU-NEXT: 0 9
@@ -550,7 +509,7 @@ body: |
; RPU-NEXT: 0 0
; RPU-NEXT: 0 0 $sgpr0 = S_BUFFER_LOAD_DWORD_IMM $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0
; RPU-NEXT: 0 0
- ; RPU-NEXT: 0 0 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
+ ; RPU-NEXT: 0 1 undef %0.sub5:vreg_512 = V_MOV_B32_e32 5, implicit $exec
; RPU-NEXT: 0 0
; RPU-NEXT: 0 0 S_CMP_GT_U32 $sgpr0, 15, implicit-def $scc
; RPU-NEXT: 0 0
@@ -569,7 +528,7 @@ body: |
; RPU-NEXT: 0 1
; RPU-NEXT: 0 1 $m0 = S_MOV_B32 killed $sgpr0
; RPU-NEXT: 0 1
- ; RPU-NEXT: 0 1 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
+ ; RPU-NEXT: 0 16 %0:vreg_512 = V_INDIRECT_REG_WRITE_MOVREL_B32_V16 %0:vreg_512(tied-def 0), 42, 3, implicit $m0, implicit $exec
; RPU-NEXT: 0 1
; RPU-NEXT: Live-out: %0:0000000000000C00
; RPU-NEXT: Live-thr:
@@ -709,33 +668,19 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3
- ; RPU-LABEL: name: test_partially_used_early_clobber_def
- ; RPU: Live-in:
- ; RPU-NEXT: SGPR VGPR
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; RPU-NEXT: 4 0
- ; RPU-NEXT: 5 0 early-clobber %1:sgpr_128 = COPY %0:sgpr_128
- ; RPU-NEXT: 1 0
- ; RPU-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
- ; RPU-NEXT: 0 0
- ; RPU-NEXT: Live-out:
- ; RPU-NEXT: Live-thr:
- ; RPU-NEXT: 0 0
- ;
- ; RPD-LABEL: name: test_partially_used_early_clobber_def
- ; RPD: Live-in:
- ; RPD-NEXT: SGPR VGPR
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
- ; RPD-NEXT: 4 0
- ; RPD-NEXT: 8 0 early-clobber %1:sgpr_128 = COPY %0:sgpr_128
- ; RPD-NEXT: 1 0
- ; RPD-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
- ; RPD-NEXT: 0 0
- ; RPD-NEXT: Live-out:
- ; RPD-NEXT: Live-thr:
- ; RPD-NEXT: 0 0
+ ; RP-LABEL: name: test_partially_used_early_clobber_def
+ ; RP: Live-in:
+ ; RP-NEXT: SGPR VGPR
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+ ; RP-NEXT: 4 0
+ ; RP-NEXT: 8 0 early-clobber %1:sgpr_128 = COPY %0:sgpr_128
+ ; RP-NEXT: 1 0
+ ; RP-NEXT: 1 0 S_NOP 0, implicit %1.sub1:sgpr_128
+ ; RP-NEXT: 0 0
+ ; RP-NEXT: Live-out:
+ ; RP-NEXT: Live-thr:
+ ; RP-NEXT: 0 0
%0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
early-clobber %1:sgpr_128 = COPY %0:sgpr_128
S_NOP 0, implicit %1.sub1
@@ -752,7 +697,7 @@ body: |
; RPU-NEXT: 0 0
; RPU-NEXT: 4 0 %0:sgpr_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
; RPU-NEXT: 4 0
- ; RPU-NEXT: 7 0 %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def dead early-clobber %4:sgpr_128
+ ; RPU-NEXT: 16 0 %1:sgpr_128 = COPY %0:sgpr_128, implicit-def %2:sgpr_128, implicit-def early-clobber %3:sgpr_128, implicit-def dead early-clobber %4:sgpr_128
; RPU-NEXT: 6 0
; RPU-NEXT: 6 0 S_NOP 0, implicit %1.sub1:sgpr_128, implicit %2.sub0_sub1:sgpr_128, implicit %3.sub0_sub1_sub2:sgpr_128
; RPU-NEXT: 0 0