diff options
author | Yuta Mukai <mukai.yuta@fujitsu.com> | 2022-08-09 13:11:24 +0900 |
---|---|---|
committer | KAWASHIMA Takahiro <t-kawashima@fujitsu.com> | 2022-08-09 13:14:26 +0900 |
commit | 5357dd2f43a5aab28f34b69e7d3742dd50cbad98 (patch) | |
tree | a7c069e54758e890c5dfe0d333f2df6e317ca4a0 /llvm/lib/CodeGen/ModuloSchedule.cpp | |
parent | 0972a390b9c74cd994ad0250cf392aecb67502a3 (diff) | |
download | llvm-5357dd2f43a5aab28f34b69e7d3742dd50cbad98.zip llvm-5357dd2f43a5aab28f34b69e7d3742dd50cbad98.tar.gz llvm-5357dd2f43a5aab28f34b69e7d3742dd50cbad98.tar.bz2 |
[MachinePipeliner] Fix Phi generation failure for large stages
The previous code overwrites VRMap for prologue stages during Phi
generation if a register spans many stages.
As a result, the wrong register is used as the one coming from
the prologue in Phis at later stages. (A process exists to correct
this, but it does not work in all cases.)
In addition, VRMap for prologue must be preserved until addBranches().
This patch fixes them by separating the map for Phis into a different
variable (VRMapPhi).
Reviewed By: bcahoon
Differential Revision: https://reviews.llvm.org/D127840
Diffstat (limited to 'llvm/lib/CodeGen/ModuloSchedule.cpp')
-rw-r--r-- | llvm/lib/CodeGen/ModuloSchedule.cpp | 75 |
1 files changed, 54 insertions, 21 deletions
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp index 581168b..d689e8e 100644 --- a/llvm/lib/CodeGen/ModuloSchedule.cpp +++ b/llvm/lib/CodeGen/ModuloSchedule.cpp @@ -116,6 +116,12 @@ void ModuloScheduleExpander::generatePipelinedLoop() { // a map between register names in the original block and the names created // in each stage of the pipelined loop. ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2]; + + // The renaming destination by Phis for the registers across stages. + // This map is updated during Phis generation to point to the most recent + // renaming destination. + ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2]; + InstrMapTy InstrMap; SmallVector<MachineBasicBlock *, 4> PrologBBs; @@ -151,14 +157,15 @@ void ModuloScheduleExpander::generatePipelinedLoop() { generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap, MaxStageCount, MaxStageCount, false); - generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap, - MaxStageCount, MaxStageCount, false); + generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi, + InstrMap, MaxStageCount, MaxStageCount, false); LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump();); SmallVector<MachineBasicBlock *, 4> EpilogBBs; // Generate the epilog instructions to complete the pipeline. - generateEpilog(MaxStageCount, KernelBB, BB, VRMap, EpilogBBs, PrologBBs); + generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs, + PrologBBs); // We need this step because the register allocation doesn't handle some // situations well, so we insert copies to help out. @@ -171,6 +178,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() { addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap); delete[] VRMap; + delete[] VRMapPhi; } void ModuloScheduleExpander::cleanup() { @@ -242,7 +250,8 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage, /// block for each stage that needs to complete. void ModuloScheduleExpander::generateEpilog( unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB, - ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs) { + ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs, + MBBVectorTy &PrologBBs) { // We need to change the branch from the kernel to the first epilog block, so // this call to analyze branch uses the kernel rather than the original BB. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; @@ -296,8 +305,8 @@ void ModuloScheduleExpander::generateEpilog( } generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap, LastStage, EpilogStage, i == 1); - generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap, - LastStage, EpilogStage, i == 1); + generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi, + InstrMap, LastStage, EpilogStage, i == 1); PredBB = NewBB; LLVM_DEBUG({ @@ -593,8 +602,9 @@ void ModuloScheduleExpander::generateExistingPhis( /// use in the pipelined sequence. void ModuloScheduleExpander::generatePhis( MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, - MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap, - unsigned LastStageNum, unsigned CurStageNum, bool IsLast) { + MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi, + InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum, + bool IsLast) { // Compute the stage number that contains the initial Phi value, and // the Phi from the previous stage. unsigned PrologStage = 0; @@ -631,26 +641,49 @@ void ModuloScheduleExpander::generatePhis( if (!InKernel && (unsigned)StageScheduled > PrologStage) continue; - unsigned PhiOp2 = VRMap[PrevStage][Def]; - if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) - if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) - PhiOp2 = getLoopPhiReg(*InstOp2, BB2); + unsigned PhiOp2; + if (InKernel) { + PhiOp2 = VRMap[PrevStage][Def]; + if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) + if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) + PhiOp2 = getLoopPhiReg(*InstOp2, BB2); + } // The number of Phis can't exceed the number of prolog stages. The // prolog stage number is zero based. if (NumPhis > PrologStage + 1 - StageScheduled) NumPhis = PrologStage + 1 - StageScheduled; for (unsigned np = 0; np < NumPhis; ++np) { + // Example for + // Org: + // %Org = ... (Scheduled at Stage#0, NumPhi = 2) + // + // Prolog0 (Stage0): + // %Clone0 = ... + // Prolog1 (Stage1): + // %Clone1 = ... + // Kernel (Stage2): + // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel + // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel + // %Clone2 = ... + // Epilog0 (Stage3): + // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel + // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel + // Epilog1 (Stage4): + // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0 + // + // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2} + // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0} + // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2} + unsigned PhiOp1 = VRMap[PrologStage][Def]; if (np <= PrologStage) PhiOp1 = VRMap[PrologStage - np][Def]; - if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) { - if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) - PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); - if (InstOp1->isPHI() && InstOp1->getParent() == NewBB) - PhiOp1 = getInitPhiReg(*InstOp1, NewBB); + if (!InKernel) { + if (PrevStage == LastStageNum && np == 0) + PhiOp2 = VRMap[LastStageNum][Def]; + else + PhiOp2 = VRMapPhi[PrevStage - np][Def]; } - if (!InKernel) - PhiOp2 = VRMap[PrevStage - np][Def]; const TargetRegisterClass *RC = MRI.getRegClass(Def); Register NewReg = MRI.createVirtualRegister(RC); @@ -672,9 +705,9 @@ void ModuloScheduleExpander::generatePhis( NewReg); PhiOp2 = NewReg; - VRMap[PrevStage - np - 1][Def] = NewReg; + VRMapPhi[PrevStage - np - 1][Def] = NewReg; } else { - VRMap[CurStageNum - np][Def] = NewReg; + VRMapPhi[CurStageNum - np][Def] = NewReg; if (np == NumPhis - 1) rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def, NewReg); |