aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
diff options
context:
space:
mode:
authorNemanja Ivanovic <nemanja.i.ibm@gmail.com>2022-06-20 08:45:24 -0500
committerLei Huang <lei@ca.ibm.com>2022-06-20 14:30:29 -0500
commite09f6ff3c19a2d40a7fca1228c1ae789a4427ca6 (patch)
treef3337307dcca8efba85cac8e0b944071174f4fc7 /llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
parent4cd416193cc126355a22b2c9e5c1df3a49b59e50 (diff)
downloadllvm-e09f6ff3c19a2d40a7fca1228c1ae789a4427ca6.zip
llvm-e09f6ff3c19a2d40a7fca1228c1ae789a4427ca6.tar.gz
llvm-e09f6ff3c19a2d40a7fca1228c1ae789a4427ca6.tar.bz2
[PowerPC] Disable automatic generation of STXVP
There are instances where using paired vector stores leads to significant performance degradation due to issues with store forwarding. To avoid falling into this trap with compiler-generated code, we will not emit these instructions unless the user requests them explicitly (with a builtin or by specifying the option). Reviewed By: lei, amyk, saghir. Differential Revision: https://reviews.llvm.org/D127218
Diffstat (limited to 'llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp')
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp75
1 files changed, 69 insertions, 6 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index b98d293..7349eb8 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -91,6 +91,8 @@ ReportAccMoves("ppc-report-acc-moves",
cl::Hidden, cl::init(false));
#endif
+extern cl::opt<bool> DisableAutoPairedVecSt;
+
static unsigned offsetMinAlignForOpcode(unsigned OpC);
PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
@@ -1199,6 +1201,59 @@ static void emitAccSpillRestoreInfo(MachineBasicBlock &MBB, bool IsPrimed,
#endif
}
+/// Spill the 16-byte registers that make up one or two vector register
+/// pairs to the stack slot \p FrameIndex using individual STXV stores
+/// instead of paired (STXVP) stores. The new instructions are inserted in
+/// \p MBB before \p II and carry the debug location \p DL.
+///
+/// \param SrcReg  The (first) VSRp register pair to spill. Pairs up to
+///                VSRp15 are mapped onto VSL0 + 2 * index; pairs from
+///                VSRp16 upwards are mapped onto V0 + 2 * index.
+/// \param IsLittleEndian  Selects the slot layout. On little-endian the
+///                first register is stored at the highest offset and each
+///                following register 16 bytes lower; on big-endian the
+///                first register is stored at offset 0 and each following
+///                register 16 bytes higher.
+/// \param IsKilled  Kill state applied to the register operand of every
+///                store emitted here.
+/// \param TwoPairs  When true, four consecutive registers are stored
+///                (offsets 0..48); otherwise two (offsets 0..16).
+static void spillRegPairs(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator II, DebugLoc DL,
+                          const TargetInstrInfo &TII, Register SrcReg,
+                          unsigned FrameIndex, bool IsLittleEndian,
+                          bool IsKilled, bool TwoPairs) {
+  // The register arithmetic below is only meaningful for physical register
+  // numbers, so reject virtual registers up front.
+  assert(!SrcReg.isVirtual() &&
+         "Spilling register pairs does not support virtual registers.");
+
+  const unsigned NumRegs = TwoPairs ? 4 : 2;
+  // Signed on purpose: the little-endian walk steps downwards through the
+  // slot, and this avoids relying on unsigned wraparound for the -16 step.
+  const int Step = IsLittleEndian ? -16 : 16;
+  int Offset = IsLittleEndian ? static_cast<int>(NumRegs - 1) * 16 : 0;
+
+  // First 16-byte register covered by the pair.
+  Register Reg = (SrcReg > PPC::VSRp15) ? PPC::V0 + (SrcReg - PPC::VSRp16) * 2
+                                        : PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;
+
+  // Emit one STXV per register, in register order.
+  for (unsigned I = 0; I != NumRegs; ++I, Offset += Step)
+    addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXV))
+                          .addReg(Reg + I, getKillRegState(IsKilled)),
+                      FrameIndex, Offset);
+}
+
+/// Replace the paired vector store at \p II, which targets the stack slot
+/// \p FrameIndex, with two individual STXV stores of the registers that
+/// make up its source pair, then delete the original instruction. Must only
+/// be called when DisableAutoPairedVecSt is set (asserted below).
+void PPCRegisterInfo::lowerOctWordSpilling(MachineBasicBlock::iterator II,
+                                           unsigned FrameIndex) const {
+  assert(DisableAutoPairedVecSt &&
+         "Expecting to do this only if paired vector stores are disabled.");
+  MachineInstr &PairedStore = *II; // STXVP <SrcReg>, <offset>
+  MachineBasicBlock &Block = *PairedStore.getParent();
+  const PPCSubtarget &ST =
+      Block.getParent()->getSubtarget<PPCSubtarget>();
+  const TargetInstrInfo &InstrInfo = *ST.getInstrInfo();
+
+  // Read everything needed from the original instruction before it is
+  // erased: source register, its kill flag and the debug location.
+  const auto &SrcOp = PairedStore.getOperand(0);
+  Register PairReg = SrcOp.getReg();
+  bool SrcIsKilled = SrcOp.isKill();
+  DebugLoc Loc = PairedStore.getDebugLoc();
+
+  spillRegPairs(Block, II, Loc, InstrInfo, PairReg, FrameIndex,
+                ST.isLittleEndian(), SrcIsKilled, /* TwoPairs */ false);
+
+  // The split stores now stand in for the paired store; drop it.
+  Block.erase(II);
+}
+
/// lowerACCSpilling - Generate the code for spilling the accumulator register.
/// Similarly to other spills/reloads that use pseudo-ops, we do not actually
/// eliminate the FrameIndex here nor compute the stack offset. We simply
@@ -1228,12 +1283,17 @@ void PPCRegisterInfo::lowerACCSpilling(MachineBasicBlock::iterator II,
// adjust the offset of the store that is within the 64-byte stack slot.
if (IsPrimed)
BuildMI(MBB, II, DL, TII.get(PPC::XXMFACC), SrcReg).addReg(SrcReg);
- addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
- .addReg(Reg, getKillRegState(IsKilled)),
- FrameIndex, IsLittleEndian ? 32 : 0);
- addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
- .addReg(Reg + 1, getKillRegState(IsKilled)),
- FrameIndex, IsLittleEndian ? 0 : 32);
+ if (DisableAutoPairedVecSt)
+ spillRegPairs(MBB, II, DL, TII, Reg, FrameIndex, IsLittleEndian, IsKilled,
+ /* TwoPairs */ true);
+ else {
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+ .addReg(Reg, getKillRegState(IsKilled)),
+ FrameIndex, IsLittleEndian ? 32 : 0);
+ addFrameReference(BuildMI(MBB, II, DL, TII.get(PPC::STXVP))
+ .addReg(Reg + 1, getKillRegState(IsKilled)),
+ FrameIndex, IsLittleEndian ? 0 : 32);
+ }
if (IsPrimed && !IsKilled)
BuildMI(MBB, II, DL, TII.get(PPC::XXMTACC), SrcReg).addReg(SrcReg);
@@ -1469,6 +1529,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
} else if (OpC == PPC::RESTORE_ACC || OpC == PPC::RESTORE_UACC) {
lowerACCRestore(II, FrameIndex);
return;
+ } else if (OpC == PPC::STXVP && DisableAutoPairedVecSt) {
+ lowerOctWordSpilling(II, FrameIndex);
+ return;
} else if (OpC == PPC::SPILL_QUADWORD) {
lowerQuadwordSpilling(II, FrameIndex);
return;