diff options
author | Koakuma <koachan@protonmail.com> | 2024-01-15 04:28:51 +0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-14 16:28:51 -0500 |
commit | 63f98290d09e1da05fb1217d1b760cbe24b76db9 (patch) | |
tree | ddb7d53229c8c157070315db9882dd746128aefe /llvm/lib/Target/Sparc/SparcAsmPrinter.cpp | |
parent | 72990df072a56996612169f07c5752a6924288bb (diff) | |
download | llvm-63f98290d09e1da05fb1217d1b760cbe24b76db9.zip llvm-63f98290d09e1da05fb1217d1b760cbe24b76db9.tar.gz llvm-63f98290d09e1da05fb1217d1b760cbe24b76db9.tar.bz2 |
[SPARC] Prefer RDPC over CALL to implement GETPCX for 64-bit target (#77196) users/koachan/main.sparc-prefer-rdpc-over-call-to-implement-getpcx-for-64-bit-target
On 64-bit targets, prefer using RDPC over CALL to get the value of %pc.
This is faster on modern processors (Niagara T1 and newer) and avoids polluting
the processor's predictor state.
The old behavior of using a fake CALL is still done when tuning for classic
UltraSPARC processors, since RDPC is much slower there.
A quick pgbench test on a SPARC T4 shows about 2% speedup on SELECT loads,
and about 7% speedup on INSERT/UPDATE loads.
Diffstat (limited to 'llvm/lib/Target/Sparc/SparcAsmPrinter.cpp')
-rw-r--r-- | llvm/lib/Target/Sparc/SparcAsmPrinter.cpp | 25 |
1 files changed, 22 insertions, 3 deletions
diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp index cca624e..215a8ea 100644 --- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -13,6 +13,7 @@ #include "MCTargetDesc/SparcInstPrinter.h" #include "MCTargetDesc/SparcMCExpr.h" +#include "MCTargetDesc/SparcMCTargetDesc.h" #include "MCTargetDesc/SparcTargetStreamer.h" #include "Sparc.h" #include "SparcInstrInfo.h" @@ -111,6 +112,15 @@ static void EmitCall(MCStreamer &OutStreamer, OutStreamer.emitInstruction(CallInst, STI); } +static void EmitRDPC(MCStreamer &OutStreamer, MCOperand &RD, + const MCSubtargetInfo &STI) { + MCInst RDPCInst; + RDPCInst.setOpcode(SP::RDASR); + RDPCInst.addOperand(RD); + RDPCInst.addOperand(MCOperand::createReg(SP::ASR5)); + OutStreamer.emitInstruction(RDPCInst, STI); +} + static void EmitSETHI(MCStreamer &OutStreamer, MCOperand &Imm, MCOperand &RD, const MCSubtargetInfo &STI) @@ -226,7 +236,7 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI, MCOperand RegO7 = MCOperand::createReg(SP::O7); // <StartLabel>: - // call <EndLabel> + // <GET-PC> // This will be either `call <EndLabel>` or `rd %pc, %o7`. // <SethiLabel>: // sethi %hi(_GLOBAL_OFFSET_TABLE_+(<SethiLabel>-<StartLabel>)), <MO> // <EndLabel>: @@ -234,8 +244,17 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI, // add <MO>, %o7, <MO> OutStreamer->emitLabel(StartLabel); - MCOperand Callee = createPCXCallOP(EndLabel, OutContext); - EmitCall(*OutStreamer, Callee, STI); + if (!STI.getTargetTriple().isSPARC64() || + STI.hasFeature(Sparc::TuneSlowRDPC)) { + MCOperand Callee = createPCXCallOP(EndLabel, OutContext); + EmitCall(*OutStreamer, Callee, STI); + } else { + // TODO find out whether it is possible to store PC + // in other registers, to enable leaf function optimization. + // (On the other hand, approx. 
over 97.8% of GETPCXes happen + // in non-leaf functions, so would this be worth the effort?) + EmitRDPC(*OutStreamer, RegO7, STI); + } OutStreamer->emitLabel(SethiLabel); MCOperand hiImm = createPCXRelExprOp(SparcMCExpr::VK_Sparc_PC22, GOTLabel, StartLabel, SethiLabel, |