diff options
author | Chinmay Deshpande <chinmay1dd@gmail.com> | 2024-05-22 13:51:55 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-22 13:51:55 -0700 |
commit | 848bef5d8549cdc79bb0eb3c5a8e0495e432b577 (patch) | |
tree | 9ad3572a49bfffa1fbd715c9bef893b94f4acf68 /llvm | |
parent | 25b65be43df56c1b7bea3fe2596fb36c2788d7af (diff) | |
download | llvm-848bef5d8549cdc79bb0eb3c5a8e0495e432b577.zip llvm-848bef5d8549cdc79bb0eb3c5a8e0495e432b577.tar.gz llvm-848bef5d8549cdc79bb0eb3c5a8e0495e432b577.tar.bz2 |
[llvm-mca] Add command line option -call-latency (#92958)
Currently we assume a constant latency of 100 cycles for call
instructions. This commit lets the user specify a custom call latency
through the -call-latency command line option. The default latency remains 100.
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/include/llvm/MCA/InstrBuilder.h | 3 | ||||
-rw-r--r-- | llvm/lib/MCA/InstrBuilder.cpp | 20 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/call-latency.s | 58 | ||||
-rw-r--r-- | llvm/tools/llvm-mca/llvm-mca.cpp | 7 | ||||
-rw-r--r-- | llvm/unittests/tools/llvm-mca/MCATestBase.cpp | 2 | ||||
-rw-r--r-- | llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp | 4 |
6 files changed, 80 insertions, 14 deletions
diff --git a/llvm/include/llvm/MCA/InstrBuilder.h b/llvm/include/llvm/MCA/InstrBuilder.h index 35943724..00c7942 100644 --- a/llvm/include/llvm/MCA/InstrBuilder.h +++ b/llvm/include/llvm/MCA/InstrBuilder.h @@ -78,6 +78,7 @@ class InstrBuilder { bool FirstCallInst; bool FirstReturnInst; + unsigned CallLatency; using InstRecycleCallback = std::function<Instruction *(const InstrDesc &)>; InstRecycleCallback InstRecycleCB; @@ -98,7 +99,7 @@ class InstrBuilder { public: InstrBuilder(const MCSubtargetInfo &STI, const MCInstrInfo &MCII, const MCRegisterInfo &RI, const MCInstrAnalysis *IA, - const InstrumentManager &IM); + const InstrumentManager &IM, unsigned CallLatency); void clear() { Descriptors.clear(); diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp index bcf065c..d5cbdc5 100644 --- a/llvm/lib/MCA/InstrBuilder.cpp +++ b/llvm/lib/MCA/InstrBuilder.cpp @@ -31,9 +31,9 @@ InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti, const llvm::MCInstrInfo &mcii, const llvm::MCRegisterInfo &mri, const llvm::MCInstrAnalysis *mcia, - const mca::InstrumentManager &im) + const mca::InstrumentManager &im, unsigned cl) : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true), - FirstReturnInst(true) { + FirstReturnInst(true), CallLatency(cl) { const MCSchedModel &SM = STI.getSchedModel(); ProcResourceMasks.resize(SM.getNumProcResourceKinds()); computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks); @@ -220,17 +220,19 @@ static void initializeUsedResources(InstrDesc &ID, static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, const MCSchedClassDesc &SCDesc, - const MCSubtargetInfo &STI) { + const MCSubtargetInfo &STI, + unsigned CallLatency) { if (MCDesc.isCall()) { // We cannot estimate how long this call will take. - // Artificially set an arbitrarily high latency (100cy). - ID.MaxLatency = 100U; + // Artificially set an arbitrarily high latency. 
+ ID.MaxLatency = CallLatency; return; } int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc); - // If latency is unknown, then conservatively assume a MaxLatency of 100cy. - ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency); + // If latency is unknown, then conservatively assume the MaxLatency set for + // calls. + ID.MaxLatency = Latency < 0 ? CallLatency : static_cast<unsigned>(Latency); } static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) { @@ -568,7 +570,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI, // We don't correctly model calls. WithColor::warning() << "found a call in the input assembly sequence.\n"; WithColor::note() << "call instructions are not correctly modeled. " - << "Assume a latency of 100cy.\n"; + << "Assume a latency of " << CallLatency << "cy.\n"; FirstCallInst = false; } @@ -580,7 +582,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI, } initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks); - computeMaxLatency(*ID, MCDesc, SCDesc, STI); + computeMaxLatency(*ID, MCDesc, SCDesc, STI, CallLatency); if (Error Err = verifyOperands(MCDesc, MCI)) return std::move(Err); diff --git a/llvm/test/tools/llvm-mca/X86/call-latency.s b/llvm/test/tools/llvm-mca/X86/call-latency.s new file mode 100644 index 0000000..9559d11 --- /dev/null +++ b/llvm/test/tools/llvm-mca/X86/call-latency.s @@ -0,0 +1,58 @@ +# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 %s | FileCheck --check-prefixes=ALL,DEFAULT %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -call-latency=50 -iterations=1 %s | FileCheck --check-prefixes=ALL,CUSTOM %s + +callq printf + +# ALL: Iterations: 1 +# ALL-NEXT: Instructions: 1 + +# CUSTOM-NEXT: Total Cycles: 53 +# DEFAULT-NEXT: Total Cycles: 103 + +# ALL-NEXT: Total uOps: 1 + +# ALL: Dispatch Width: 2 + +# CUSTOM-NEXT: uOps Per Cycle: 0.02 +# CUSTOM-NEXT: 
IPC: 0.02 + +# DEFAULT-NEXT: uOps Per Cycle: 0.01 +# DEFAULT-NEXT: IPC: 0.01 + +# ALL-NEXT: Block RThroughput: 0.5 + +# ALL: Instruction Info: +# ALL-NEXT: [1]: #uOps +# ALL-NEXT: [2]: Latency +# ALL-NEXT: [3]: RThroughput +# ALL-NEXT: [4]: MayLoad +# ALL-NEXT: [5]: MayStore +# ALL-NEXT: [6]: HasSideEffects (U) + +# ALL: [1] [2] [3] [4] [5] [6] Instructions: +# ALL-NEXT: 1 1 0.50 callq printf + +# ALL: Resources: +# ALL-NEXT: [0] - JALU0 +# ALL-NEXT: [1] - JALU1 +# ALL-NEXT: [2] - JDiv +# ALL-NEXT: [3] - JFPA +# ALL-NEXT: [4] - JFPM +# ALL-NEXT: [5] - JFPU0 +# ALL-NEXT: [6] - JFPU1 +# ALL-NEXT: [7] - JLAGU +# ALL-NEXT: [8] - JMul +# ALL-NEXT: [9] - JSAGU +# ALL-NEXT: [10] - JSTC +# ALL-NEXT: [11] - JVALU0 +# ALL-NEXT: [12] - JVALU1 +# ALL-NEXT: [13] - JVIMUL + +# ALL: Resource pressure per iteration: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] +# ALL-NEXT: - 1.00 - - - - - - - - - - - - + +# ALL: Resource pressure by instruction: +# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: +# ALL-NEXT: - 1.00 - - - - - - - - - - - - callq printf diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp index 03d7d79..cc5d4f5 100644 --- a/llvm/tools/llvm-mca/llvm-mca.cpp +++ b/llvm/tools/llvm-mca/llvm-mca.cpp @@ -135,6 +135,11 @@ static cl::opt<unsigned> "(instructions per cycle)"), cl::cat(ToolOptions), cl::init(0)); +static cl::opt<unsigned> + CallLatency("call-latency", cl::Hidden, + cl::desc("Number of cycles to assume for a call instruction"), + cl::cat(ToolOptions), cl::init(100U)); + enum class SkipType { NONE, LACK_SCHED, PARSE_FAILURE, ANY_FAILURE }; static cl::opt<enum SkipType> SkipUnsupportedInstructions( @@ -568,7 +573,7 @@ int main(int argc, char **argv) { } // Create an instruction builder. 
- mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM); + mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, CallLatency); // Create a context to control ownership of the pipeline hardware. mca::Context MCA(*MRI, *STI); diff --git a/llvm/unittests/tools/llvm-mca/MCATestBase.cpp b/llvm/unittests/tools/llvm-mca/MCATestBase.cpp index 4f444fae..4a39f5e 100644 --- a/llvm/unittests/tools/llvm-mca/MCATestBase.cpp +++ b/llvm/unittests/tools/llvm-mca/MCATestBase.cpp @@ -66,7 +66,7 @@ Error MCATestBase::runBaselineMCA(json::Object &Result, ArrayRef<MCInst> Insts, // Default InstrumentManager auto IM = std::make_unique<mca::InstrumentManager>(*STI, *MCII); - mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM); + mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100); const SmallVector<mca::Instrument *> Instruments; SmallVector<std::unique_ptr<mca::Instruction>> LoweredInsts; diff --git a/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp b/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp index 00a44dc..ac35dce 100644 --- a/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp +++ b/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp @@ -33,7 +33,7 @@ TEST_F(X86TestBase, TestResumablePipeline) { P->addEventListener(SV.get()); auto IM = std::make_unique<mca::InstrumentManager>(*STI, *MCII); - mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM); + mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100); const SmallVector<mca::Instrument *> Instruments; // Tile size = 7 @@ -124,7 +124,7 @@ TEST_F(X86TestBase, TestInstructionRecycling) { // Default InstrumentManager auto IM = std::make_unique<mca::InstrumentManager>(*STI, *MCII); - mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM); + mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100); IB.setInstRecycleCallback(GetRecycledInst); const SmallVector<mca::Instrument *> Instruments; |