[flang][Driver] Preliminary support for -ftime-report (#122894)

The behavior is not entirely consistent with that of clang for the moment, since detailed timing information on the LLVM IR optimization and code generation passes is not provided. The -ftime-report= option is also not enabled, since that is only relevant for information about the LLVM IR passes. However, some code to handle that option has been included, to make it easier to support the option when the issues blocking it are resolved. A FortranSupport library has been created that is intended to mirror the LLVM and MLIR support libraries. Based on @tarunprabhu's PR https://github.com/llvm/llvm-project/pull/107270 with minor changes addressing the latest review feedback. He's busy and we'd like to get this support in ASAP. Co-authored-by: Tarun Prabhu <tarun.prabhu@gmail.com>
author: macurtis-amd <macurtis@amd.com> 2025-01-14 08:02:02 -0600
committer: GitHub <noreply@github.com> 2025-01-14 08:02:02 -0600
commit: 310c281b020b169e760ca75f878f5873ffbb2a9f (patch)
tree: 79b81f4101229536c82eddf60bd1f09461dd281d /flang/lib/Frontend
parent: 8fe11a26ae8f12622ddec83a7b80637080843a8b (diff)
download: llvm-310c281b020b169e760ca75f878f5873ffbb2a9f.zip
llvm-310c281b020b169e760ca75f878f5873ffbb2a9f.tar.gz
llvm-310c281b020b169e760ca75f878f5873ffbb2a9f.tar.bz2
4 files changed, 122 insertions, 7 deletions
diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt
index e954800..1b90fe8 100644
--- a/flang/lib/Frontend/CMakeLists.txt
+++ b/flang/lib/Frontend/CMakeLists.txt
@@ -29,6 +29,7 @@ add_flang_library(flangFrontend
   FortranEvaluate
   FortranCommon
   FortranLower
+  FortranSupport
   FIRDialect
   FIRDialectSupport
   FIRSupport
diff --git a/flang/lib/Frontend/CompilerInstance.cpp b/flang/lib/Frontend/CompilerInstance.cpp
index 35c2ae3..298790b 100644
--- a/flang/lib/Frontend/CompilerInstance.cpp
+++ b/flang/lib/Frontend/CompilerInstance.cpp
@@ -17,9 +17,12 @@
 #include "flang/Parser/parsing.h"
 #include "flang/Parser/provenance.h"
 #include "flang/Semantics/semantics.h"
+#include "flang/Support/Timing.h"
+#include "mlir/Support/RawOstreamExtras.h"
 #include "clang/Basic/DiagnosticFrontend.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/MC/TargetRegistry.h"
+#include "llvm/Pass.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
@@ -147,7 +150,7 @@ void CompilerInstance::clearOutputFiles(bool eraseFiles) {
 }
 
 bool CompilerInstance::executeAction(FrontendAction &act) {
-  auto &invoc = this->getInvocation();
+  CompilerInvocation &invoc = this->getInvocation();
 
   llvm::Triple targetTriple{llvm::Triple(invoc.getTargetOpts().triple)};
   if (targetTriple.getArch() == llvm::Triple::ArchType::x86_64) {
@@ -167,6 +170,25 @@ bool CompilerInstance::executeAction(FrontendAction &act) {
   // Set options controlling lowering to FIR.
   invoc.setLoweringOptions();
 
+  if (invoc.getEnableTimers()) {
+    llvm::TimePassesIsEnabled = true;
+
+    timingStreamMLIR = std::make_unique<Fortran::support::string_ostream>();
+    timingStreamLLVM = std::make_unique<Fortran::support::string_ostream>();
+    timingStreamCodeGen = std::make_unique<Fortran::support::string_ostream>();
+
+    timingMgr.setEnabled(true);
+    timingMgr.setDisplayMode(mlir::DefaultTimingManager::DisplayMode::Tree);
+    timingMgr.setOutput(
+        Fortran::support::createTimingFormatterText(*timingStreamMLIR));
+
+    // Creating a new TimingScope will automatically start the timer. Since this
+    // is the top-level timer, this is ok because it will end up capturing the
+    // time for all the bookkeeping and other tasks that take place between
+    // parsing, lowering etc. for which finer-grained timers will be created.
+    timingScopeRoot = timingMgr.getRootScope();
+  }
+
   // Run the frontend action `act` for every input file.
   for (const FrontendInputFile &fif : getFrontendOpts().inputs) {
     if (act.beginSourceFile(*this, fif)) {
@@ -176,6 +198,34 @@ bool CompilerInstance::executeAction(FrontendAction &act) {
       act.endSourceFile();
     }
   }
+
+  if (timingMgr.isEnabled()) {
+    timingScopeRoot.stop();
+
+    // Write the timings to the associated output stream and clear all timers.
+    // We need to provide another stream because the TimingManager will attempt
+    // to print in its destructor even if it has been cleared. By the time that
+    // destructor runs, the output streams will have been destroyed, so give it
+    // a null stream.
+    timingMgr.print();
+    timingMgr.setOutput(
+        Fortran::support::createTimingFormatterText(mlir::thread_safe_nulls()));
+
+    // This prints the timings in "reverse" order, starting from code
+    // generation, followed by LLVM-IR optimizations, then MLIR optimizations
+    // and transformations and the frontend. If any of the steps are disabled,
+    // for instance because code generation was not performed, the strings
+    // will be empty.
+    if (!timingStreamCodeGen->str().empty())
+      llvm::errs() << timingStreamCodeGen->str() << "\n";
+
+    if (!timingStreamLLVM->str().empty())
+      llvm::errs() << timingStreamLLVM->str() << "\n";
+
+    if (!timingStreamMLIR->str().empty())
+      llvm::errs() << timingStreamMLIR->str() << "\n";
+  }
+
   return !getDiagnostics().getClient()->getNumErrors();
 }
 
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 340efb1..5e71273 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -1436,6 +1436,10 @@ bool CompilerInvocation::createFromArgs(
     }
   }
 
+  // Process the timing-related options.
+  if (args.hasArg(clang::driver::options::OPT_ftime_report))
+    invoc.enableTimers = true;
+
   invoc.setArgv0(argv0);
 
   return success;
diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp
index 310cd65..52a18d5 100644
--- a/flang/lib/Frontend/FrontendActions.cpp
+++ b/flang/lib/Frontend/FrontendActions.cpp
@@ -84,6 +84,15 @@ extern cl::opt<bool> PrintPipelinePasses;
 
 using namespace Fortran::frontend;
 
+constexpr llvm::StringLiteral timingIdParse = "Parse";
+constexpr llvm::StringLiteral timingIdMLIRGen = "MLIR generation";
+constexpr llvm::StringLiteral timingIdMLIRPasses =
+    "MLIR translation/optimization";
+constexpr llvm::StringLiteral timingIdLLVMIRGen = "LLVM IR generation";
+constexpr llvm::StringLiteral timingIdLLVMIRPasses = "LLVM IR optimizations";
+constexpr llvm::StringLiteral timingIdBackend =
+    "Assembly/Object code generation";
+
 // Declare plugin extension function declarations.
 #define HANDLE_EXTENSION(Ext)                                                  \
   llvm::PassPluginLibraryInfo get##Ext##PluginInfo();
@@ -227,6 +236,14 @@ static void addAMDGPUSpecificMLIRItems(mlir::ModuleOp mlirModule,
 bool CodeGenAction::beginSourceFileAction() {
   llvmCtx = std::make_unique<llvm::LLVMContext>();
   CompilerInstance &ci = this->getInstance();
+  mlir::DefaultTimingManager &timingMgr = ci.getTimingManager();
+  mlir::TimingScope &timingScopeRoot = ci.getTimingScopeRoot();
+
+  // This will provide timing information even when the input is an LLVM IR or
+  // MLIR file. That is fine because those do have to be parsed, so the label
+  // is still accurate.
+  mlir::TimingScope timingScopeParse = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdParse, timingMgr));
 
   // If the input is an LLVM file, just parse it and return.
   if (this->getCurrentInput().getKind().getLanguage() == Language::LLVM_IR) {
@@ -288,6 +305,10 @@ bool CodeGenAction::beginSourceFileAction() {
   if (!res)
     return res;
 
+  timingScopeParse.stop();
+  mlir::TimingScope timingScopeMLIRGen = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdMLIRGen, timingMgr));
+
   // Create a LoweringBridge
   const common::IntrinsicTypeDefaultKinds &defKinds =
       ci.getSemanticsContext().defaultKinds();
@@ -322,6 +343,7 @@ bool CodeGenAction::beginSourceFileAction() {
   // constants etc.
   addDependentLibs(*mlirModule, ci);
   addAMDGPUSpecificMLIRItems(*mlirModule, ci);
+  timingScopeMLIRGen.stop();
 
   // run the default passes.
   mlir::PassManager pm((*mlirModule)->getName(),
@@ -344,6 +366,7 @@ bool CodeGenAction::beginSourceFileAction() {
 
   pm.enableVerifier(/*verifyPasses=*/true);
   pm.addPass(std::make_unique<Fortran::lower::VerifierPass>());
+  pm.enableTiming(timingScopeMLIRGen);
 
   if (mlir::failed(pm.run(*mlirModule))) {
     unsigned diagID = ci.getDiagnostics().getCustomDiagID(
@@ -352,6 +375,7 @@ bool CodeGenAction::beginSourceFileAction() {
     ci.getDiagnostics().Report(diagID);
     return false;
   }
+  timingScopeMLIRGen.stop();
 
   // Print initial full MLIR module, before lowering or transformations, if
   // -save-temps has been specified.
@@ -704,8 +728,10 @@ void CodeGenAction::lowerHLFIRToFIR() {
   assert(mlirModule && "The MLIR module has not been generated yet.");
 
   CompilerInstance &ci = this->getInstance();
-  auto opts = ci.getInvocation().getCodeGenOpts();
+  const CodeGenOptions &opts = ci.getInvocation().getCodeGenOpts();
   llvm::OptimizationLevel level = mapToLevel(opts);
+  mlir::DefaultTimingManager &timingMgr = ci.getTimingManager();
+  mlir::TimingScope &timingScopeRoot = ci.getTimingScopeRoot();
 
   fir::support::loadDialects(*mlirCtx);
 
@@ -724,6 +750,9 @@ void CodeGenAction::lowerHLFIRToFIR() {
       level);
   (void)mlir::applyPassManagerCLOptions(pm);
 
+  mlir::TimingScope timingScopeMLIRPasses = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdMLIRPasses, timingMgr));
+  pm.enableTiming(timingScopeMLIRPasses);
   if (!mlir::succeeded(pm.run(*mlirModule))) {
     unsigned diagID = ci.getDiagnostics().getCustomDiagID(
         clang::DiagnosticsEngine::Error, "Lowering to FIR failed");
@@ -808,9 +837,12 @@ void CodeGenAction::generateLLVMIR() {
   assert(mlirModule && "The MLIR module has not been generated yet.");
 
   CompilerInstance &ci = this->getInstance();
-  auto opts = ci.getInvocation().getCodeGenOpts();
-  auto mathOpts = ci.getInvocation().getLoweringOpts().getMathOptions();
+  CompilerInvocation &invoc = ci.getInvocation();
+  const CodeGenOptions &opts = invoc.getCodeGenOpts();
+  const auto &mathOpts = invoc.getLoweringOpts().getMathOptions();
   llvm::OptimizationLevel level = mapToLevel(opts);
+  mlir::DefaultTimingManager &timingMgr = ci.getTimingManager();
+  mlir::TimingScope &timingScopeRoot = ci.getTimingScopeRoot();
 
   fir::support::loadDialects(*mlirCtx);
   mlir::DialectRegistry registry;
@@ -846,11 +878,15 @@ void CodeGenAction::generateLLVMIR() {
   (void)mlir::applyPassManagerCLOptions(pm);
 
   // run the pass manager
+  mlir::TimingScope timingScopeMLIRPasses = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdMLIRPasses, timingMgr));
+  pm.enableTiming(timingScopeMLIRPasses);
   if (!mlir::succeeded(pm.run(*mlirModule))) {
     unsigned diagID = ci.getDiagnostics().getCustomDiagID(
         clang::DiagnosticsEngine::Error, "Lowering to LLVM IR failed");
     ci.getDiagnostics().Report(diagID);
   }
+  timingScopeMLIRPasses.stop();
 
   // Print final MLIR module, just before translation into LLVM IR, if
   // -save-temps has been specified.
@@ -863,6 +899,8 @@ void CodeGenAction::generateLLVMIR() {
   }
 
   // Translate to LLVM IR
+  mlir::TimingScope timingScopeLLVMIRGen = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdLLVMIRGen, timingMgr));
   std::optional<llvm::StringRef> moduleName = mlirModule->getName();
   llvmModule = mlir::translateModuleToLLVMIR(
       *mlirModule, *llvmCtx, moduleName ? *moduleName : "FIRModule");
@@ -969,11 +1007,12 @@ static void generateMachineCodeOrAssemblyImpl(clang::DiagnosticsEngine &diags,
 }
 
 void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
-  auto opts = getInstance().getInvocation().getCodeGenOpts();
-  auto &diags = getInstance().getDiagnostics();
+  CompilerInstance &ci = getInstance();
+  const CodeGenOptions &opts = ci.getInvocation().getCodeGenOpts();
+  clang::DiagnosticsEngine &diags = ci.getDiagnostics();
   llvm::OptimizationLevel level = mapToLevel(opts);
 
-  llvm::TargetMachine *targetMachine = &getInstance().getTargetMachine();
+  llvm::TargetMachine *targetMachine = &ci.getTargetMachine();
   // Create the analysis managers.
   llvm::LoopAnalysisManager lam;
   llvm::FunctionAnalysisManager fam;
@@ -987,6 +1026,8 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
   llvm::StandardInstrumentations si(llvmModule->getContext(),
                                     opts.DebugPassManager);
   si.registerCallbacks(pic, &mam);
+  if (ci.isTimingEnabled())
+    si.getTimePasses().setOutStream(ci.getTimingStreamLLVM());
   llvm::PassBuilder pb(targetMachine, pto, pgoOpt, &pic);
 
   // Attempt to load pass plugins and register their callbacks with PB.
@@ -1049,6 +1090,10 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) {
   // Run the passes.
   mpm.run(*llvmModule, mam);
 
+  // Print the timers to the associated output stream and reset them.
+  if (ci.isTimingEnabled())
+    si.getTimePasses().print();
+
   // Cleanup
   delete tlii;
 }
@@ -1271,6 +1316,8 @@ void CodeGenAction::executeAction() {
   const CodeGenOptions &codeGenOpts = ci.getInvocation().getCodeGenOpts();
   Fortran::lower::LoweringOptions &loweringOpts =
       ci.getInvocation().getLoweringOpts();
+  mlir::DefaultTimingManager &timingMgr = ci.getTimingManager();
+  mlir::TimingScope &timingScopeRoot = ci.getTimingScopeRoot();
 
   // If the output stream is a file, generate it and define the corresponding
   // output stream. If a pre-defined output stream is available, we will use
@@ -1316,6 +1363,11 @@ void CodeGenAction::executeAction() {
   if (!llvmModule)
     generateLLVMIR();
 
+  // This will already have been started in generateLLVMIR(). But we need to
+  // continue operating on the module, so we continue timing it.
+  mlir::TimingScope timingScopeLLVMIRGen = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdLLVMIRGen, timingMgr));
+
   // If generating the LLVM module failed, abort! No need for further error
   // reporting since generateLLVMIR() does this already.
   if (!llvmModule)
@@ -1345,6 +1397,7 @@ void CodeGenAction::executeAction() {
   // Embed offload objects specified with -fembed-offload-object
   if (!codeGenOpts.OffloadObjects.empty())
     embedOffloadObjects();
+  timingScopeLLVMIRGen.stop();
 
   BackendRemarkConsumer remarkConsumer(diags, codeGenOpts);
 
@@ -1373,7 +1426,10 @@ void CodeGenAction::executeAction() {
   }
 
   // Run LLVM's middle-end (i.e. the optimizer).
+  mlir::TimingScope timingScopeLLVMIRPasses = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdLLVMIRPasses, timingMgr));
   runOptimizationPipeline(ci.isOutputStreamNull() ? *os : ci.getOutputStream());
+  timingScopeLLVMIRPasses.stop();
 
   if (action == BackendActionTy::Backend_EmitLL ||
       action == BackendActionTy::Backend_EmitBC) {
@@ -1382,11 +1438,15 @@ void CodeGenAction::executeAction() {
   }
 
   // Run LLVM's backend and generate either assembly or machine code
+  mlir::TimingScope timingScopeBackend = timingScopeRoot.nest(
+      mlir::TimingIdentifier::get(timingIdBackend, timingMgr));
   if (action == BackendActionTy::Backend_EmitAssembly ||
       action == BackendActionTy::Backend_EmitObj) {
     generateMachineCodeOrAssemblyImpl(
         diags, targetMachine, action, *llvmModule, codeGenOpts,
         ci.isOutputStreamNull() ? *os : ci.getOutputStream());
+    if (timingMgr.isEnabled())
+      llvm::reportAndResetTimings(&ci.getTimingStreamCodeGen());
     return;
   }
 }
author	macurtis-amd <macurtis@amd.com>	2025-01-14 08:02:02 -0600
committer	GitHub <noreply@github.com>	2025-01-14 08:02:02 -0600
commit	310c281b020b169e760ca75f878f5873ffbb2a9f (patch)
tree	79b81f4101229536c82eddf60bd1f09461dd281d /flang/lib/Frontend
parent	8fe11a26ae8f12622ddec83a7b80637080843a8b (diff)
download	llvm-310c281b020b169e760ca75f878f5873ffbb2a9f.zip llvm-310c281b020b169e760ca75f878f5873ffbb2a9f.tar.gz llvm-310c281b020b169e760ca75f878f5873ffbb2a9f.tar.bz2