[MachineCombiner] Support for floating-point FMA on ARM64

Evaluates fmul+fadd -> fmadd combines and similar code sequences in the machine combiner. It adds support for float and double similar to the existing integer implementation. The key features are: - DAGCombiner checks whether it should combine greedily or let the machine combiner do the evaluation. This is only supported on ARM64. - It gives preference to throughput over latency: the heuristic used is to combine always in loops. The targets decides whether the machine combiner should optimize for throughput or latency. - Supports for fmadd, f(n)msub, fmla, fmls patterns - On by default at O3 ffast-math llvm-svn: 267098
author: Gerolf Hoflehner <ghoflehner@apple.com> 2016-04-22 02:15:19 +0000
committer: Gerolf Hoflehner <ghoflehner@apple.com> 2016-04-22 02:15:19 +0000
commit: b32f11fc62ef12de1762adf588de6ee6bd4b2bb0 (patch)
tree: 6b86fb22b492753a5a4735482ae05a367c41f92e /llvm/lib/CodeGen/MachineCombiner.cpp
parent: 6fb3f19959b39cb2f5cff276db55698464c59fa4 (diff)
download: llvm-b32f11fc62ef12de1762adf588de6ee6bd4b2bb0.zip
llvm-b32f11fc62ef12de1762adf588de6ee6bd4b2bb0.tar.gz
llvm-b32f11fc62ef12de1762adf588de6ee6bd4b2bb0.tar.bz2
1 files changed, 11 insertions, 1 deletions
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index 44601d5..6b5c6ba 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -40,6 +40,7 @@ class MachineCombiner : public MachineFunctionPass {
   const TargetRegisterInfo *TRI;
   MCSchedModel SchedModel;
   MachineRegisterInfo *MRI;
+  MachineLoopInfo *MLI; // Current MachineLoopInfo
   MachineTraceMetrics *Traces;
   MachineTraceMetrics::Ensemble *MinInstr;
 
@@ -86,6 +87,7 @@ char &llvm::MachineCombinerID = MachineCombiner::ID;
 
 INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner",
                       "Machine InstCombiner", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
 INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
                     false, false)
@@ -93,6 +95,7 @@ INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner",
 void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesCFG();
   AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<MachineLoopInfo>();
   AU.addPreserved<MachineLoopInfo>();
   AU.addRequired<MachineTraceMetrics>();
   AU.addPreserved<MachineTraceMetrics>();
@@ -354,6 +357,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
   DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n");
 
   auto BlockIter = MBB->begin();
+  // Check if the block is in a loop.
+  const MachineLoop *ML = MLI->getLoopFor(MBB);
 
   while (BlockIter != MBB->end()) {
     auto &MI = *BlockIter++;
@@ -406,11 +411,15 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
       if (!NewInstCount)
         continue;
 
+      bool SubstituteAlways = false;
+      if (ML && TII->isThroughputPattern(P))
+        SubstituteAlways = true;
+
       // Substitute when we optimize for codesize and the new sequence has
       // fewer instructions OR
       // the new sequence neither lengthens the critical path nor increases
       // resource pressure.
-      if (doSubstitute(NewInstCount, OldInstCount) ||
+      if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) ||
           (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
                                    InstrIdxForVirtReg, P) &&
            preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
@@ -447,6 +456,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
   SchedModel = STI.getSchedModel();
   TSchedModel.init(SchedModel, &STI, TII);
   MRI = &MF.getRegInfo();
+  MLI = &getAnalysis<MachineLoopInfo>();
   Traces = &getAnalysis<MachineTraceMetrics>();
   MinInstr = nullptr;
   OptSize = MF.getFunction()->optForSize();
author	Gerolf Hoflehner <ghoflehner@apple.com>	2016-04-22 02:15:19 +0000
committer	Gerolf Hoflehner <ghoflehner@apple.com>	2016-04-22 02:15:19 +0000
commit	b32f11fc62ef12de1762adf588de6ee6bd4b2bb0 (patch)
tree	6b86fb22b492753a5a4735482ae05a367c41f92e /llvm/lib/CodeGen/MachineCombiner.cpp
parent	6fb3f19959b39cb2f5cff276db55698464c59fa4 (diff)
download	llvm-b32f11fc62ef12de1762adf588de6ee6bd4b2bb0.zip llvm-b32f11fc62ef12de1762adf588de6ee6bd4b2bb0.tar.gz llvm-b32f11fc62ef12de1762adf588de6ee6bd4b2bb0.tar.bz2