6 files changed, 351 insertions, 14 deletions
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index d758260..1a5f096 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -54,6 +54,7 @@ add_llvm_target(HexagonCodeGen
   HexagonOptAddrMode.cpp
   HexagonOptimizeSZextends.cpp
   HexagonPeephole.cpp
+  HexagonQFPOptimizer.cpp
   HexagonRDFOpt.cpp
   HexagonRegisterInfo.cpp
   HexagonSelectionDAGInfo.cpp
diff --git a/llvm/lib/Target/Hexagon/Hexagon.h b/llvm/lib/Target/Hexagon/Hexagon.h
index 109aba5..422ab20 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/llvm/lib/Target/Hexagon/Hexagon.h
@@ -67,6 +67,8 @@ void initializeHexagonPeepholePass(PassRegistry &);
 void initializeHexagonSplitConst32AndConst64Pass(PassRegistry &);
 void initializeHexagonVectorPrintPass(PassRegistry &);
 
+void initializeHexagonQFPOptimizerPass(PassRegistry &);
+
 Pass *createHexagonLoopIdiomPass();
 Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
 
@@ -112,6 +114,7 @@ FunctionPass *createHexagonVectorCombineLegacyPass();
 FunctionPass *createHexagonVectorPrint();
 FunctionPass *createHexagonVExtract();
 FunctionPass *createHexagonExpandCondsets();
+FunctionPass *createHexagonQFPOptimizer();
 
 } // end namespace llvm;
 
diff --git a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 4ddbe7a..ff876f6 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -920,6 +920,10 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
   // successors have been processed.
   RegisterSet BlockDefs, InsDefs;
   for (MachineInstr &MI : *B) {
+    // Stop if the map size is too large.
+    if (IFMap.size() >= MaxIFMSize)
+      break;
+
     InsDefs.clear();
     getInstrDefs(&MI, InsDefs);
     // Leave those alone. They are more transparent than "insert".
@@ -942,8 +946,8 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
 
         findRecordInsertForms(VR, AVs);
         // Stop if the map size is too large.
-        if (IFMap.size() > MaxIFMSize)
-          return;
+        if (IFMap.size() >= MaxIFMSize)
+          break;
       }
     }
 
diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
new file mode 100644
index 0000000..479ac90
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
@@ -0,0 +1,334 @@
+//===----- HexagonQFPOptimizer.cpp - Qualcomm-FP to IEEE-FP conversions
+// optimizer ------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Basic infrastructure for optimizing intermediate conversion instructions
+// generated while performing vector floating point operations.
+// Currently run at the start of code generation for Hexagon, it cleans
+// up redundant conversion instructions and replaces the uses of a conversion
+// with the appropriate machine operand. Liveness is preserved after this pass.
+//
+// @note: The redundant conversion instructions are not eliminated in this pass.
+// In this pass, we are only trying to replace the uses of conversion
+// instructions with its appropriate QFP instruction. We are leaving the job to
+// Dead instruction Elimination pass to remove redundant conversion
+// instructions.
+//
+// Brief overview of working of this QFP optimizer.
+// This version of the Hexagon QFP optimizer iterates over each instruction
+// and checks whether it belongs to the Hexagon HVX floating point arithmetic
+// instruction category (Add, Sub, Mul). It then finds the unique definition
+// of each machine operand of that instruction.
+//
+// Example:
+// Let MachineInstr *MI be the HVX vadd instruction
+// MI -> $v0 = V6_vadd_sf $v1, $v2
+// MachineInstr *DefMI1 = MRI->getVRegDef(MI->getOperand(1).getReg());
+// MachineInstr *DefMI2 = MRI->getVRegDef(MI->getOperand(2).getReg());
+//
+// In the above example, DefMI1 and DefMI2 give the unique definitions
+// corresponding to the operands ($v1 and $v2 respectively) of instruction MI.
+//
+// If neither definition is a conversion instruction (V6_vconv_sf_qf32,
+// V6_vconv_hf_qf16), the pass skips the current instruction and moves on to
+// the next one.
+//
+// If one of the definitions is a conversion instruction, the pass replaces
+// the arithmetic instruction with its corresponding mix variant.
+// In the above example, if $v1 is conversion instruction
+// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
+// After Transformation:
+// MI -> $v0 = V6_vadd_qf32_mix $v3, $v2 ($v1 is replaced with $v3)
+//
+// If both definitions are conversion instructions, the instruction is
+// replaced with its qf variant.
+// In the above example, if $v1 and $v2 are conversion instructions
+// DefMI1 -> $v1 = V6_vconv_sf_qf32 $v3
+// DefMI2 -> $v2 = V6_vconv_sf_qf32 $v4
+// After Transformation:
+// MI -> $v0 = V6_vadd_qf32 $v3, $v4 ($v1 is replaced with $v3, $v2 is replaced
+// with $v4)
+//
+// Currently, in this pass, we are not handling the case when the definitions
+// are PHI inst.
+//
+//===----------------------------------------------------------------------===//
+#include <unordered_set>
+#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
+
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+#include <vector>
+
+#define DEBUG_TYPE "hexagon-qfp-optimizer"
+
+using namespace llvm;
+
+static cl::opt<bool>
+    DisableQFOptimizer("disable-qfp-opt", cl::init(false),
+                       cl::desc("Disable optimization of Qfloat operations."));
+
+namespace { // QFPInstMap: opcode -> replacement opcode used by optimizeQfp().
+const std::map<unsigned short, unsigned short> QFPInstMap{
+    {Hexagon::V6_vadd_hf, Hexagon::V6_vadd_qf16_mix},           // hf -> mix
+    {Hexagon::V6_vadd_qf16_mix, Hexagon::V6_vadd_qf16},         // mix -> qf
+    {Hexagon::V6_vadd_sf, Hexagon::V6_vadd_qf32_mix},           // sf -> mix
+    {Hexagon::V6_vadd_qf32_mix, Hexagon::V6_vadd_qf32},         // mix -> qf
+    {Hexagon::V6_vsub_hf, Hexagon::V6_vsub_qf16_mix},           // hf -> mix
+    {Hexagon::V6_vsub_qf16_mix, Hexagon::V6_vsub_qf16},         // mix -> qf
+    {Hexagon::V6_vsub_sf, Hexagon::V6_vsub_qf32_mix},           // sf -> mix
+    {Hexagon::V6_vsub_qf32_mix, Hexagon::V6_vsub_qf32},         // mix -> qf
+    {Hexagon::V6_vmpy_qf16_hf, Hexagon::V6_vmpy_qf16_mix_hf},   // hf -> mix
+    {Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},      // mix -> qf
+    {Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},   // hf -> mix
+    {Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16}, // mix -> qf
+    {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};         // sf -> qf
+} // namespace
+
+namespace llvm {
+
+FunctionPass *createHexagonQFPOptimizer();
+void initializeHexagonQFPOptimizerPass(PassRegistry &);
+
+} // namespace llvm
+
+namespace {
+/// Rewrites HVX FP arithmetic fed by qf -> sf/hf conversions (see file header).
+struct HexagonQFPOptimizer : public MachineFunctionPass {
+public:
+  static char ID;
+
+  HexagonQFPOptimizer() : MachineFunctionPass(ID) {}
+  /// Applies optimizeQfp() to the candidate instructions of \p MF.
+  bool runOnMachineFunction(MachineFunction &MF) override;
+  /// Inserts a replacement for \p MI in \p MBB; returns true if it did so.
+  bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
+
+  StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
+  /// Declares that the CFG is preserved.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+private: // All three pointers are set by runOnMachineFunction().
+  const HexagonSubtarget *HST = nullptr;
+  const HexagonInstrInfo *HII = nullptr;
+  const MachineRegisterInfo *MRI = nullptr;
+};
+
+char HexagonQFPOptimizer::ID = 0;
+} // namespace
+
+INITIALIZE_PASS(HexagonQFPOptimizer, "hexagon-qfp-optimizer",
+                HEXAGON_QFP_OPTIMIZER, false, false)
+// Returns a newly allocated instance of the pass.
+FunctionPass *llvm::createHexagonQFPOptimizer() {
+  return new HexagonQFPOptimizer();
+}
+
+/// Tries to make \p MI read the qf-format source of the conversion(s)
+/// (V6_vconv_sf_qf32 / V6_vconv_hf_qf16) that define its two inputs:
+///  - both inputs are conversions of the same kind: rebuilt as the qf opcode;
+///  - otherwise, left input is a conversion: rebuilt as the mix opcode;
+///  - otherwise, right input is a conversion: rebuilt as the mix opcode with
+///    the operands swapped (never done for vsub).
+///
+/// \param MI  candidate; nothing is done unless its opcode is a QFPInstMap key.
+/// \param MBB block that contains \p MI; the replacement is inserted here.
+/// \returns true if a replacement was inserted in front of \p MI. \p MI is
+/// not erased here, that is left to the caller. When false is returned no
+/// instruction has been created or modified.
+bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
+                                      MachineBasicBlock *MBB) {
+
+  // Early exit:
+  // - if the instruction has too few operands (QFP ops need 2 sources + 1
+  //   dest),
+  // - or does not have a transformation mapping.
+  if (MI->getNumOperands() < 3)
+    return false;
+  const auto MixIt = QFPInstMap.find(MI->getOpcode());
+  if (MixIt == QFPInstMap.end())
+    return false;
+  const bool IsMpySf = MI->getOpcode() == Hexagon::V6_vmpy_qf32_sf;
+
+  // Unique definition of a virtual register operand, nullptr for anything
+  // else: getVRegDef() is only meaningful for virtual registers.
+  auto VRegDefOf = [this](const MachineOperand &MO) -> MachineInstr * {
+    if (!MO.isReg() || !MO.getReg().isVirtual())
+      return nullptr;
+    return MRI->getVRegDef(MO.getReg());
+  };
+
+  // Get the reaching defs of MI, DefMI1 and DefMI2
+  MachineInstr *DefMI1 = VRegDefOf(MI->getOperand(1));
+  MachineInstr *DefMI2 = VRegDefOf(MI->getOperand(2));
+  if (!DefMI1 || !DefMI2)
+    return false;
+  LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
+             DefMI2->dump());
+
+  // True when the value converted by Conv is defined by an instruction whose
+  // first operand is an HvxWR ("W") class register; such conversions are left
+  // alone. Every operand is validated before it is queried, identically for
+  // all three cases below.
+  auto ConvertsWReg = [&](const MachineInstr *Conv) {
+    if (Conv->getNumOperands() < 2)
+      return false;
+    const MachineInstr *SrcDef = VRegDefOf(Conv->getOperand(1));
+    if (!SrcDef || SrcDef->getNumOperands() == 0)
+      return false;
+    const MachineOperand &Dst = SrcDef->getOperand(0);
+    return Dst.isReg() && Dst.getReg().isVirtual() &&
+           MRI->getRegClass(Dst.getReg()) == &Hexagon::HvxWRRegClass;
+  };
+
+  // Inserts "Res = Opc Lhs, Rhs" in front of MI. An operand taken from a
+  // conversion gains a use below that conversion, so a kill flag on that
+  // register may no longer mark its last use (the conversion can feed several
+  // instructions, or MI can sit in a loop that the conversion is outside of).
+  // All kill flags of such a register are therefore dropped instead of moving
+  // one onto the new use. An operand inherited from MI keeps its kill state,
+  // as the new instruction takes MI's place.
+  MachineRegisterInfo &RegInfo = MBB->getParent()->getRegInfo();
+  auto Emit = [&](unsigned Opc, const MachineOperand &Lhs, bool LhsFromConv,
+                  const MachineOperand &Rhs, bool RhsFromConv) {
+    auto UseFlags = [&](const MachineOperand &MO, bool FromConv) -> unsigned {
+      if (!FromConv)
+        return getKillRegState(MO.isKill());
+      RegInfo.clearKillFlags(MO.getReg());
+      return 0;
+    };
+    const unsigned LhsFlags = UseFlags(Lhs, LhsFromConv);
+    const unsigned RhsFlags = UseFlags(Rhs, RhsFromConv);
+    MachineInstrBuilder MIB =
+        BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(Opc),
+                MI->getOperand(0).getReg())
+            .addReg(Lhs.getReg(), LhsFlags, Lhs.getSubReg())
+            .addReg(Rhs.getReg(), RhsFlags, Rhs.getSubReg());
+    (void)MIB; // Only read by LLVM_DEBUG.
+    LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
+    return true;
+  };
+
+  const unsigned Def1OP = DefMI1->getOpcode();
+  const unsigned Def2OP = DefMI2->getOpcode();
+  auto IsQfConv = [](unsigned Opc) {
+    return Opc == Hexagon::V6_vconv_sf_qf32 ||
+           Opc == Hexagon::V6_vconv_hf_qf16;
+  };
+
+  // Case 1: Both reaching defs of MI are qf to sf/hf conversions of the same
+  // kind, so the pure qf variant can be used.
+  if (Def1OP == Def2OP && IsQfConv(Def1OP)) {
+    // If the reaching defs of DefMI are W register type, we return
+    if (ConvertsWReg(DefMI1) || ConvertsWReg(DefMI2))
+      return false;
+
+    // QFPInstMap chains sf/hf -> mix -> qf, so the qf opcode is two lookups
+    // away, except for V6_vmpy_qf32_sf which maps to it directly. Resolve it
+    // before anything is touched so that a miss leaves the code unchanged.
+    unsigned short QfOpc = MixIt->second;
+    if (!IsMpySf) {
+      const auto QfIt = QFPInstMap.find(QfOpc);
+      if (QfIt == QFPInstMap.end())
+        return false;
+      QfOpc = QfIt->second;
+    }
+    return Emit(QfOpc, DefMI1->getOperand(1), /*LhsFromConv=*/true,
+                DefMI2->getOperand(1), /*RhsFromConv=*/true);
+  }
+
+  // QFPInstMap has no mix opcode for V6_vmpy_qf32_sf, so the single-conversion
+  // cases below do not apply to it.
+  if (IsMpySf)
+    return false;
+
+  // Case 2: Left operand is conversion to sf/hf
+  if (IsQfConv(Def1OP) && !DefMI2->isPHI()) {
+    if (ConvertsWReg(DefMI1))
+      return false;
+    return Emit(MixIt->second, DefMI1->getOperand(1), /*LhsFromConv=*/true,
+                MI->getOperand(2), /*RhsFromConv=*/false);
+  }
+
+  // Case 3: Right operand is conversion to sf/hf
+  if (IsQfConv(Def2OP) && !DefMI1->isPHI()) {
+    // The second operand of original instruction is converted.
+    // In "mix" instructions, "qf" operand is always the first operand.
+
+    // Caveat: vsub is not commutative w.r.t operands.
+    if (MixIt->second == Hexagon::V6_vsub_qf16_mix ||
+        MixIt->second == Hexagon::V6_vsub_qf32_mix)
+      return false;
+
+    if (ConvertsWReg(DefMI2))
+      return false;
+
+    // Notice the operands are flipped.
+    return Emit(MixIt->second, DefMI2->getOperand(1), /*LhsFromConv=*/true,
+                MI->getOperand(1), /*RhsFromConv=*/false);
+  }
+
+  return false;
+}
+
+bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) {
+  // Offers every instruction whose opcode is a key of QFPInstMap to
+  // optimizeQfp() and erases the ones it replaced. Returns true if MF changed.
+  if (DisableQFOptimizer)
+    return false;
+
+  // Requires HVX v68 operations and packets; also honours skipFunction().
+  HST = &MF.getSubtarget<HexagonSubtarget>();
+  if (!HST->useHVXV68Ops() || !HST->usePackets() ||
+      skipFunction(MF.getFunction()))
+    return false;
+  HII = HST->getInstrInfo();
+  MRI = &MF.getRegInfo();
+
+  LLVM_DEBUG(dbgs() << "\n=== Running QFPOptimizer Pass for : "
+                    << MF.getName()
+                    << " Optimize intermediate conversions ===\n");
+
+  bool Changed = false;
+  for (MachineBasicBlock &MBB : MF) {
+    for (auto MII = MBB.begin(), MIE = MBB.end(); MII != MIE;) {
+      // Step past MI first: it is erased below once it has been replaced.
+      MachineInstr &MI = *MII++;
+
+      // The V6_vconv_* opcodes are not keys of QFPInstMap, so the map lookup
+      // is the only filter needed here.
+      if (!QFPInstMap.count(MI.getOpcode()))
+        continue;
+
+      LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI.dump());
+      if (!optimizeQfp(&MI, &MBB))
+        continue;
+      MI.eraseFromParent();
+      LLVM_DEBUG(dbgs() << "\t....Removing....");
+      Changed = true;
+    }
+  }
+  return Changed;
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index f5d8b69..d9824a31 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -220,6 +220,7 @@ LLVMInitializeHexagonTarget() {
   initializeHexagonPeepholePass(PR);
   initializeHexagonSplitConst32AndConst64Pass(PR);
   initializeHexagonVectorPrintPass(PR);
+  initializeHexagonQFPOptimizerPass(PR);
 }
 
 HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
@@ -386,6 +387,7 @@ bool HexagonPassConfig::addInstSelector() {
       addPass(createHexagonGenInsert());
     if (EnableEarlyIf)
       addPass(createHexagonEarlyIfConversion());
+    addPass(createHexagonQFPOptimizer());
   }
 
   return false;
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index e4c0a16..9ab5202 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -300,7 +300,6 @@ private:
     const_iterator end() const { return Blocks.end(); }
   };
 
-  Align getAlignFromValue(const Value *V) const;
   std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
   bool isHvx(const AddrInfo &AI) const;
   // This function is only used for assertions at the moment.
@@ -612,12 +611,6 @@ auto AlignVectors::ByteSpan::values() const -> SmallVector<Value *, 8> {
   return Values;
 }
 
-auto AlignVectors::getAlignFromValue(const Value *V) const -> Align {
-  const auto *C = dyn_cast<ConstantInt>(V);
-  assert(C && "Alignment must be a compile-time constant integer");
-  return C->getAlignValue();
-}
-
 auto AlignVectors::getAddrInfo(Instruction &In) const
     -> std::optional<AddrInfo> {
   if (auto *L = isCandidate<LoadInst>(&In))
@@ -631,11 +624,11 @@ auto AlignVectors::getAddrInfo(Instruction &In) const
     switch (ID) {
     case Intrinsic::masked_load:
       return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
-                      getAlignFromValue(II->getArgOperand(1)));
+                      II->getParamAlign(0).valueOrOne());
     case Intrinsic::masked_store:
       return AddrInfo(HVC, II, II->getArgOperand(1),
                       II->getArgOperand(0)->getType(),
-                      getAlignFromValue(II->getArgOperand(2)));
+                      II->getParamAlign(1).valueOrOne());
     }
   }
   return std::nullopt;
@@ -660,9 +653,9 @@ auto AlignVectors::getMask(Value *Val) const -> Value * {
   if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
     switch (II->getIntrinsicID()) {
     case Intrinsic::masked_load:
-      return II->getArgOperand(2);
+      return II->getArgOperand(1);
     case Intrinsic::masked_store:
-      return II->getArgOperand(3);
+      return II->getArgOperand(2);
     }
   }
 
@@ -675,7 +668,7 @@ auto AlignVectors::getMask(Value *Val) const -> Value * {
 auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
   if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
     if (II->getIntrinsicID() == Intrinsic::masked_load)
-      return II->getArgOperand(3);
+      return II->getArgOperand(2);
   }
   return UndefValue::get(getPayload(Val)->getType());
 }