-rw-r--r--  llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp        |  13
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonInstrInfo.h          |   1
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp     | 145
-rw-r--r--  llvm/test/CodeGen/Hexagon/autohvx/xqf-fixup-qfp1.ll | 372
-rw-r--r--  llvm/test/CodeGen/Hexagon/hvx-vsub-qf-sf-mix.ll     |  60
-rw-r--r--  llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll    |   4
-rw-r--r--  llvm/test/CodeGen/Hexagon/vect-qfp.mir              | 202
-rw-r--r--  llvm/test/CodeGen/Hexagon/vect/vect-qfp-unary.mir   |  97
8 files changed, 860 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 47726d6..55bafde 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -4753,6 +4753,19 @@ bool HexagonInstrInfo::getBundleNoShuf(const MachineInstr &MIB) const {
return (Operand.isImm() && (Operand.getImm() & memShufDisabledMask) != 0);
}
+bool HexagonInstrInfo::isQFPMul(const MachineInstr *MI) const {
+ return (MI->getOpcode() == Hexagon::V6_vmpy_qf16_hf ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf32_hf ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf32_mix_hf ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf32_sf ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf16 ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf32_mix_hf ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf32_qf16 ||
+ MI->getOpcode() == Hexagon::V6_vmpy_qf32);
+}
+
// Addressing mode relations.
short HexagonInstrInfo::changeAddrMode_abs_io(short Opc) const {
return Opc >= 0 ? Hexagon::changeAddrMode_abs_io(Opc) : Opc;
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index c17e527..48adf82 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -532,6 +532,7 @@ public:
}
MCInst getNop() const override;
+ bool isQFPMul(const MachineInstr *MI) const;
};
/// \brief Create RegSubRegPair from a register MachineOperand
diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
index f29a739..8801f69 100644
--- a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
@@ -58,7 +58,7 @@
// are PHI inst.
//
//===----------------------------------------------------------------------===//
-#include <unordered_set>
+
#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
#include "Hexagon.h"
@@ -86,6 +86,9 @@ using namespace llvm;
cl::opt<bool>
DisableQFOptimizer("disable-qfp-opt", cl::init(false),
cl::desc("Disable optimization of Qfloat operations."));
+cl::opt<bool> DisableQFOptForMul(
+ "disable-qfp-opt-mul", cl::init(true),
+ cl::desc("Disable optimization of Qfloat operations for multiply."));
namespace {
const std::map<unsigned short, unsigned short> QFPInstMap{
@@ -101,11 +104,21 @@ const std::map<unsigned short, unsigned short> QFPInstMap{
{Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
{Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
{Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
- {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};
+ {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32},
+ {Hexagon::V6_vilog2_sf, Hexagon::V6_vilog2_qf32},
+ {Hexagon::V6_vilog2_hf, Hexagon::V6_vilog2_qf16},
+ {Hexagon::V6_vabs_qf32_sf, Hexagon::V6_vabs_qf32_qf32},
+ {Hexagon::V6_vabs_qf16_hf, Hexagon::V6_vabs_qf16_qf16},
+ {Hexagon::V6_vneg_qf32_sf, Hexagon::V6_vneg_qf32_qf32},
+ {Hexagon::V6_vneg_qf16_hf, Hexagon::V6_vneg_qf16_qf16}};
} // namespace
-namespace {
+namespace llvm {
+FunctionPass *createHexagonQFPOptimizer();
+void initializeHexagonQFPOptimizerPass(PassRegistry &);
+} // namespace llvm
+namespace {
struct HexagonQFPOptimizer : public MachineFunctionPass {
public:
static char ID;
@@ -116,6 +129,10 @@ public:
bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool optimizeQfpTwoOp(MachineInstr *MI, MachineBasicBlock *MBB);
+
+ bool optimizeQfpOneOp(MachineInstr *MI, MachineBasicBlock *MBB);
+
StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -142,19 +159,69 @@ FunctionPass *llvm::createHexagonQFPOptimizer() {
bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
MachineBasicBlock *MBB) {
- // Early exit:
- // - if instruction is invalid or has too few operands (QFP ops need 2 sources
- // + 1 dest),
- // - or does not have a transformation mapping.
- if (MI->getNumOperands() < 3)
+ if (MI->getNumOperands() == 2)
+ return optimizeQfpOneOp(MI, MBB);
+ else if (MI->getNumOperands() == 3)
+ return optimizeQfpTwoOp(MI, MBB);
+ else
return false;
+}
+
+bool HexagonQFPOptimizer::optimizeQfpOneOp(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+
+ unsigned Op0F = 0;
auto It = QFPInstMap.find(MI->getOpcode());
if (It == QFPInstMap.end())
return false;
+
unsigned short InstTy = It->second;
+ // Get the reaching def of MI
+ MachineInstr *DefMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ MachineOperand &Res = MI->getOperand(0);
+ if (!Res.isReg())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI->dump());
+ MachineInstr *ReachDefDef = nullptr;
+
+ // Get the reaching def of the reaching def to check for W reg def
+ if (DefMI->getNumOperands() > 1 && DefMI->getOperand(1).isReg() &&
+ DefMI->getOperand(1).getReg().isVirtual())
+ ReachDefDef = MRI->getVRegDef(DefMI->getOperand(1).getReg());
+ unsigned ReachDefOp = DefMI->getOpcode();
+ MachineInstrBuilder MIB;
+
+ // Check if the reaching def is a conversion
+ if (ReachDefOp == Hexagon::V6_vconv_sf_qf32 ||
+ ReachDefOp == Hexagon::V6_vconv_hf_qf16) {
+
+ // Return if the reaching def of the reaching def is of W type
+ if (ReachDefDef && MRI->getRegClass(ReachDefDef->getOperand(0).getReg()) ==
+ &Hexagon::HvxWRRegClass)
+ return false;
+
+ // Analyze the use operands of the conversion to get their KILL status
+ MachineOperand &SrcOp = DefMI->getOperand(1);
+ Op0F = getKillRegState(SrcOp.isKill());
+ SrcOp.setIsKill(false);
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(SrcOp.getReg(), Op0F, SrcOp.getSubReg());
+ LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
+ return true;
+ }
+ return false;
+}
+
+bool HexagonQFPOptimizer::optimizeQfpTwoOp(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
unsigned Op0F = 0;
unsigned Op1F = 0;
+ auto It = QFPInstMap.find(MI->getOpcode());
+ if (It == QFPInstMap.end())
+ return false;
+ unsigned short InstTy = It->second;
// Get the reaching defs of MI, DefMI1 and DefMI2
MachineInstr *DefMI1 = nullptr;
MachineInstr *DefMI2 = nullptr;
@@ -167,6 +234,9 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
return false;
MachineOperand &Res = MI->getOperand(0);
+ if (!Res.isReg())
+ return false;
+
MachineInstr *Inst1 = nullptr;
MachineInstr *Inst2 = nullptr;
LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
@@ -185,7 +255,8 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
unsigned Def2OP = DefMI2->getOpcode();
MachineInstrBuilder MIB;
- // Case 1: Both reaching defs of MI are qf to sf/hf conversions
+
+ // Check if both reaching defs of MI are qf to sf/hf conversions
if ((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
Def2OP == Hexagon::V6_vconv_sf_qf32) ||
(Def1OP == Hexagon::V6_vconv_hf_qf16 &&
@@ -226,7 +297,7 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
return true;
- // Case 2: Left operand is conversion to sf/hf
+ // Check if left operand's reaching def is a conversion to sf/hf
} else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
Def2OP != Hexagon::V6_vconv_sf_qf32) ||
(Def1OP == Hexagon::V6_vconv_hf_qf16 &&
@@ -250,7 +321,7 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
return true;
- // Case 2: Left operand is conversion to sf/hf
+ // Check if right operand's reaching def is a conversion to sf/hf
} else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 &&
Def2OP == Hexagon::V6_vconv_sf_qf32) ||
(Def1OP != Hexagon::V6_vconv_hf_qf16 &&
@@ -258,13 +329,6 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
!DefMI1->isPHI() &&
(MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
// The second operand of the original instruction is converted.
- // In "mix" instructions, "qf" operand is always the first operand.
-
- // Caveat: vsub is not commutative w.r.t operands.
- if (InstTy == Hexagon::V6_vsub_qf16_mix ||
- InstTy == Hexagon::V6_vsub_qf32_mix)
- return false;
-
if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) ==
&Hexagon::HvxWRRegClass)
return false;
@@ -275,10 +339,26 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
Op1F = getKillRegState(Src2.isKill());
Src2.setIsKill(false);
Op0F = getKillRegState(Src1.isKill());
- MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
- .addReg(Src2.getReg(), Op1F,
- Src2.getSubReg()) // Notice the operands are flipped.
- .addReg(Src1.getReg(), Op0F, Src1.getSubReg());
+ if (InstTy == Hexagon::V6_vsub_qf16_mix ||
+ InstTy == Hexagon::V6_vsub_qf32_mix) {
+ if (!HST->useHVXV81Ops())
+ // vsub_(hf|sf)_mix instructions are only available on HVX v81+
+ return false;
+ // vsub is not commutative w.r.t. operands -> treat it as a special case
+ // to choose the correct mix instruction.
+ if (Def2OP == Hexagon::V6_vconv_sf_qf32)
+ InstTy = Hexagon::V6_vsub_sf_mix;
+ else if (Def2OP == Hexagon::V6_vconv_hf_qf16)
+ InstTy = Hexagon::V6_vsub_hf_mix;
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg())
+ .addReg(Src2.getReg(), Op1F, Src2.getSubReg());
+ } else {
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src2.getReg(), Op1F,
+ Src2.getSubReg()) // Notice the operands are flipped.
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg());
+ }
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
return true;
}
@@ -309,15 +389,18 @@ bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) {
while (MII != MBBI->instr_end()) {
MachineInstr *MI = &*MII;
++MII; // As MI might be removed.
-
- if (QFPInstMap.count(MI->getOpcode()) &&
- MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 &&
- MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) {
- LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
- if (optimizeQfp(MI, MBB)) {
- MI->eraseFromParent();
- LLVM_DEBUG(dbgs() << "\t....Removing....");
- Changed = true;
+ if (QFPInstMap.count(MI->getOpcode())) {
+ auto OpC = MI->getOpcode();
+ if (DisableQFOptForMul && HII->isQFPMul(MI))
+ continue;
+ if (OpC != Hexagon::V6_vconv_sf_qf32 &&
+ OpC != Hexagon::V6_vconv_hf_qf16) {
+ LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
+ if (optimizeQfp(MI, MBB)) {
+ MI->eraseFromParent();
+ LLVM_DEBUG(dbgs() << "\t....Removing....");
+ Changed = true;
+ }
}
}
}
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/xqf-fixup-qfp1.ll b/llvm/test/CodeGen/Hexagon/autohvx/xqf-fixup-qfp1.ll
new file mode 100644
index 0000000..9625a605
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/autohvx/xqf-fixup-qfp1.ll
@@ -0,0 +1,372 @@
+; REQUIRES: hexagon-registered-target, silver
+; This tests correct handling of register spills and fills of
+; qf operands during register allocation.
+
+; RUN: llc -mcpu=hexagonv79 -mattr=+hvx-length128b,+hvxv79,+hvx-ieee-fp,+hvx-qfloat,-long-calls -debug-only=handle-qfp %s 2>&1 -o - | FileCheck %s --check-prefixes V79-81,V79
+; RUN: llc -mcpu=hexagonv81 -mattr=+hvx-length128b,+hvxv81,+hvx-ieee-fp,+hvx-qfloat,-long-calls -debug-only=handle-qfp %s 2>&1 -o - | FileCheck %s --check-prefixes V79-81,V81
+
+; V79-81: Finding uses of: renamable $w{{[0-9]+}} = V6_vmpy_qf32_hf
+; V79-81: Inserting after conv: [[VREG0:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG0]]
+; V79-81-NEXT: Inserting after conv: [[VREG1:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG1]]
+; V79-81: Finding uses of: renamable $w{{[0-9]+}} = V6_vmpy_qf32_hf
+; V79-81: Inserting after conv: [[VREG2:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG2]]
+; V79-81-NEXT: Inserting after conv: [[VREG3:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG3]]
+; V79-81: Finding uses of: renamable $w{{[0-9]+}} = V6_vmpy_qf32_hf
+; V79-81-DAG: Inserting after conv: [[VREG4:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG4]]
+; V79-81-DAG: Inserting after conv: [[VREG5:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable [[VREG5]]
+; V79-81-DAG: Inserting new instruction: $v{{[0-9]+}} = V6_vadd_sf killed renamable [[VREG2]], killed renamable [[VREG0]]
+; V79-81-DAG: Inserting new instruction: $v{{[0-9]+}} = V6_vsub_sf killed renamable $v{{[0-9]+}}, killed renamable $v{{[0-9]+}}
+;
+; V79-81: Analyzing convert instruction: renamable [[VREG6:\$v[0-9]+]] = V6_vconv_hf_qf32 killed renamable $w{{[0-9]+}}
+; V79: Inserting new instruction: [[VREG30:\$v[0-9]+]] = V6_vd0
+; V79-NEXT: Inserting new instruction: [[VREG7:\$v[0-9]+]] = V6_vadd_sf killed renamable [[VREG7]], killed [[VREG30]]
+; V79: Inserting new instruction: [[VREG30]] = V6_vd0
+; V79-NEXT: Inserting new instruction: [[VREG8:\$v[0-9]+]] = V6_vadd_sf killed renamable [[VREG8]], killed [[VREG30]]
+; V81: Inserting new instruction: [[VREG7:\$v[0-9]+]] = V6_vconv_qf32_sf killed renamable [[VREG7]]
+; V81: Inserting new instruction: [[VREG8:\$v[0-9]+]] = V6_vconv_qf32_sf killed renamable [[VREG8]]
+
+; V79-81: Analyzing convert instruction: renamable [[VREG9:\$v[0-9]+]] = V6_vconv_sf_qf32 killed renamable $v{{[0-9]+}}
+; V79: Inserting new instruction: [[VREG30]] = V6_vd0
+; V79-NEXT: Inserting new instruction: [[VREG10:\$v[0-9]+]] = V6_vadd_sf killed renamable [[VREG10]], killed [[VREG30]]
+; V81: Inserting new instruction: [[VREG8:\$v[0-9]+]] = V6_vconv_qf32_sf killed renamable [[VREG8]]
+
+target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
+target triple = "hexagon"
+
+@.str.1 = private unnamed_addr constant [9 x i8] c"0x%08lx \00", align 1
+@.str.3 = private unnamed_addr constant [173 x i8] c"/prj/qct/llvm/devops/aether/hexbuild/test_trees/MASTER/test/regress/features/hexagon/arch_v68/hvx_ieee_fp/hvx_ieee_fp_test.c:126 0 && \22ERROR: Failed to acquire HVX unit.\\n\22\00", align 1
+@__func__.main = private unnamed_addr constant [5 x i8] c"main\00", align 1
+@.str.5 = private unnamed_addr constant [33 x i8] c"half -3 converted to vhf = %.2f\0A\00", align 1
+@.str.6 = private unnamed_addr constant [35 x i8] c"uhalf 32k converted to vhf = %.2f\0A\00", align 1
+@.str.7 = private unnamed_addr constant [32 x i8] c"sf 0.5 converted to vhf = %.2f\0A\00", align 1
+@.str.8 = private unnamed_addr constant [32 x i8] c"vhf 4.0 conveted to ubyte = %d\0A\00", align 1
+@.str.9 = private unnamed_addr constant [32 x i8] c"vhf 2.0 conveted to uhalf = %d\0A\00", align 1
+@.str.10 = private unnamed_addr constant [30 x i8] c"byte 4 conveted to hf = %.2f\0A\00", align 1
+@.str.11 = private unnamed_addr constant [31 x i8] c"ubyte 4 conveted to hf = %.2f\0A\00", align 1
+@.str.12 = private unnamed_addr constant [27 x i8] c"hf -3 conveted to sf = %f\0A\00", align 1
+@.str.13 = private unnamed_addr constant [31 x i8] c"vhf 4.0 conveted to byte = %d\0A\00", align 1
+@.str.14 = private unnamed_addr constant [31 x i8] c"vhf 4.0 conveted to half = %d\0A\00", align 1
+@.str.16 = private unnamed_addr constant [33 x i8] c"max of hf 2.0 and hf 4.0 = %.2f\0A\00", align 1
+@.str.17 = private unnamed_addr constant [33 x i8] c"min of hf 2.0 and hf 4.0 = %.2f\0A\00", align 1
+@.str.18 = private unnamed_addr constant [32 x i8] c"max of sf 0.5 and sf 0.25 = %f\0A\00", align 1
+@.str.19 = private unnamed_addr constant [32 x i8] c"min of sf 0.5 and sf 0.25 = %f\0A\00", align 1
+@.str.21 = private unnamed_addr constant [25 x i8] c"negate of hf 4.0 = %.2f\0A\00", align 1
+@.str.22 = private unnamed_addr constant [23 x i8] c"abs of hf -6.0 = %.2f\0A\00", align 1
+@.str.23 = private unnamed_addr constant [23 x i8] c"negate of sf 0.5 = %f\0A\00", align 1
+@.str.24 = private unnamed_addr constant [22 x i8] c"abs of sf -0.25 = %f\0A\00", align 1
+@.str.26 = private unnamed_addr constant [32 x i8] c"hf add of 4.0 and -6.0 = %.2f\0A\00", align 1
+@.str.27 = private unnamed_addr constant [32 x i8] c"hf sub of 4.0 and -6.0 = %.2f\0A\00", align 1
+@.str.28 = private unnamed_addr constant [31 x i8] c"sf add of 0.5 and -0.25 = %f\0A\00", align 1
+@.str.29 = private unnamed_addr constant [31 x i8] c"sf sub of 0.5 and -0.25 = %f\0A\00", align 1
+@.str.30 = private unnamed_addr constant [36 x i8] c"sf add of hf 4.0 and hf -6.0 = %f\0A\00", align 1
+@.str.31 = private unnamed_addr constant [36 x i8] c"sf sub of hf 4.0 and hf -6.0 = %f\0A\00", align 1
+@.str.33 = private unnamed_addr constant [32 x i8] c"hf mpy of 4.0 and -6.0 = %.2f\0A\00", align 1
+@.str.34 = private unnamed_addr constant [35 x i8] c"hf accmpy of 4.0 and -6.0 = %.2f\0A\00", align 1
+@.str.35 = private unnamed_addr constant [36 x i8] c"sf mpy of hf 4.0 and hf -6.0 = %f\0A\00", align 1
+@.str.36 = private unnamed_addr constant [39 x i8] c"sf accmpy of hf 4.0 and hf -6.0 = %f\0A\00", align 1
+@.str.37 = private unnamed_addr constant [31 x i8] c"sf mpy of 0.5 and -0.25 = %f\0A\00", align 1
+@.str.39 = private unnamed_addr constant [25 x i8] c"w copy from sf 0.5 = %f\0A\00", align 1
+@str = private unnamed_addr constant [35 x i8] c"ERROR: Failed to acquire HVX unit.\00", align 1
+@str.40 = private unnamed_addr constant [25 x i8] c"\0AConversion intructions\0A\00", align 1
+@str.41 = private unnamed_addr constant [23 x i8] c"\0AMin/Max instructions\0A\00", align 1
+@str.42 = private unnamed_addr constant [23 x i8] c"\0Aabs/neg instructions\0A\00", align 1
+@str.43 = private unnamed_addr constant [23 x i8] c"\0Aadd/sub instructions\0A\00", align 1
+@str.44 = private unnamed_addr constant [24 x i8] c"\0Amultiply instructions\0A\00", align 1
+@str.45 = private unnamed_addr constant [19 x i8] c"\0Acopy instruction\0A\00", align 1
+
+declare dso_local void @print_vector_words(<32 x i32> noundef %x) local_unnamed_addr #0
+
+; Function Attrs: nofree nounwind optsize
+declare dso_local noundef i32 @printf(ptr nocapture noundef readonly, ...) local_unnamed_addr #0
+
+; Function Attrs: nounwind optsize
+define dso_local i32 @main(i32 noundef %argc, ptr nocapture noundef readnone %argv) local_unnamed_addr #1 {
+entry:
+ %call = tail call i32 @acquire_vector_unit(i8 noundef zeroext 0) #6
+ %tobool.not = icmp eq i32 %call, 0
+ br i1 %tobool.not, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %puts = tail call i32 @puts(ptr nonnull dereferenceable(1) @str)
+ tail call void @_Assert(ptr noundef nonnull @.str.3, ptr noundef nonnull @__func__.main) #7
+ unreachable
+
+if.end: ; preds = %entry
+ tail call void @set_double_vector_mode() #6
+ %0 = tail call <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32 16384)
+ %1 = tail call <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32 17408)
+ %2 = tail call <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32 -14848)
+ %3 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1056964608)
+ %4 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1048576000)
+ %5 = tail call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 -1098907648)
+ %6 = tail call <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32 -3)
+ %7 = tail call <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32 32768)
+ %puts147 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.40)
+ %8 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.hf.h.128B(<32 x i32> %6)
+ %bc.i = bitcast <32 x i32> %8 to <64 x half>
+ %9 = extractelement <64 x half> %bc.i, i64 0
+ %conv = fpext half %9 to double
+ %call12 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.5, double noundef %conv) #6
+ %10 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.hf.uh.128B(<32 x i32> %7)
+ %bc.i153 = bitcast <32 x i32> %10 to <64 x half>
+ %11 = extractelement <64 x half> %bc.i153, i64 0
+ %conv14 = fpext half %11 to double
+ %call15 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.6, double noundef %conv14) #6
+ %12 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.hf.sf.128B(<32 x i32> %3, <32 x i32> %3)
+ %bc.i155 = bitcast <32 x i32> %12 to <64 x half>
+ %13 = extractelement <64 x half> %bc.i155, i64 0
+ %conv17 = fpext half %13 to double
+ %call18 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.7, double noundef %conv17) #6
+ %14 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.ub.hf.128B(<32 x i32> %1, <32 x i32> %1)
+ %15 = bitcast <32 x i32> %14 to <128 x i8>
+ %conv.i = extractelement <128 x i8> %15, i64 0
+ %conv20 = zext i8 %conv.i to i32
+ %call21 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.8, i32 noundef %conv20) #6
+ %16 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.uh.hf.128B(<32 x i32> %0)
+ %17 = bitcast <32 x i32> %16 to <64 x i16>
+ %conv.i157 = extractelement <64 x i16> %17, i64 0
+ %conv23 = sext i16 %conv.i157 to i32
+ %call24 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.9, i32 noundef %conv23) #6
+ %18 = tail call <64 x i32> @llvm.hexagon.V6.vcvt.hf.b.128B(<32 x i32> %14)
+ %bc.i158 = bitcast <64 x i32> %18 to <128 x half>
+ %19 = extractelement <128 x half> %bc.i158, i64 0
+ %conv26 = fpext half %19 to double
+ %call27 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.10, double noundef %conv26) #6
+ %20 = tail call <64 x i32> @llvm.hexagon.V6.vcvt.hf.ub.128B(<32 x i32> %14)
+ %bc.i159 = bitcast <64 x i32> %20 to <128 x half>
+ %21 = extractelement <128 x half> %bc.i159, i64 0
+ %conv29 = fpext half %21 to double
+ %call30 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.11, double noundef %conv29) #6
+ %22 = tail call <64 x i32> @llvm.hexagon.V6.vcvt.sf.hf.128B(<32 x i32> %8)
+ %bc.i161 = bitcast <64 x i32> %22 to <64 x float>
+ %23 = extractelement <64 x float> %bc.i161, i64 0
+ %conv32 = fpext float %23 to double
+ %call33 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.12, double noundef %conv32) #6
+ %24 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.b.hf.128B(<32 x i32> %1, <32 x i32> %1)
+ %25 = bitcast <32 x i32> %24 to <128 x i8>
+ %conv.i162 = extractelement <128 x i8> %25, i64 0
+ %conv35 = zext i8 %conv.i162 to i32
+ %call36 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.13, i32 noundef %conv35) #6
+ %26 = tail call <32 x i32> @llvm.hexagon.V6.vcvt.h.hf.128B(<32 x i32> %1)
+ %27 = bitcast <32 x i32> %26 to <64 x i16>
+ %conv.i163 = extractelement <64 x i16> %27, i64 0
+ %conv38 = sext i16 %conv.i163 to i32
+ %call39 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.14, i32 noundef %conv38) #6
+ %28 = tail call <32 x i32> @llvm.hexagon.V6.vfmax.hf.128B(<32 x i32> %0, <32 x i32> %1)
+ %puts148 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.41)
+ %bc.i164 = bitcast <32 x i32> %28 to <64 x half>
+ %29 = extractelement <64 x half> %bc.i164, i64 0
+ %conv42 = fpext half %29 to double
+ %call43 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.16, double noundef %conv42) #6
+ %30 = tail call <32 x i32> @llvm.hexagon.V6.vfmin.hf.128B(<32 x i32> %0, <32 x i32> %1)
+ %bc.i166 = bitcast <32 x i32> %30 to <64 x half>
+ %31 = extractelement <64 x half> %bc.i166, i64 0
+ %conv45 = fpext half %31 to double
+ %call46 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.17, double noundef %conv45) #6
+ %32 = tail call <32 x i32> @llvm.hexagon.V6.vfmax.sf.128B(<32 x i32> %3, <32 x i32> %4)
+ %bc.i168 = bitcast <32 x i32> %32 to <32 x float>
+ %33 = extractelement <32 x float> %bc.i168, i64 0
+ %conv48 = fpext float %33 to double
+ %call49 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.18, double noundef %conv48) #6
+ %34 = tail call <32 x i32> @llvm.hexagon.V6.vfmin.sf.128B(<32 x i32> %3, <32 x i32> %4)
+ %bc.i169 = bitcast <32 x i32> %34 to <32 x float>
+ %35 = extractelement <32 x float> %bc.i169, i64 0
+ %conv51 = fpext float %35 to double
+ %call52 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.19, double noundef %conv51) #6
+ %puts149 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.42)
+ %36 = tail call <32 x i32> @llvm.hexagon.V6.vfneg.hf.128B(<32 x i32> %1)
+ %bc.i170 = bitcast <32 x i32> %36 to <64 x half>
+ %37 = extractelement <64 x half> %bc.i170, i64 0
+ %conv55 = fpext half %37 to double
+ %call56 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.21, double noundef %conv55) #6
+ %38 = tail call <32 x i32> @llvm.hexagon.V6.vabs.hf.128B(<32 x i32> %2)
+ %bc.i172 = bitcast <32 x i32> %38 to <64 x half>
+ %39 = extractelement <64 x half> %bc.i172, i64 0
+ %conv58 = fpext half %39 to double
+ %call59 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.22, double noundef %conv58) #6
+ %40 = tail call <32 x i32> @llvm.hexagon.V6.vfneg.sf.128B(<32 x i32> %3)
+ %bc.i174 = bitcast <32 x i32> %40 to <32 x float>
+ %41 = extractelement <32 x float> %bc.i174, i64 0
+ %conv61 = fpext float %41 to double
+ %call62 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.23, double noundef %conv61) #6
+ %42 = tail call <32 x i32> @llvm.hexagon.V6.vabs.sf.128B(<32 x i32> %5)
+ %bc.i175 = bitcast <32 x i32> %42 to <32 x float>
+ %43 = extractelement <32 x float> %bc.i175, i64 0
+ %conv64 = fpext float %43 to double
+ %call65 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.24, double noundef %conv64) #6
+ %puts150 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.43)
+ %44 = tail call <32 x i32> @llvm.hexagon.V6.vadd.hf.hf.128B(<32 x i32> %1, <32 x i32> %2)
+ %bc.i176 = bitcast <32 x i32> %44 to <64 x half>
+ %45 = extractelement <64 x half> %bc.i176, i64 0
+ %conv68 = fpext half %45 to double
+ %call69 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.26, double noundef %conv68) #6
+ %46 = tail call <32 x i32> @llvm.hexagon.V6.vsub.hf.hf.128B(<32 x i32> %1, <32 x i32> %2)
+ %bc.i178 = bitcast <32 x i32> %46 to <64 x half>
+ %47 = extractelement <64 x half> %bc.i178, i64 0
+ %conv71 = fpext half %47 to double
+ %call72 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.27, double noundef %conv71) #6
+ %48 = tail call <32 x i32> @llvm.hexagon.V6.vadd.sf.sf.128B(<32 x i32> %3, <32 x i32> %5)
+ %bc.i180 = bitcast <32 x i32> %48 to <32 x float>
+ %49 = extractelement <32 x float> %bc.i180, i64 0
+ %conv74 = fpext float %49 to double
+ %call75 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.28, double noundef %conv74) #6
+ %50 = tail call <32 x i32> @llvm.hexagon.V6.vsub.sf.sf.128B(<32 x i32> %3, <32 x i32> %5)
+ %bc.i181 = bitcast <32 x i32> %50 to <32 x float>
+ %51 = extractelement <32 x float> %bc.i181, i64 0
+ %conv77 = fpext float %51 to double
+ %call78 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.29, double noundef %conv77) #6
+ %52 = tail call <64 x i32> @llvm.hexagon.V6.vadd.sf.hf.128B(<32 x i32> %1, <32 x i32> %2)
+ %bc.i182 = bitcast <64 x i32> %52 to <64 x float>
+ %53 = extractelement <64 x float> %bc.i182, i64 0
+ %conv80 = fpext float %53 to double
+ %call81 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.30, double noundef %conv80) #6
+ %54 = tail call <64 x i32> @llvm.hexagon.V6.vsub.sf.hf.128B(<32 x i32> %1, <32 x i32> %2)
+ %bc.i183 = bitcast <64 x i32> %54 to <64 x float>
+ %55 = extractelement <64 x float> %bc.i183, i64 0
+ %conv83 = fpext float %55 to double
+ %call84 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.31, double noundef %conv83) #6
+ %puts151 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.44)
+ %56 = tail call <32 x i32> @llvm.hexagon.V6.vmpy.hf.hf.128B(<32 x i32> %1, <32 x i32> %2)
+ %bc.i184 = bitcast <32 x i32> %56 to <64 x half>
+ %57 = extractelement <64 x half> %bc.i184, i64 0
+ %conv87 = fpext half %57 to double
+ %call88 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.33, double noundef %conv87) #6
+ %58 = tail call <32 x i32> @llvm.hexagon.V6.vmpy.hf.hf.acc.128B(<32 x i32> %56, <32 x i32> %1, <32 x i32> %2)
+ %bc.i186 = bitcast <32 x i32> %58 to <64 x half>
+ %59 = extractelement <64 x half> %bc.i186, i64 0
+ %conv90 = fpext half %59 to double
+ %call91 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.34, double noundef %conv90) #6
+ %60 = tail call <64 x i32> @llvm.hexagon.V6.vmpy.sf.hf.128B(<32 x i32> %1, <32 x i32> %2)
+ %bc.i188 = bitcast <64 x i32> %60 to <64 x float>
+ %61 = extractelement <64 x float> %bc.i188, i64 0
+ %conv93 = fpext float %61 to double
+ %call94 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.35, double noundef %conv93) #6
+ %62 = tail call <64 x i32> @llvm.hexagon.V6.vmpy.sf.hf.acc.128B(<64 x i32> %60, <32 x i32> %1, <32 x i32> %2)
+ %bc.i189 = bitcast <64 x i32> %62 to <64 x float>
+ %63 = extractelement <64 x float> %bc.i189, i64 0
+ %conv96 = fpext float %63 to double
+ %call97 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.36, double noundef %conv96) #6
+ %64 = tail call <32 x i32> @llvm.hexagon.V6.vmpy.sf.sf.128B(<32 x i32> %3, <32 x i32> %5)
+ %bc.i190 = bitcast <32 x i32> %64 to <32 x float>
+ %65 = extractelement <32 x float> %bc.i190, i64 0
+ %conv99 = fpext float %65 to double
+ %call100 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.37, double noundef %conv99) #6
+ %puts152 = tail call i32 @puts(ptr nonnull dereferenceable(1) @str.45)
+ %66 = tail call <32 x i32> @llvm.hexagon.V6.vassign.fp.128B(<32 x i32> %3)
+ %bc.i191 = bitcast <32 x i32> %66 to <32 x float>
+ %67 = extractelement <32 x float> %bc.i191, i64 0
+ %conv103 = fpext float %67 to double
+ %call104 = tail call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str.39, double noundef %conv103) #6
+ ret i32 0
+}
+
+; Function Attrs: optsize
+declare dso_local i32 @acquire_vector_unit(i8 noundef zeroext) local_unnamed_addr #2
+
+; Function Attrs: noreturn nounwind optsize
+declare dso_local void @_Assert(ptr noundef, ptr noundef) local_unnamed_addr #3
+
+; Function Attrs: optsize
+declare dso_local void @set_double_vector_mode(...) local_unnamed_addr #2
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vcvt.hf.h.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vcvt.hf.uh.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vcvt.hf.sf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vcvt.ub.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vcvt.uh.hf.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <64 x i32> @llvm.hexagon.V6.vcvt.hf.b.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <64 x i32> @llvm.hexagon.V6.vcvt.hf.ub.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <64 x i32> @llvm.hexagon.V6.vcvt.sf.hf.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vcvt.b.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vcvt.h.hf.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vfmax.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vfmin.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vfmax.sf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vfmin.sf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vfneg.hf.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vabs.hf.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vfneg.sf.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vabs.sf.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vadd.hf.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vsub.hf.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vadd.sf.sf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vsub.sf.sf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <64 x i32> @llvm.hexagon.V6.vadd.sf.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <64 x i32> @llvm.hexagon.V6.vsub.sf.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vmpy.hf.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vmpy.hf.hf.acc.128B(<32 x i32>, <32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <64 x i32> @llvm.hexagon.V6.vmpy.sf.hf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <64 x i32> @llvm.hexagon.V6.vmpy.sf.hf.acc.128B(<64 x i32>, <32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vmpy.sf.sf.128B(<32 x i32>, <32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.vassign.fp.128B(<32 x i32>) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.lvsplath.128B(i32) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32) #4
+
+; Function Attrs: nofree nounwind
+declare noundef i32 @putchar(i32 noundef) local_unnamed_addr #5
+
+; Function Attrs: nofree nounwind
+declare noundef i32 @puts(ptr nocapture noundef readonly) local_unnamed_addr #5
diff --git a/llvm/test/CodeGen/Hexagon/hvx-vsub-qf-sf-mix.ll b/llvm/test/CodeGen/Hexagon/hvx-vsub-qf-sf-mix.ll
new file mode 100644
index 0000000..cdb779f
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/hvx-vsub-qf-sf-mix.ll
@@ -0,0 +1,60 @@
+;; RUN: llc --mtriple=hexagon --mcpu=hexagonv81 --mattr=+hvxv81,+hvx-length128b %s -o - | FileCheck %s
+
+define void @mul_and_sub_1(ptr readonly %A, ptr readonly %B, ptr readonly %C, ptr writeonly %D) {
+entry:
+ %AVec = load <32 x float>, ptr %A, align 4
+ %BVec = load <32 x float>, ptr %B, align 4
+ %CVec = load <32 x float>, ptr %C, align 4
+ %AtBVec = fmul <32 x float> %AVec, %BVec
+
+ %DVec = fsub <32 x float> %CVec, %AtBVec
+ store <32 x float> %DVec, ptr %D, align 4
+ ret void
+}
+;; CHECK: mul_and_sub_1
+;; CHECK: vsub(v{{[0-9]+}}.sf,v{{[0-9]+}}.qf32)
+
+
+define void @mul_and_sub_2(ptr readonly %A, ptr readonly %B, ptr readonly %C, ptr writeonly %D) {
+entry:
+ %AVec = load <32 x float>, ptr %A, align 4
+ %BVec = load <32 x float>, ptr %B, align 4
+ %CVec = load <32 x float>, ptr %C, align 4
+ %AtBVec = fmul <32 x float> %AVec, %BVec
+
+ %DVec = fsub <32 x float> %AtBVec, %CVec
+ store <32 x float> %DVec, ptr %D, align 4
+ ret void
+}
+;; CHECK: mul_and_sub_2
+;; CHECK: vsub(v{{[0-9]+}}.qf32,v{{[0-9]+}}.sf)
+
+
+define void @mul_and_sub_3(ptr readonly %A, ptr readonly %B, ptr readonly %C, ptr writeonly %D) {
+entry:
+ %AVec = load <64 x half>, ptr %A, align 4
+ %BVec = load <64 x half>, ptr %B, align 4
+ %CVec = load <64 x half>, ptr %C, align 4
+ %AtBVec = fmul <64 x half> %AVec, %BVec
+
+ %DVec = fsub <64 x half> %CVec, %AtBVec
+ store <64 x half> %DVec, ptr %D, align 4
+ ret void
+}
+;; CHECK: mul_and_sub_3
+;; CHECK: vsub(v{{[0-9]+}}.hf,v{{[0-9]+}}.qf16)
+
+
+define void @mul_and_sub_4(ptr readonly %A, ptr readonly %B, ptr readonly %C, ptr writeonly %D) {
+entry:
+ %AVec = load <64 x half>, ptr %A, align 4
+ %BVec = load <64 x half>, ptr %B, align 4
+ %CVec = load <64 x half>, ptr %C, align 4
+ %AtBVec = fmul <64 x half> %AVec, %BVec
+
+ %DVec = fsub <64 x half> %AtBVec, %CVec
+ store <64 x half> %DVec, ptr %D, align 4
+ ret void
+}
+;; CHECK: mul_and_sub_4
+;; CHECK: vsub(v{{[0-9]+}}.qf16,v{{[0-9]+}}.hf)
diff --git a/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll b/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll
index c16370c..527f27e 100644
--- a/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll
+++ b/llvm/test/CodeGen/Hexagon/qfpopt-rem-conv-add.ll
@@ -2,7 +2,7 @@
; type as first parameter instead of a sf type without
; any conversion instruction of type sf = qf32
-; RUN: llc -mtriple=hexagon < %s -o - | FileCheck %s
+; RUN: llc -mtriple=hexagon -mattr=+hvx-length128b,+hvxv75,+v75 < %s -o - | FileCheck %s
; CHECK: [[V2:v[0-9]+]] = vxor([[V2]],[[V2]])
; CHECK: [[V0:v[0-9]+]].qf32 = vmpy([[V0]].sf,[[V2]].sf)
@@ -17,5 +17,3 @@ entry:
store <64 x half> %conv17.ripple.vectorized, ptr %out_ptr, align 2
ret void
}
-
-attributes #0 = { "target-features"="+hvx-length128b,+hvxv75,+v75,-long-calls,-small-data" }
diff --git a/llvm/test/CodeGen/Hexagon/vect-qfp.mir b/llvm/test/CodeGen/Hexagon/vect-qfp.mir
new file mode 100644
index 0000000..6909591
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vect-qfp.mir
@@ -0,0 +1,202 @@
+# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \
+# RUN: -run-pass hexagon-qfp-optimizer -disable-qfp-opt-mul=false %s -o - | FileCheck %s --check-prefix=MUL-ENABLED
+# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \
+# RUN: -run-pass hexagon-qfp-optimizer %s -o - | FileCheck %s --check-prefix=DEFAULT
+# MUL-ENABLED-LABEL: name: qfpAdd32
+# MUL-ENABLED: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vadd_qf32_mix
+# MUL-ENABLED-NEXT: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# MUL-ENABLED-NEXT: V6_vadd_qf32
+# DEFAULT-LABEL: name: qfpAdd32
+# DEFAULT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vadd_qf32_mix
+# DEFAULT-NEXT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vS32Ub_ai
+# DEFAULT-NEXT: V6_vadd_qf32
+---
+name: qfpAdd32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:intregs = COPY $r3
+ %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %6:hvxvr = V6_vadd_sf %4:hvxvr, %5:hvxvr
+ %7:hvxvr = V6_vconv_sf_qf32 %6:hvxvr
+ %8:hvxvr = V6_vadd_sf %5:hvxvr, %7:hvxvr
+ %9:hvxvr = V6_vconv_sf_qf32 %8:hvxvr
+ V6_vS32Ub_ai %2:intregs, 0, %9:hvxvr
+ %10:hvxvr = V6_vadd_sf %7:hvxvr, %9:hvxvr
+ %11:hvxvr = V6_vconv_sf_qf32 %10:hvxvr
+ V6_vS32Ub_ai %3:intregs, 0, %11:hvxvr
+...
+# MUL-ENABLED-LABEL: name: qfpAdd16
+# MUL-ENABLED: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vadd_qf16_mix
+# MUL-ENABLED-NEXT: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# MUL-ENABLED-NEXT: V6_vadd_qf16
+# DEFAULT-LABEL: name: qfpAdd16
+# DEFAULT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vadd_qf16_mix
+# DEFAULT-NEXT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vS32Ub_ai
+# DEFAULT-NEXT: V6_vadd_qf16
+---
+name: qfpAdd16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:intregs = COPY $r3
+ %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %6:hvxvr = V6_vadd_hf %4:hvxvr, %5:hvxvr
+ %7:hvxvr = V6_vconv_hf_qf16 %6:hvxvr
+ %8:hvxvr = V6_vadd_hf %5:hvxvr, %7:hvxvr
+ %9:hvxvr = V6_vconv_hf_qf16 %8:hvxvr
+ V6_vS32Ub_ai %2:intregs, 0, %9:hvxvr
+ %10:hvxvr = V6_vadd_hf %7:hvxvr, %9:hvxvr
+ %11:hvxvr = V6_vconv_hf_qf16 %10:hvxvr
+ V6_vS32Ub_ai %3:intregs, 0, %11:hvxvr
+...
+# MUL-ENABLED-LABEL: name: qfpSub32
+# MUL-ENABLED: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vsub_qf32_mix
+# MUL-ENABLED-NEXT: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# MUL-ENABLED-NEXT: V6_vsub_qf32
+# DEFAULT-LABEL: name: qfpSub32
+# DEFAULT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vsub_qf32_mix
+# DEFAULT-NEXT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vS32Ub_ai
+# DEFAULT-NEXT: V6_vsub_qf32
+---
+name: qfpSub32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:intregs = COPY $r3
+ %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %6:hvxvr = V6_vsub_sf %4:hvxvr, %5:hvxvr
+ %7:hvxvr = V6_vconv_sf_qf32 %6:hvxvr
+ %8:hvxvr = V6_vsub_sf %7:hvxvr, %5:hvxvr
+ %9:hvxvr = V6_vconv_sf_qf32 %8:hvxvr
+ V6_vS32Ub_ai %2:intregs, 0, %9:hvxvr
+ %10:hvxvr = V6_vsub_sf %7:hvxvr, %9:hvxvr
+ %11:hvxvr = V6_vconv_sf_qf32 %10:hvxvr
+ V6_vS32Ub_ai %3:intregs, 0, %11:hvxvr
+...
+# MUL-ENABLED-LABEL: name: qfpSub16
+# MUL-ENABLED: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vsub_qf16_mix
+# MUL-ENABLED-NEXT: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# MUL-ENABLED-NEXT: V6_vsub_qf16
+# DEFAULT-LABEL: name: qfpSub16
+# DEFAULT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vsub_qf16_mix
+# DEFAULT-NEXT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vS32Ub_ai
+# DEFAULT-NEXT: V6_vsub_qf16
+---
+name: qfpSub16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:intregs = COPY $r3
+ %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %6:hvxvr = V6_vsub_hf %4:hvxvr, %5:hvxvr
+ %7:hvxvr = V6_vconv_hf_qf16 %6:hvxvr
+ %8:hvxvr = V6_vsub_hf %7:hvxvr, %5:hvxvr
+ %9:hvxvr = V6_vconv_hf_qf16 %8:hvxvr
+ V6_vS32Ub_ai %2:intregs, 0, %9:hvxvr
+ %10:hvxvr = V6_vsub_hf %7:hvxvr, %9:hvxvr
+ %11:hvxvr = V6_vconv_hf_qf16 %10:hvxvr
+ V6_vS32Ub_ai %3:intregs, 0, %11:hvxvr
+...
+# MUL-ENABLED-LABEL: name: qfpMul32
+# MUL-ENABLED: V6_vmpy_qf32_sf
+# MUL-ENABLED-NEXT: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vmpy_qf32_sf
+# MUL-ENABLED-NEXT: V6_vconv_sf_qf32
+# MUL-ENABLED-NEXT: V6_vmpy_qf32
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# DEFAULT-LABEL: name: qfpMul32
+# DEFAULT: V6_vmpy_qf32_sf
+# DEFAULT-NEXT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vmpy_qf32_sf
+# DEFAULT-NEXT: V6_vconv_sf_qf32
+# DEFAULT-NEXT: V6_vmpy_qf32_sf
+# DEFAULT-NEXT: V6_vS32Ub_ai
+---
+name: qfpMul32
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:intregs = COPY $r3
+ %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %6:hvxvr = V6_vL32Ub_ai %2:intregs, 0
+ %7:hvxvr = V6_vmpy_qf32_sf %4:hvxvr, %5:hvxvr
+ %8:hvxvr = V6_vconv_sf_qf32 %7:hvxvr
+ %9:hvxvr = V6_vmpy_qf32_sf %5:hvxvr, %6:hvxvr
+ %10:hvxvr = V6_vconv_sf_qf32 %9:hvxvr
+ %11:hvxvr = V6_vmpy_qf32_sf %8:hvxvr, %10:hvxvr
+ V6_vS32Ub_ai %3:intregs, 0, %11:hvxvr
+...
+# MUL-ENABLED-LABEL: name: qfpMul16
+# MUL-ENABLED: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vmpy_qf16_mix_hf
+# MUL-ENABLED-NEXT: V6_vconv_hf_qf16
+# MUL-ENABLED-NEXT: V6_vS32Ub_ai
+# MUL-ENABLED-NEXT: V6_vmpy_qf16
+# DEFAULT-LABEL: name: qfpMul16
+# DEFAULT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vmpy_qf16_hf
+# DEFAULT-NEXT: V6_vconv_hf_qf16
+# DEFAULT-NEXT: V6_vS32Ub_ai
+# DEFAULT-NEXT: V6_vmpy_qf16_hf
+---
+name: qfpMul16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ %0:intregs = COPY $r0
+ %1:intregs = COPY $r1
+ %2:intregs = COPY $r2
+ %3:intregs = COPY $r3
+ %4:hvxvr = V6_vL32Ub_ai %0:intregs, 0
+ %5:hvxvr = V6_vL32Ub_ai %1:intregs, 0
+ %6:hvxvr = V6_vmpy_qf16_hf %4:hvxvr, %5:hvxvr
+ %7:hvxvr = V6_vconv_hf_qf16 %6:hvxvr
+ %8:hvxvr = V6_vmpy_qf16_hf %5:hvxvr, %7:hvxvr
+ %9:hvxvr = V6_vconv_hf_qf16 %8:hvxvr
+ V6_vS32Ub_ai %2:intregs, 0, %9:hvxvr
+ %10:hvxvr = V6_vmpy_qf16_hf %7:hvxvr, %9:hvxvr
+ %11:hvxvr = V6_vconv_hf_qf16 %10:hvxvr
+ V6_vS32Ub_ai %3:intregs, 0, %11:hvxvr
diff --git a/llvm/test/CodeGen/Hexagon/vect/vect-qfp-unary.mir b/llvm/test/CodeGen/Hexagon/vect/vect-qfp-unary.mir
new file mode 100644
index 0000000..482edc8
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/vect/vect-qfp-unary.mir
@@ -0,0 +1,97 @@
+# RUN: llc -march=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \
+# RUN: -run-pass hexagon-qfp-optimizer %s -o - | FileCheck %s
+
+
+# CHECK: name: qfp_vilog32
+# CHECK: V6_vilog2_qf32
+---
+name: qfp_vilog32
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ $v0 = V6_vL32Ub_ai $r0, 0
+ $v1 = V6_vconv_sf_qf32 $v0
+ $v2 = V6_vilog2_sf $v1
+ V6_vS32Ub_ai $r2, 0, $v2
+...
+
+# CHECK-LABEL: name: qfp_vilog16
+# CHECK: V6_vilog2_qf16
+---
+name: qfp_vilog16
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ $v0 = V6_vL32Ub_ai $r0, 0
+ $v1 = V6_vconv_hf_qf16 $v0
+ $v2 = V6_vilog2_hf $v1
+ V6_vS32Ub_ai $r2, 0, $v2
+...
+
+# CHECK: name: qfp_vneg32
+# CHECK: V6_vneg_qf32_qf32
+---
+name: qfp_vneg32
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ $v0 = V6_vL32Ub_ai $r0, 0
+ $v1 = V6_vconv_sf_qf32 $v0
+ $v2 = V6_vneg_qf32_sf $v1
+ $v3 = V6_vconv_sf_qf32 $v2
+ V6_vS32Ub_ai $r2, 0, $v3
+...
+
+# CHECK-LABEL: name: qfp_vneg16
+# CHECK: V6_vneg_qf16_qf16
+---
+name: qfp_vneg16
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ $v0 = V6_vL32Ub_ai $r0, 0
+ $v1 = V6_vconv_hf_qf16 $v0
+ $v2 = V6_vneg_qf16_hf $v1
+ $v3 = V6_vconv_hf_qf16 $v2
+ V6_vS32Ub_ai $r2, 0, $v3
+...
+
+# CHECK: name: qfp_vabs32
+# CHECK: V6_vabs_qf32_qf32
+---
+name: qfp_vabs32
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ $v0 = V6_vL32Ub_ai $r0, 0
+ $v1 = V6_vconv_sf_qf32 $v0
+ $v2 = V6_vabs_qf32_sf $v1
+ $v3 = V6_vconv_sf_qf32 $v2
+ V6_vS32Ub_ai $r2, 0, $v3
+...
+
+# CHECK-LABEL: name: qfp_vabs16
+# CHECK: V6_vabs_qf16_qf16
+---
+name: qfp_vabs16
+tracksRegLiveness: true
+
+body: |
+ bb.0:
+ liveins: $r0, $r1, $r2, $r3
+ $v0 = V6_vL32Ub_ai $r0, 0
+ $v1 = V6_vconv_hf_qf16 $v0
+ $v2 = V6_vabs_qf16_hf $v1
+ $v3 = V6_vconv_hf_qf16 $v2
+ V6_vS32Ub_ai $r2, 0, $v3
+...