Diffstat (limited to 'llvm/lib/Target/Hexagon')
-rw-r--r--  llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp        |    2
-rw-r--r--  llvm/lib/Target/Hexagon/CMakeLists.txt                        |    3
-rw-r--r--  llvm/lib/Target/Hexagon/Hexagon.h                             |    4
-rw-r--r--  llvm/lib/Target/Hexagon/Hexagon.td                            |    2
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp                |    7
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp                |    1
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonCallingConv.td                 |   18
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp              |   15
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonGenInsert.cpp                  |    3
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonGenMux.cpp                     |    1
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp               |    4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp   |  565
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonGenWideningVecInstr.cpp        | 1181
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h             |    5
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp            |    8
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLowering.cpp               |   64
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLowering.h                 |    7
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp            |  208
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp                  |   28
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonInstrInfo.h                    |   11
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonIntrinsics.td                  |  114
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp          |    4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp               |    2
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp                |   14
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonOptShuffleVector.cpp           |  713
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonPatterns.td                    |   13
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonPatternsHVX.td                 |   79
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp               |  141
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp                     |    1
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonRegisterInfo.td                |  902
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonSubtarget.cpp                  |    4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonSubtarget.h                    |    7
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp              |   17
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp        |   21
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h          |   16
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp             |    4
-rw-r--r--  llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp              |   98
-rw-r--r--  llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp    |   14
-rw-r--r--  llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp     |   20
-rw-r--r--  llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h       |   38
-rw-r--r--  llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h   |    1
-rw-r--r--  llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp |    5
-rw-r--r--  llvm/lib/Target/Hexagon/RDFCopy.cpp                           |   25
43 files changed, 3671 insertions, 719 deletions
diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index b94b148..c18db98 100644
--- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -463,7 +463,7 @@ void HexagonOperand::print(raw_ostream &OS, const MCAsmInfo &MAI) const {
break;
case Register:
OS << "<register R";
- OS << getReg() << ">";
+ OS << getReg().id() << ">";
break;
case Token:
OS << "'" << getToken() << "'";
diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt
index 1a5f096..eddab5a 100644
--- a/llvm/lib/Target/Hexagon/CMakeLists.txt
+++ b/llvm/lib/Target/Hexagon/CMakeLists.txt
@@ -37,6 +37,8 @@ add_llvm_target(HexagonCodeGen
HexagonGenMemAbsolute.cpp
HexagonGenMux.cpp
HexagonGenPredicate.cpp
+ HexagonGenWideningVecFloatInstr.cpp
+ HexagonGenWideningVecInstr.cpp
HexagonHardwareLoops.cpp
HexagonHazardRecognizer.cpp
HexagonInstrInfo.cpp
@@ -53,6 +55,7 @@ add_llvm_target(HexagonCodeGen
HexagonNewValueJump.cpp
HexagonOptAddrMode.cpp
HexagonOptimizeSZextends.cpp
+ HexagonOptShuffleVector.cpp
HexagonPeephole.cpp
HexagonQFPOptimizer.cpp
HexagonRDFOpt.cpp
diff --git a/llvm/lib/Target/Hexagon/Hexagon.h b/llvm/lib/Target/Hexagon/Hexagon.h
index 422ab20..b98369d 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.h
+++ b/llvm/lib/Target/Hexagon/Hexagon.h
@@ -92,6 +92,9 @@ FunctionPass *createHexagonGenInsert();
FunctionPass *createHexagonGenMemAbsolute();
FunctionPass *createHexagonGenMux();
FunctionPass *createHexagonGenPredicate();
+FunctionPass *
+createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &);
+FunctionPass *createHexagonGenWideningVecInstr(const HexagonTargetMachine &);
FunctionPass *createHexagonHardwareLoops();
FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
CodeGenOptLevel OptLevel);
@@ -102,6 +105,7 @@ FunctionPass *createHexagonMergeActivateWeight();
FunctionPass *createHexagonNewValueJump();
FunctionPass *createHexagonOptAddrMode();
FunctionPass *createHexagonOptimizeSZextends();
+FunctionPass *createHexagonOptShuffleVector(const HexagonTargetMachine &);
FunctionPass *createHexagonPacketizer(bool Minimal);
FunctionPass *createHexagonPeephole();
FunctionPass *createHexagonRDFOpt();
diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td
index ede8463..17c72c3 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/llvm/lib/Target/Hexagon/Hexagon.td
@@ -413,6 +413,8 @@ include "HexagonPatternsV65.td"
include "HexagonDepMappings.td"
include "HexagonIntrinsics.td"
+defm : RemapAllTargetPseudoPointerOperands<IntRegs>;
+
def HexagonInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 68f5312..8483374 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -137,8 +137,7 @@ namespace {
return !Bits.any();
}
bool includes(const RegisterSet &Rs) const {
- // A.test(B) <=> A-B != {}
- return !Rs.Bits.test(Bits);
+ return Rs.Bits.subsetOf(Bits);
}
bool intersects(const RegisterSet &Rs) const {
return Bits.anyCommon(Rs.Bits);
@@ -1796,7 +1795,7 @@ namespace {
const MachineDominatorTree &MDT;
const HexagonInstrInfo &HII;
- const HexagonRegisterInfo &HRI;
+ [[maybe_unused]] const HexagonRegisterInfo &HRI;
MachineRegisterInfo &MRI;
BitTracker &BT;
};
@@ -1886,7 +1885,7 @@ bool BitSimplification::matchHalf(unsigned SelfR,
bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc,
unsigned OpNum) {
- auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI);
+ auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum);
auto *RRC = HBS::getFinalVRegClass(R, MRI);
return OpRC->hasSubClassEq(RRC);
}
diff --git a/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp b/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp
index eca5ac1..bae3484 100644
--- a/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp
@@ -24,7 +24,6 @@
#include <cstdint>
#include <iterator>
#include <map>
-#include <utility>
using namespace llvm;
diff --git a/llvm/lib/Target/Hexagon/HexagonCallingConv.td b/llvm/lib/Target/Hexagon/HexagonCallingConv.td
index dceb70c..80adde8 100644
--- a/llvm/lib/Target/Hexagon/HexagonCallingConv.td
+++ b/llvm/lib/Target/Hexagon/HexagonCallingConv.td
@@ -25,6 +25,8 @@ def CC_HexagonStack: CallingConv<[
def CC_Hexagon_Legacy: CallingConv<[
CCIfType<[i1,i8,i16],
CCPromoteToType<i32>>,
+ CCIfType<[bf16],
+ CCBitConvertToType<i32>>,
CCIfType<[f32],
CCBitConvertToType<i32>>,
CCIfType<[f64],
@@ -55,6 +57,8 @@ def CC_Hexagon_Legacy: CallingConv<[
def CC_Hexagon: CallingConv<[
CCIfType<[i1,i8,i16],
CCPromoteToType<i32>>,
+ CCIfType<[bf16],
+ CCBitConvertToType<i32>>,
CCIfType<[f32],
CCBitConvertToType<i32>>,
CCIfType<[f64],
@@ -88,6 +92,8 @@ def CC_Hexagon: CallingConv<[
def RetCC_Hexagon: CallingConv<[
CCIfType<[i1,i8,i16],
CCPromoteToType<i32>>,
+ CCIfType<[bf16],
+ CCBitConvertToType<i32>>,
CCIfType<[f32],
CCBitConvertToType<i32>>,
CCIfType<[f64],
@@ -149,16 +155,16 @@ def CC_Hexagon_HVX: CallingConv<[
CCIfType<[v128i1], CCPromoteToType<v128i8>>>,
CCIfHvx128<
- CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16],
+ CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16,v64bf16],
CCAssignToReg<[V0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15]>>>,
CCIfHvx128<
- CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16],
+ CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16,v128bf16],
CCAssignToReg<[W0,W1,W2,W3,W4,W5,W6,W7]>>>,
CCIfHvx128<
- CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16],
+ CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16,v64bf16],
CCAssignToStack<128,128>>>,
CCIfHvx128<
- CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16],
+    CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16,v128bf16],
CCAssignToStack<256,128>>>,
CCDelegateTo<CC_Hexagon>
@@ -175,10 +181,10 @@ def RetCC_Hexagon_HVX: CallingConv<[
// HVX 128-byte mode
CCIfHvx128<
- CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16],
+ CCIfType<[v32i32,v64i16,v128i8,v32f32,v64f16,v64bf16],
CCAssignToReg<[V0]>>>,
CCIfHvx128<
- CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16],
+ CCIfType<[v64i32,v128i16,v256i8,v64f32,v128f16,v128bf16],
CCAssignToReg<[W0]>>>,
CCDelegateTo<RetCC_Hexagon>
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index dd343d9..df61226 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -1405,7 +1405,7 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
int FI = I.getFrameIdx();
const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
- HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI, Register());
+ HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, Register());
if (IsKill)
MBB.addLiveIn(Reg);
}
@@ -1470,7 +1470,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
MCRegister Reg = I.getReg();
const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
int FI = I.getFrameIdx();
- HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI, Register());
+ HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, Register());
}
return true;
@@ -1814,8 +1814,7 @@ bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B,
.addReg(SrcR, getKillRegState(IsKill))
.addReg(TmpR0, RegState::Kill);
- auto *HRI = B.getParent()->getSubtarget<HexagonSubtarget>().getRegisterInfo();
- HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, HRI, Register());
+ HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, Register());
expandStoreVec(B, std::prev(It), MRI, HII, NewRegs);
NewRegs.push_back(TmpR0);
@@ -1844,9 +1843,7 @@ bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B,
BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
.addImm(0x01010101);
- MachineFunction &MF = *B.getParent();
- auto *HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
- HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, HRI, Register());
+ HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, Register());
expandLoadVec(B, std::prev(It), MRI, HII, NewRegs);
BuildMI(B, It, DL, HII.get(Hexagon::V6_vandvrt), DstR)
@@ -2225,7 +2222,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
if (!Bad) {
// If the addressing mode is ok, check the register class.
unsigned OpNum = Load ? 0 : 2;
- auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI);
+ auto *RC = HII.getRegClass(In.getDesc(), OpNum);
RC = getCommonRC(SI.RC, RC);
if (RC == nullptr)
Bad = true;
@@ -2395,7 +2392,7 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(),
SrcOp.getSubReg() };
- auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI);
+ auto *RC = HII.getRegClass(SI.getDesc(), 2);
// The this-> is needed to unconfuse MSVC.
Register FoundR = this->findPhysReg(MF, Range, IM, DM, RC);
LLVM_DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI)
diff --git a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index ff876f6..18fcd6a 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -153,8 +153,7 @@ namespace {
return !BitVector::any();
}
bool includes(const RegisterSet &Rs) const {
- // A.BitVector::test(B) <=> A-B != {}
- return !Rs.BitVector::test(*this);
+ return Rs.BitVector::subsetOf(*this);
}
bool intersects(const RegisterSet &Rs) const {
return BitVector::anyCommon(Rs);
diff --git a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
index 74e5abe..c6fffde 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -43,7 +43,6 @@
#include <cassert>
#include <iterator>
#include <limits>
-#include <utility>
#define DEBUG_TYPE "hexmux"
diff --git a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
index 9c81e963..412d587 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -30,7 +30,6 @@
#include <cassert>
#include <iterator>
#include <queue>
-#include <utility>
#define DEBUG_TYPE "gen-pred"
@@ -52,8 +51,7 @@ private:
};
[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
- const PrintRegister &PR);
-raw_ostream &operator<<(raw_ostream &OS, const PrintRegister &PR) {
+ const PrintRegister &PR) {
return OS << printReg(PR.Reg.Reg, &PR.TRI, PR.Reg.SubReg);
}
diff --git a/llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp b/llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp
new file mode 100644
index 0000000..7271f1f
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonGenWideningVecFloatInstr.cpp
@@ -0,0 +1,565 @@
+//===------------------- HexagonGenWideningVecFloatInstr.cpp --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Replace widening vector float operations with hexagon intrinsics.
+//
+//===----------------------------------------------------------------------===//
+//
+// Brief overview of the GenWideningVecFloatInstr pass. It is the
+// floating-point counterpart of the existing pass that replaces widening
+// vector integer operations with their respective intrinsics; here we
+// generate Hexagon intrinsics for widening vector float instructions.
+//
+// Example 1 (64-element vector widening):
+// %wide.load = load <64 x half>, <64 x half>* %0, align 2
+// %wide.load53 = load <64 x half>, <64 x half>* %2, align 2
+// %1 = fpext <64 x half> %wide.load to <64 x float>
+// %3 = fpext <64 x half> %wide.load53 to <64 x float>
+// %4 = fmul <64 x float> %1, %3
+//
+// When run on the above example, the pass first finds the fmul instruction and
+// then checks whether its operands (%1 and %3) fall into one of these
+// categories: [%1 ->fpext, %3 ->fpext], [%1 ->fpext, %3 ->constant_vector],
+// or [%1 ->constant_vector, %3 ->fpext].
+// If such a pattern is found, the pass replaces it with the appropriate
+// Hexagon intrinsics.
+//
+// After replacement:
+// %wide.load = load <64 x half>, <64 x half>* %0, align 2
+// %wide.load53 = load <64 x half>, <64 x half>* %2, align 2
+// %3 = bitcast <64 x half> %wide.load to <32 x i32>
+// %4 = bitcast <64 x half> %wide.load53 to <32 x i32>
+// %5 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%3, %4)
+// %6 = shufflevector <64 x i32> %5, <64 x i32> poison, <64 x i32> ShuffMask1
+// %7 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %6)
+// %8 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %6)
+// %9 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %7)
+// %10 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %8)
+// %11 = bitcast <32 x i32> %9 to <32 x float>
+// %12 = bitcast <32 x i32> %10 to <32 x float>
+// %13 = shufflevector <32 x float> %12, <32 x float> %11, <64 x i32> ShuffMask2
+//
+//
+//
+// Example 2 (128-element vector widening):
+// %0 = bitcast half* %a to <128 x half>*
+// %wide.load = load <128 x half>, <128 x half>* %0, align 2
+// %1 = fpext <128 x half> %wide.load to <128 x float>
+// %2 = bitcast half* %b to <128 x half>*
+// %wide.load2 = load <128 x half>, <128 x half>* %2, align 2
+// %3 = fpext <128 x half> %wide.load2 to <128 x float>
+// %4 = fmul <128 x float> %1, %3
+//
+// After replacement:
+// %0 = bitcast half* %a to <128 x half>*
+// %wide.load = load <128 x half>, <128 x half>* %0, align 2
+// %1 = bitcast half* %b to <128 x half>*
+// %wide.load2 = load <128 x half>, <128 x half>* %1, align 2
+// %2 = bitcast <128 x half> %wide.load to <64 x i32>
+// %3 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %2)
+// %4 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %2)
+// %5 = bitcast <128 x half> %wide.load2 to <64 x i32>
+// %6 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %5)
+// %7 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %5)
+// %8 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%3, %6)
+// %9 = shufflevector <64 x i32> %8, <64 x i32> poison, <64 x i32> Mask1
+// %10 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %9)
+// %11 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %9)
+// %12 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %10)
+// %13 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %11)
+// %14 = bitcast <32 x i32> %12 to <32 x float>
+// %15 = bitcast <32 x i32> %13 to <32 x float>
+// %16 = shufflevector <32 x float> %15, <32 x float> %14, <64 x i32> Mask2
+// %17 = call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%4, %7)
+// %18 = shufflevector <64 x i32> %17, <64 x i32> poison, <64 x i32> Mask1
+// %19 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %18)
+// %20 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %18)
+// %21 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %19)
+// %22 = call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %20)
+// %23 = bitcast <32 x i32> %21 to <32 x float>
+// %24 = bitcast <32 x i32> %22 to <32 x float>
+// %25 = shufflevector <32 x float> %24, <32 x float> %23, <64 x i32> Mask2
+// %26 = shufflevector <64 x float> %25, <64 x float> %16, <128 x i32> Mask3
+//
+//
+//===----------------------------------------------------------------------===//
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include <algorithm>
+#include <utility>
+
+using namespace llvm;
+
+namespace llvm {
+void initializeHexagonGenWideningVecFloatInstrPass(PassRegistry &);
+FunctionPass *
+createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &);
+} // end namespace llvm
+
+namespace {
+
+class HexagonGenWideningVecFloatInstr : public FunctionPass {
+public:
+ static char ID;
+
+ HexagonGenWideningVecFloatInstr() : FunctionPass(ID) {
+ initializeHexagonGenWideningVecFloatInstrPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ HexagonGenWideningVecFloatInstr(const HexagonTargetMachine *TM)
+ : FunctionPass(ID), TM(TM) {
+ initializeHexagonGenWideningVecFloatInstrPass(
+ *PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Hexagon generate widening vector float instructions";
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ Module *M = nullptr;
+ const HexagonTargetMachine *TM = nullptr;
+ const HexagonSubtarget *HST = nullptr;
+ unsigned HwVLen;
+ unsigned NumHalfEltsInFullVec;
+
+ struct OPInfo {
+ Value *OP;
+ Value *ExtInOP;
+ unsigned ExtInSize;
+ };
+
+ bool visitBlock(BasicBlock *B);
+ bool processInstruction(Instruction *Inst);
+ bool replaceWithIntrinsic(Instruction *Inst, OPInfo &OP1Info,
+ OPInfo &OP2Info);
+
+ bool getOperandInfo(Value *V, OPInfo &OPI);
+ bool isExtendedConstant(Constant *C);
+ unsigned getElementSizeInBits(Value *V);
+ Type *getElementTy(unsigned size, IRBuilder<> &IRB);
+
+ Value *adjustExtensionForOp(OPInfo &OPI, IRBuilder<> &IRB,
+ unsigned NewEltsize, unsigned NumElts);
+
+ std::pair<Value *, Value *> opSplit(Value *OP, Instruction *Inst);
+
+ Value *createIntrinsic(Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1,
+ Value *NewOP2, FixedVectorType *ResType,
+ unsigned NumElts, bool BitCastOp);
+};
+
+} // end anonymous namespace
+
+char HexagonGenWideningVecFloatInstr::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonGenWideningVecFloatInstr, "widening-vec-float",
+ "Hexagon generate "
+ "widening vector float instructions",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(HexagonGenWideningVecFloatInstr, "widening-vec-float",
+ "Hexagon generate "
+ "widening vector float instructions",
+ false, false)
+
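+// Returns true if every element of the constant vector C can be narrowed to
+// IEEE half precision with at most a rounding error (opOK or opInexact).
+// Illustrative sketch: a splat of 0.5 converts exactly, while a splat of
+// 1.0e+5 overflows half's range (max finite value is 65504) and is rejected.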
+bool HexagonGenWideningVecFloatInstr::isExtendedConstant(Constant *C) {
+ if (Value *SplatV = C->getSplatValue()) {
+ if (auto *CFP = dyn_cast<ConstantFP>(SplatV)) {
+ bool Ignored;
+ APFloat APF = CFP->getValueAPF();
+ APFloat::opStatus sts = APF.convert(
+ APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
+ if (sts == APFloat::opStatus::opOK || sts == APFloat::opStatus::opInexact)
+ return true;
+ }
+ return false;
+ }
+ unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ if (auto *CFP = dyn_cast<ConstantFP>(C->getAggregateElement(i))) {
+ bool Ignored;
+ APFloat APF = CFP->getValueAPF();
+ APFloat::opStatus sts = APF.convert(
+ APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
+ if (sts != APFloat::opStatus::opOK && sts != APFloat::opStatus::opInexact)
+ return false;
+ continue;
+ }
+ return false;
+ }
+ return true;
+}
+
+unsigned HexagonGenWideningVecFloatInstr::getElementSizeInBits(Value *V) {
+ Type *ValTy = V->getType();
+ Type *EltTy = ValTy;
+ if (dyn_cast<Constant>(V)) {
+ unsigned EltSize =
+ cast<VectorType>(EltTy)->getElementType()->getPrimitiveSizeInBits();
+ unsigned ReducedSize = EltSize / 2;
+
+ return ReducedSize;
+ }
+
+ if (ValTy->isVectorTy())
+ EltTy = cast<VectorType>(ValTy)->getElementType();
+ return EltTy->getPrimitiveSizeInBits();
+}
+
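+// Collect extension info for operand V: either an fpext of a vector (or a
+// splat built from an fpext'ed scalar) whose source is ieee fp16, or a
+// constant vector whose elements fit in fp16. bf16 sources are rejected so
+// they are not confused with ieee fp16.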
+bool HexagonGenWideningVecFloatInstr::getOperandInfo(Value *V, OPInfo &OPI) {
+ using namespace PatternMatch;
+ OPI.OP = V;
+ Value *ExtV = nullptr;
+ Constant *C = nullptr;
+
+ if (match(V, (m_FPExt(m_Value(ExtV)))) ||
+ match(V,
+ m_Shuffle(m_InsertElt(m_Poison(), m_FPExt(m_Value(ExtV)), m_Zero()),
+ m_Poison(), m_ZeroMask()))) {
+
+ if (auto *ExtVType = dyn_cast<VectorType>(ExtV->getType())) {
+ // Matches the first branch.
+ if (ExtVType->getElementType()->isBFloatTy())
+ // do not confuse bf16 with ieee-fp16.
+ return false;
+ } else {
+ // Matches the second branch (insert element branch)
+ if (ExtV->getType()->isBFloatTy())
+ return false;
+ }
+
+ OPI.ExtInOP = ExtV;
+ OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP);
+ return true;
+ }
+
+ if (match(V, m_Constant(C))) {
+ if (!isExtendedConstant(C))
+ return false;
+ OPI.ExtInOP = C;
+ OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP);
+ return true;
+ }
+
+ return false;
+}
+
+Type *HexagonGenWideningVecFloatInstr::getElementTy(unsigned size,
+ IRBuilder<> &IRB) {
+ switch (size) {
+ case 16:
+ return IRB.getHalfTy();
+ case 32:
+ return IRB.getFloatTy();
+ default:
+ llvm_unreachable("Unhandled Element size");
+ }
+}
+
+Value *HexagonGenWideningVecFloatInstr::adjustExtensionForOp(
+ OPInfo &OPI, IRBuilder<> &IRB, unsigned NewExtSize, unsigned NumElts) {
+ Value *V = OPI.ExtInOP;
+ unsigned EltSize = getElementSizeInBits(OPI.ExtInOP);
+ assert(NewExtSize >= EltSize);
+ Type *EltType = getElementTy(NewExtSize, IRB);
+ auto *NewOpTy = FixedVectorType::get(EltType, NumElts);
+
+ if (auto *C = dyn_cast<Constant>(V))
+ return IRB.CreateFPTrunc(C, NewOpTy);
+
+ if (V->getType()->isVectorTy())
+ if (NewExtSize == EltSize)
+ return V;
+
+ return nullptr;
+}
+
+std::pair<Value *, Value *>
+HexagonGenWideningVecFloatInstr::opSplit(Value *OP, Instruction *Inst) {
+ Type *InstTy = Inst->getType();
+ unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
+ IRBuilder<> IRB(Inst);
+ Intrinsic::ID IntHi = Intrinsic::hexagon_V6_hi_128B;
+ Intrinsic::ID IntLo = Intrinsic::hexagon_V6_lo_128B;
+ Function *ExtFHi = Intrinsic::getOrInsertDeclaration(M, IntHi);
+ Function *ExtFLo = Intrinsic::getOrInsertDeclaration(M, IntLo);
+ if (NumElts == 128) {
+ auto *InType = FixedVectorType::get(IRB.getInt32Ty(), 64);
+ OP = IRB.CreateBitCast(OP, InType);
+ }
+ Value *OP1Hi = IRB.CreateCall(ExtFHi, {OP});
+ Value *OP1Lo = IRB.CreateCall(ExtFLo, {OP});
+ return std::pair<Value *, Value *>(OP1Hi, OP1Lo);
+}
+
+Value *HexagonGenWideningVecFloatInstr::createIntrinsic(
+ Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, Value *NewOP2,
+ FixedVectorType *ResType, unsigned NumElts, bool BitCastOp) {
+
+ IRBuilder<> IRB(Inst);
+ Function *ExtF = Intrinsic::getOrInsertDeclaration(M, IntId);
+ Function *ConvF = Intrinsic::getOrInsertDeclaration(
+ M, Intrinsic::hexagon_V6_vconv_sf_qf32_128B);
+ auto *InType = FixedVectorType::get(IRB.getInt32Ty(), 32);
+ auto *RType = FixedVectorType::get(IRB.getFloatTy(), 32);
+
+  // Make sure the inputs to the vmpy intrinsic are full vectors.
+ if (NumElts == NumHalfEltsInFullVec / 2) {
+ SmallVector<Constant *, 16> ConcatMask1;
+ for (unsigned i = 0; i < NumHalfEltsInFullVec; ++i)
+ ConcatMask1.push_back(IRB.getInt32(i));
+ NewOP1 =
+ IRB.CreateShuffleVector(NewOP1, PoisonValue::get(NewOP1->getType()),
+ ConstantVector::get(ConcatMask1));
+ NewOP2 =
+ IRB.CreateShuffleVector(NewOP2, PoisonValue::get(NewOP2->getType()),
+ ConstantVector::get(ConcatMask1));
+ }
+
+ if (BitCastOp) {
+ NewOP1 = IRB.CreateBitCast(NewOP1, InType);
+ NewOP2 = IRB.CreateBitCast(NewOP2, InType);
+ }
+
+ Value *NewIn = IRB.CreateCall(ExtF, {NewOP1, NewOP2});
+ // Interleave the output elements to ensure correct order in Hi and Lo vectors
+ // Shuffled Mask: [0, 32, 1, 33, ..., 31, 63]
+ // Hi: [0, 1, ..., 31] and Lo: [32, 33, ..., 63]
+ SmallVector<Constant *, 16> Mask;
+ unsigned HalfVecPoint = NumHalfEltsInFullVec / 2;
+ for (unsigned i = 0; i < HalfVecPoint; ++i) {
+ Mask.push_back(IRB.getInt32(i));
+ Mask.push_back(IRB.getInt32(HalfVecPoint + i));
+ }
+ NewIn = IRB.CreateShuffleVector(NewIn, PoisonValue::get(NewIn->getType()),
+ ConstantVector::get(Mask));
+
+ std::pair<Value *, Value *> SplitOP = opSplit(NewIn, Inst);
+ Value *ConvHi = IRB.CreateCall(ConvF, {SplitOP.first});
+ ConvHi = IRB.CreateBitCast(ConvHi, RType);
+
+ if (ResType->getNumElements() == NumHalfEltsInFullVec / 2) {
+ return ConvHi;
+ }
+
+ Value *ConvLo = IRB.CreateCall(ConvF, {SplitOP.second});
+ ConvLo = IRB.CreateBitCast(ConvLo, RType);
+
+ SmallVector<Constant *, 16> ShuffleMask;
+ for (unsigned i = 0; i < NumElts; ++i)
+ ShuffleMask.push_back(IRB.getInt32(i));
+ // Concat Hi and Lo.
+ NewIn =
+ IRB.CreateShuffleVector(ConvLo, ConvHi, ConstantVector::get(ShuffleMask));
+ return NewIn;
+}
+
+bool HexagonGenWideningVecFloatInstr::replaceWithIntrinsic(Instruction *Inst,
+ OPInfo &OP1Info,
+ OPInfo &OP2Info) {
+ Type *InstTy = Inst->getType();
+ Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
+ unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
+ [[maybe_unused]] unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
+
+ unsigned MaxEltSize = OP1Info.ExtInSize;
+ unsigned NewOpEltSize = MaxEltSize;
+ unsigned NewResEltSize = 2 * MaxEltSize;
+
+ unsigned ResVLen = NewResEltSize * NumElts;
+ if (NewOpEltSize > 16 || ((ResVLen > HwVLen) && (ResVLen % HwVLen) != 0))
+ return false;
+
+ Intrinsic::ID IntId = Intrinsic::hexagon_V6_vmpy_qf32_hf_128B;
+ IRBuilder<> IRB(Inst);
+ Value *NewOP1 = adjustExtensionForOp(OP1Info, IRB, NewOpEltSize, NumElts);
+ Value *NewOP2 = adjustExtensionForOp(OP2Info, IRB, NewOpEltSize, NumElts);
+
+ if (NewOP1 == nullptr || NewOP2 == nullptr)
+ return false;
+
+ if (ResVLen > 2 * HwVLen) {
+ // The code written in this if block generates the widening code when
+ // vector-width is 128:
+ //
+ // Step 1: Bitcast <128 x half> type to <64 x i32>
+ // %wide.load = load <128 x half>, <128 x half>* %0 is bitcasted to,
+ // bitcast <128 x half> %wide.load to <64 x i32>
+ //
+ // Step 2: Generate Hi and Lo vectors
+ // call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %4)
+ // call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %4)
+ //
+ // Perform above 2 steps for both the operands of fmul instruction
+ //
+ // Step 3: Generate vmpy_qf32_hf multiply instruction to multiply two Hi
+ // vectors from both operands.
+ // call <64 x i32> @llvm.hexagon.V6.vmpy.qf32.hf.128B(%5, %8)
+ //
+ // Step 4: Convert the resultant 'qf32' output to 'sf' format
+ // %11 = shufflevector <64 x i32> %10, <64 x i32> poison, <64 x i32> Mask1
+ // %12 = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %11)
+ // %13 = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %11)
+ // call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %12)
+ // call <32 x i32> @llvm.hexagon.V6.vconv.sf.qf32.128B(<32 x i32> %13)
+ //
+    // Repeat steps 3 and 4 for multiplication and conversion of Lo vectors.
+ // Finally merge the output values in correct sequence using shuffle
+ // vectors.
+
+ assert(ResVLen == 4 * HwVLen);
+ // Split the operands
+ unsigned HalfElts = NumElts / 2;
+ std::pair<Value *, Value *> SplitOP1 = opSplit(NewOP1, Inst);
+ std::pair<Value *, Value *> SplitOP2 = opSplit(NewOP2, Inst);
+ auto *castResType = FixedVectorType::get(IRB.getInt32Ty(), HalfElts);
+ Value *NewInHi =
+ createIntrinsic(IntId, Inst, SplitOP1.first, SplitOP2.first,
+ castResType, HalfElts, false);
+ Value *NewInLo =
+ createIntrinsic(IntId, Inst, SplitOP1.second, SplitOP2.second,
+ castResType, HalfElts, false);
+ assert(InstEltSize == NewResEltSize);
+ SmallVector<Constant *, 8> ShuffleMask;
+ for (unsigned i = 0; i < NumElts; ++i)
+ ShuffleMask.push_back(IRB.getInt32(i));
+ // Concat Hi and Lo.
+ Value *NewIn = IRB.CreateShuffleVector(NewInLo, NewInHi,
+ ConstantVector::get(ShuffleMask));
+
+ Inst->replaceAllUsesWith(NewIn);
+ return true;
+ }
+
+ auto *ResType =
+ FixedVectorType::get(getElementTy(NewResEltSize, IRB), NumElts);
+
+ // The following widening code can only be generated in cases where
+ // input vectors are 64xhalf/32xhalf and the results are 64xfloat/32xfloat
+ // respectively.
+ if (!(NumElts == NumHalfEltsInFullVec &&
+ ResType->getNumElements() == NumHalfEltsInFullVec) &&
+ !(NumElts == NumHalfEltsInFullVec / 2 &&
+ ResType->getNumElements() == NumHalfEltsInFullVec / 2))
+ return false;
+ Value *NewIn =
+ createIntrinsic(IntId, Inst, NewOP1, NewOP2, ResType, NumElts, true);
+
+ Inst->replaceAllUsesWith(NewIn);
+ return true;
+}
+
+// Process instruction and replace them with widening vector
+// intrinsics if possible.
+bool HexagonGenWideningVecFloatInstr::processInstruction(Instruction *Inst) {
+ Type *InstTy = Inst->getType();
+ if (!InstTy->isVectorTy() ||
+ cast<FixedVectorType>(InstTy)->getNumElements() > 128)
+ return false;
+ unsigned InstLen = InstTy->getPrimitiveSizeInBits();
+ if (!HST->isTypeForHVX(cast<VectorType>(InstTy)) && InstLen != 4 * HwVLen)
+ return false;
+ if (InstLen < HwVLen)
+ return false;
+
+ using namespace PatternMatch;
+
+ Value *OP1 = nullptr, *OP2 = nullptr;
+ OPInfo OP1Info, OP2Info;
+
+ // Handle the case when Inst = fpext(fmul<64xhalf>(op1, op2)). The Inst can
+ // be replaced with widening multiply.
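+  // For illustration (hypothetical IR, names are placeholders):
+  //   %m = fmul <64 x half> %a, %b
+  //   %e = fpext <64 x half> %m to <64 x float>
+  // Both inputs are 16-bit and the result is 32-bit, so the fpext of the fmul
+  // can be formed directly with a widening multiply.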
+ if (match(Inst, (m_FPExt((m_FMul(m_Value(OP1), m_Value(OP2))))))) {
+ OP1Info.ExtInOP = OP1;
+ OP1Info.ExtInSize = getElementSizeInBits(OP1);
+ OP2Info.ExtInOP = OP2;
+ OP2Info.ExtInSize = getElementSizeInBits(OP2);
+
+ if (auto *Op1Vtype = dyn_cast<VectorType>(OP1->getType())) {
+ if (!Op1Vtype->getElementType()->isHalfTy()) {
+ return false;
+ }
+ } else {
+ return false;
+ }
+
+ if (OP1Info.ExtInSize == OP2Info.ExtInSize && OP1Info.ExtInSize == 16 &&
+ getElementSizeInBits(Inst) == 32) {
+ return replaceWithIntrinsic(Inst, OP1Info, OP2Info);
+ }
+ }
+
+ if (!match(Inst, (m_FMul(m_Value(OP1), m_Value(OP2)))))
+ return false;
+
+ if (!getOperandInfo(OP1, OP1Info) || !getOperandInfo(OP2, OP2Info))
+ return false;
+
+ if (!OP1Info.ExtInOP || !OP2Info.ExtInOP)
+ return false;
+
+ if (OP1Info.ExtInSize == OP2Info.ExtInSize && OP1Info.ExtInSize == 16) {
+ return replaceWithIntrinsic(Inst, OP1Info, OP2Info);
+ }
+
+ return false;
+}
+
+bool HexagonGenWideningVecFloatInstr::visitBlock(BasicBlock *B) {
+ bool Changed = false;
+ for (auto &I : *B)
+ Changed |= processInstruction(&I);
+ return Changed;
+}
+
+bool HexagonGenWideningVecFloatInstr::runOnFunction(Function &F) {
+ M = F.getParent();
+ HST = TM->getSubtargetImpl(F);
+
+  // Return if useHVX128BOps is not set. This could be enabled for 64B mode,
+  // but it would require some changes; for example, the bitcasts for
+  // intrinsics assume 128B mode.
+ if (skipFunction(F) || !HST->useHVX128BOps())
+ return false;
+
+ unsigned VecLength = HST->getVectorLength(); // Vector Length in Bytes
+ HwVLen = HST->getVectorLength() * 8; // Vector Length in bits
+  // Number of half (2B) elements that fit into a full HVX vector.
+  NumHalfEltsInFullVec = VecLength / 2;
+
+ bool Changed = false;
+ for (auto &B : F)
+ Changed |= visitBlock(&B);
+
+ return Changed;
+}
+
+FunctionPass *
+llvm::createHexagonGenWideningVecFloatInstr(const HexagonTargetMachine &TM) {
+ return new HexagonGenWideningVecFloatInstr(&TM);
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonGenWideningVecInstr.cpp b/llvm/lib/Target/Hexagon/HexagonGenWideningVecInstr.cpp
new file mode 100644
index 0000000..297410b
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonGenWideningVecInstr.cpp
@@ -0,0 +1,1181 @@
+//===--------------------- HexagonGenWideningVecInstr.cpp -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Replace widening vector operations with hexagon intrinsics.
+//
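+// For example (a sketch only, not taken from actual pass output):
+//   %xa = zext <128 x i8> %a to <128 x i16>
+//   %xb = zext <128 x i8> %b to <128 x i16>
+//   %s  = add <128 x i16> %xa, %xb
+// is rewritten as a call to the hexagon_vadd_uu widening intrinsic on the
+// narrow <128 x i8> operands, followed by a shufflevector that interleaves
+// the two halves of the result back into element order.
+//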
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+#include <utility>
+
+using namespace llvm;
+
+// A command line argument to enable the generation of widening instructions
+// for short vectors.
+static cl::opt<bool> WidenShortVector(
+ "hexagon-widen-short-vector",
+ cl::desc("Generate widening instructions for short vectors."), cl::Hidden);
+
+namespace llvm {
+void initializeHexagonGenWideningVecInstrPass(PassRegistry &);
+FunctionPass *createHexagonGenWideningVecInstr(const HexagonTargetMachine &);
+} // end namespace llvm
+
+namespace {
+
+class HexagonGenWideningVecInstr : public FunctionPass {
+public:
+ static char ID;
+
+ HexagonGenWideningVecInstr() : FunctionPass(ID) {
+ initializeHexagonGenWideningVecInstrPass(*PassRegistry::getPassRegistry());
+ }
+
+ HexagonGenWideningVecInstr(const HexagonTargetMachine *TM)
+ : FunctionPass(ID), TM(TM) {
+ initializeHexagonGenWideningVecInstrPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Hexagon generate widening vector instructions";
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ Module *M = nullptr;
+ const HexagonTargetMachine *TM = nullptr;
+ const HexagonSubtarget *HST = nullptr;
+ unsigned HwVLen;
+ enum OPKind { OP_None = 0, OP_Add, OP_Sub, OP_Mul, OP_Shl };
+
+ struct OPInfo {
+ Value *OP = nullptr;
+ Value *ExtInOP = nullptr;
+ bool IsZExt = false;
+ unsigned ExtInSize = 0;
+ bool IsScalar = false;
+ };
+
+ bool visitBlock(BasicBlock *B);
+ bool processInstruction(Instruction *Inst);
+ bool replaceWithIntrinsic(Instruction *Inst, OPKind OPK, OPInfo &OP1Info,
+ OPInfo &OP2Info);
+ bool getOperandInfo(Value *V, OPInfo &OPI);
+ bool isExtendedConstant(Constant *C, bool IsSigned);
+ unsigned getElementSizeInBits(Value *V, bool IsZExt);
+ Type *getElementTy(unsigned size, IRBuilder<> &IRB);
+
+ Value *adjustExtensionForOp(OPInfo &OPI, IRBuilder<> &IRB,
+ unsigned NewEltsize, unsigned NumElts);
+
+ Intrinsic::ID getIntrinsic(OPKind OPK, bool IsOP1ZExt, bool IsOP2ZExt,
+ unsigned NewOpEltSize, unsigned NewResEltSize,
+ bool IsConstScalar, int ConstOpNum);
+
+ std::pair<Value *, Value *> opSplit(Value *OP, Instruction *Inst,
+ Type *NewOpType);
+
+ Value *createIntrinsic(Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1,
+ Value *NewOP2, Type *ResType, unsigned NumElts,
+ bool Interleave);
+ bool processInstructionForVMPA(Instruction *Inst);
+ bool getVmpaOperandInfo(Value *V, OPInfo &OPI);
+ void reorderVmpaOperands(OPInfo *OPI);
+ bool replaceWithVmpaIntrinsic(Instruction *Inst, OPInfo *OPI);
+ bool genSaturatingInst(Instruction *Inst);
+ bool getMinMax(Constant *MinC, Constant *MaxC, std::pair<int, int> &MinMax);
+ bool isSaturatingVAsr(Instruction *Inst, Value *S, int MinV, int MaxV,
+ bool &IsResSigned);
+ Value *extendShiftByVal(Value *ShiftByVal, IRBuilder<> &IRB);
+ Intrinsic::ID getVAsrIntrinsic(bool IsInSigned, bool IsResSigned);
+ Value *createVAsrIntrinsic(Instruction *Inst, Value *VecOP, Value *ShiftByVal,
+ bool IsResSigned);
+ bool genVAvg(Instruction *Inst);
+ bool checkConstantVector(Value *OP, int64_t &SplatVal, bool IsOPZExt);
+ void updateMPYConst(Intrinsic::ID IntId, int64_t SplatVal, bool IsOPZExt,
+ Value *&OP, IRBuilder<> &IRB);
+ void packConstant(Intrinsic::ID IntId, int64_t SplatVal, Value *&OP,
+ IRBuilder<> &IRB);
+};
+
+} // end anonymous namespace
+
+char HexagonGenWideningVecInstr::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonGenWideningVecInstr, "widening-vec",
+ "Hexagon generate "
+ "widening vector instructions",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(HexagonGenWideningVecInstr, "widening-vec",
+ "Hexagon generate "
+ "widening vector instructions",
+ false, false)
+
+static bool hasNegativeValues(Constant *C) {
+ if (Value *SplatV = C->getSplatValue()) {
+ auto *CI = dyn_cast<ConstantInt>(SplatV);
+ assert(CI);
+ return CI->getValue().isNegative();
+ }
+ unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ auto *CI = dyn_cast<ConstantInt>(C->getAggregateElement(i));
+ assert(CI);
+ if (CI->getValue().isNegative())
+ return true;
+ continue;
+ }
+ return false;
+}
+
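+// Collect extension info for operand V: a zext/sext of a vector, a splat built
+// from an extended (or And-masked) scalar, or a constant vector that fits in
+// half of its element width. For example (hypothetical), a splat built from
+// "and i32 %x, 255" is treated as an 8-bit zero-extended operand.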
+bool HexagonGenWideningVecInstr::getOperandInfo(Value *V, OPInfo &OPI) {
+ using namespace PatternMatch;
+ OPI.OP = V;
+ Value *ExtV = nullptr;
+ Constant *C = nullptr;
+
+ bool Match = false;
+ if ((Match = (match(V, (m_ZExt(m_Value(ExtV)))) ||
+ match(V, m_Shuffle(m_InsertElt(m_Poison(),
+ m_ZExt(m_Value(ExtV)), m_Zero()),
+ m_Poison(), m_ZeroMask()))))) {
+ OPI.ExtInOP = ExtV;
+ OPI.IsZExt = true;
+ }
+
+ if (!Match &&
+ (Match = (match(V, (m_SExt(m_Value(ExtV)))) ||
+ match(V, m_Shuffle(m_InsertElt(m_Poison(),
+ m_SExt(m_Value(ExtV)), m_Zero()),
+ m_Poison(), m_ZeroMask()))))) {
+ OPI.ExtInOP = ExtV;
+ OPI.IsZExt = false;
+ }
+ if (!Match &&
+ (Match =
+ (match(V, m_Shuffle(m_InsertElt(m_Poison(), m_Value(ExtV), m_Zero()),
+ m_Poison(), m_ZeroMask()))))) {
+ if (match(ExtV, m_And(m_Value(), m_SpecificInt(255)))) {
+ OPI.ExtInOP = ExtV;
+ OPI.IsZExt = true;
+ OPI.ExtInSize = 8;
+ return true;
+ }
+ if (match(ExtV, m_And(m_Value(), m_SpecificInt(65535)))) {
+ OPI.ExtInOP = ExtV;
+ OPI.IsZExt = true;
+ OPI.ExtInSize = 16;
+ return true;
+ }
+ return false;
+ }
+
+ if (!Match && (Match = match(V, m_Constant(C)))) {
+ if (!isExtendedConstant(C, false) && !isExtendedConstant(C, true))
+ return false;
+ OPI.ExtInOP = C;
+ OPI.IsZExt = !hasNegativeValues(C);
+ }
+
+ if (!Match)
+ return false;
+
+ // If the operand is extended, find the element size of its input.
+ if (OPI.ExtInOP)
+ OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP, OPI.IsZExt);
+ return true;
+}
+
+bool HexagonGenWideningVecInstr::isExtendedConstant(Constant *C,
+ bool IsSigned) {
+ Type *CTy = cast<FixedVectorType>(C->getType())->getElementType();
+ unsigned EltSize = CTy->getPrimitiveSizeInBits();
+ unsigned HalfSize = EltSize / 2;
+ if (Value *SplatV = C->getSplatValue()) {
+ if (auto *CI = dyn_cast<ConstantInt>(SplatV))
+ return IsSigned ? isIntN(HalfSize, CI->getSExtValue())
+ : isUIntN(HalfSize, CI->getZExtValue());
+ return false;
+ }
+ unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ if (auto *CI = dyn_cast<ConstantInt>(C->getAggregateElement(i))) {
+ if ((IsSigned && !isIntN(HalfSize, CI->getSExtValue())) ||
+ (!IsSigned && !isUIntN(HalfSize, CI->getZExtValue())))
+ return false;
+ continue;
+ }
+ return false;
+ }
+ return true;
+}
+
+unsigned HexagonGenWideningVecInstr::getElementSizeInBits(Value *V,
+ bool IsZExt = false) {
+ using namespace PatternMatch;
+ Type *ValTy = V->getType();
+ Type *EltTy = ValTy;
+ if (auto *C = dyn_cast<Constant>(V)) {
+ unsigned NumElts = cast<FixedVectorType>(EltTy)->getNumElements();
+ unsigned EltSize = cast<FixedVectorType>(EltTy)
+ ->getElementType()
+ ->getPrimitiveSizeInBits()
+ .getKnownMinValue();
+ unsigned ReducedSize = EltSize / 2;
+
+ while (ReducedSize >= 8) {
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ if (auto *CI = dyn_cast<ConstantInt>(C->getAggregateElement(i))) {
+ if (IsZExt) {
+ if (!isUIntN(ReducedSize, CI->getZExtValue()))
+ return EltSize;
+ } else if (!isIntN(ReducedSize, CI->getSExtValue()))
+ return EltSize;
+ }
+ }
+ EltSize = ReducedSize;
+ ReducedSize = ReducedSize / 2;
+ }
+ return EltSize;
+ }
+
+ if (ValTy->isVectorTy())
+ EltTy = cast<FixedVectorType>(ValTy)->getElementType();
+ return EltTy->getPrimitiveSizeInBits();
+}
+
+Value *HexagonGenWideningVecInstr::adjustExtensionForOp(OPInfo &OPI,
+ IRBuilder<> &IRB,
+ unsigned NewExtSize,
+ unsigned NumElts) {
+ Value *V = OPI.ExtInOP;
+ bool IsZExt = OPI.IsZExt;
+ unsigned EltSize = getElementSizeInBits(OPI.ExtInOP, OPI.IsZExt);
+ Type *EltType = getElementTy(NewExtSize, IRB);
+ auto *NewOpTy = FixedVectorType::get(EltType, NumElts);
+
+ if (dyn_cast<Constant>(V))
+ return IRB.CreateTrunc(V, NewOpTy);
+
+ if (V->getType()->isVectorTy()) {
+ if (NewExtSize == EltSize)
+ return V;
+ assert(NewExtSize == 16);
+ auto *NewOpTy = FixedVectorType::get(IRB.getInt16Ty(), NumElts);
+ return (IsZExt) ? IRB.CreateZExt(V, NewOpTy) : IRB.CreateSExt(V, NewOpTy);
+ }
+
+ // The operand must correspond to a shuffle vector which is used to construct
+ // a vector out of a scalar. Since the scalar value (V) is extended,
+ // replace it with a new shuffle vector with the smaller element size.
+ [[maybe_unused]] auto *I = dyn_cast<Instruction>(OPI.OP);
+ assert(I && I->getOpcode() == Instruction::ShuffleVector);
+
+ if (NewExtSize > EltSize)
+ V = (IsZExt) ? IRB.CreateZExt(V, EltType) : IRB.CreateSExt(V, EltType);
+ else if (NewExtSize < EltSize)
+ V = IRB.CreateTrunc(V, EltType);
+
+ Value *IE =
+ IRB.CreateInsertElement(PoisonValue::get(NewOpTy), V, IRB.getInt32(0));
+
+ SmallVector<Constant *, 8> ShuffleMask;
+ for (unsigned i = 0; i < NumElts; ++i)
+ ShuffleMask.push_back(IRB.getInt32(0));
+
+ return IRB.CreateShuffleVector(IE, PoisonValue::get(NewOpTy),
+ ConstantVector::get(ShuffleMask));
+}
+
+Intrinsic::ID HexagonGenWideningVecInstr::getIntrinsic(
+ OPKind OPK, bool IsOP1ZExt, bool IsOP2ZExt, unsigned InEltSize,
+ unsigned ResEltSize, bool IsConstScalar, int ConstOpNum) {
+ // Since the operands have been extended, the ResEltSize must be 16 or more.
+ switch (OPK) {
+ case OP_Add:
+ // Both operands should be either zero extended or sign extended.
+ assert(IsOP1ZExt == IsOP2ZExt);
+ if (InEltSize == 8 && ResEltSize == 16) {
+ // Operands must be zero extended as we don't have a widening vector
+      // 'add' that can take sign-extended values.
+ assert(IsOP1ZExt && "Operands must be zero-extended");
+ return Intrinsic::hexagon_vadd_uu;
+ }
+ if (InEltSize == 16 && ResEltSize == 32)
+ return (IsOP1ZExt) ? Intrinsic::hexagon_vadd_uu
+ : Intrinsic::hexagon_vadd_ss;
+
+ llvm_unreachable("Incorrect input and output operand sizes");
+
+ case OP_Sub:
+ // Both operands should be either zero extended or sign extended.
+ assert(IsOP1ZExt == IsOP2ZExt);
+ if (InEltSize == 8 && ResEltSize == 16) {
+ // Operands must be zero extended as we don't have a widening vector
+      // 'sub' that can take sign-extended values.
+ assert(IsOP1ZExt && "Operands must be zero-extended");
+ return Intrinsic::hexagon_vsub_uu;
+ }
+ if (InEltSize == 16 && ResEltSize == 32)
+ return (IsOP1ZExt) ? Intrinsic::hexagon_vsub_uu
+ : Intrinsic::hexagon_vsub_ss;
+
+ llvm_unreachable("Incorrect input and output operand sizes");
+
+ case OP_Mul:
+ assert(ResEltSize == 2 * InEltSize);
+    // Handle the case where one of the operands is a constant vector.
+ if (IsConstScalar) {
+      // When the inputs are 8-bit and the result is 16-bit:
+ if (InEltSize == 8 && ResEltSize == 16) {
+        // If the second operand of the mul is the constant vector, handle it
+        // here; otherwise the first operand is the constant.
+ if (ConstOpNum == 2 && IsOP1ZExt) {
+ // If the value inside the constant vector is zero-extended, then
+ // return hexagon_vmpy_ub_ub, else return hexagon_vmpy_ub_b
+ return (IsOP2ZExt) ? Intrinsic::hexagon_vmpy_ub_ub
+ : Intrinsic::hexagon_vmpy_ub_b;
+ } else if (ConstOpNum == 1 && IsOP2ZExt) {
+ return (IsOP1ZExt) ? Intrinsic::hexagon_vmpy_ub_ub
+ : Intrinsic::hexagon_vmpy_ub_b;
+ }
+ }
+      // When the inputs are 16-bit and the output is 32-bit, pick the
+      // matching 16x16 widening multiply.
+ if (InEltSize == 16 && ResEltSize == 32) {
+ if (IsOP1ZExt && IsOP2ZExt) {
+          // If both the constant vector and the other operand are
+          // zero-extended, return hexagon_vmpy_uh_uh.
+ return Intrinsic::hexagon_vmpy_uh_uh;
+ } else if (!IsOP1ZExt && !IsOP2ZExt) {
+          // If both the constant vector and the other operand are
+          // sign-extended, return hexagon_vmpy_h_h.
+ return Intrinsic::hexagon_vmpy_h_h;
+ }
+ }
+ }
+ if (IsOP1ZExt)
+ return IsOP2ZExt ? Intrinsic::hexagon_vmpy_uu
+ : Intrinsic::hexagon_vmpy_us;
+ else
+ return IsOP2ZExt ? Intrinsic::hexagon_vmpy_su
+ : Intrinsic::hexagon_vmpy_ss;
+ default:
+ llvm_unreachable("Instruction not handled!");
+ }
+}
+
+Type *HexagonGenWideningVecInstr::getElementTy(unsigned size,
+ IRBuilder<> &IRB) {
+ switch (size) {
+ case 8:
+ return IRB.getInt8Ty();
+ case 16:
+ return IRB.getInt16Ty();
+ case 32:
+ return IRB.getInt32Ty();
+ default:
+ llvm_unreachable("Unhandled Element size");
+ }
+}
+
+Value *HexagonGenWideningVecInstr::createIntrinsic(
+ Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, Value *NewOP2,
+ Type *ResType, unsigned NumElts, bool Interleave = true) {
+ IRBuilder<> IRB(Inst);
+ Function *ExtF = Intrinsic::getOrInsertDeclaration(M, IntId, ResType);
+ Value *NewIn = IRB.CreateCall(ExtF, {NewOP1, NewOP2});
+ if (Interleave) {
+ // Interleave elements in the output vector.
+ SmallVector<Constant *, 16> ShuffleMask;
+ unsigned HalfElts = NumElts / 2;
+ for (unsigned i = 0; i < HalfElts; ++i) {
+ ShuffleMask.push_back(IRB.getInt32(i));
+ ShuffleMask.push_back(IRB.getInt32(HalfElts + i));
+ }
+ NewIn = IRB.CreateShuffleVector(NewIn, PoisonValue::get(ResType),
+ ConstantVector::get(ShuffleMask));
+ }
+ return NewIn;
+}
+
+std::pair<Value *, Value *>
+HexagonGenWideningVecInstr::opSplit(Value *OP, Instruction *Inst,
+ Type *NewOpType) {
+ Type *InstTy = Inst->getType();
+ unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
+ IRBuilder<> IRB(Inst);
+ if (InstTy->getPrimitiveSizeInBits() < 2 * HwVLen) {
+ // The only time we need to split an OP even though it is not a
+ // vector-pair is while generating vasr instruction for the short vector.
+ // Since hi/lo intrinsics can't be used here as they expect the operands to
+ // be of 64xi32 type, the shuffle_vector pair with the appropriate masks is
+ // used instead.
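+    // For example (hypothetical), a <64 x i16> operand is split into
+    // Lo = elements [0..31] and Hi = elements [32..63] via two shufflevectors.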
+ assert(NumElts % 2 == 0 && "Unexpected Vector Type!!");
+ unsigned HalfElts = NumElts / 2;
+ SmallVector<Constant *, 8> HiM;
+ SmallVector<Constant *, 8> LoM;
+ for (unsigned i = 0; i < HalfElts; ++i)
+ LoM.push_back(IRB.getInt32(i));
+ for (unsigned i = 0; i < HalfElts; ++i)
+ HiM.push_back(IRB.getInt32(HalfElts + i));
+
+ Value *Hi = IRB.CreateShuffleVector(OP, PoisonValue::get(OP->getType()),
+ ConstantVector::get(HiM));
+ Value *Lo = IRB.CreateShuffleVector(OP, PoisonValue::get(OP->getType()),
+ ConstantVector::get(LoM));
+ return std::pair<Value *, Value *>(Hi, Lo);
+ }
+
+ Intrinsic::ID IntHi = Intrinsic::hexagon_V6_hi_128B;
+ Intrinsic::ID IntLo = Intrinsic::hexagon_V6_lo_128B;
+ Function *ExtFHi = Intrinsic::getOrInsertDeclaration(M, IntHi);
+ Function *ExtFLo = Intrinsic::getOrInsertDeclaration(M, IntLo);
+ auto *InType = FixedVectorType::get(IRB.getInt32Ty(), 64);
+ OP = IRB.CreateBitCast(OP, InType);
+ Value *Hi = IRB.CreateCall(ExtFHi, {OP}); // 32xi32
+ Value *Lo = IRB.CreateCall(ExtFLo, {OP});
+ Hi = IRB.CreateBitCast(Hi, NewOpType);
+ Lo = IRB.CreateBitCast(Lo, NewOpType);
+ return std::pair<Value *, Value *>(Hi, Lo);
+}
+
+bool HexagonGenWideningVecInstr::checkConstantVector(Value *OP,
+ int64_t &SplatVal,
+ bool IsOPZExt) {
+ if (auto *C1 = dyn_cast<Constant>(OP)) {
+ if (Value *SplatV = C1->getSplatValue()) {
+ auto *CI = dyn_cast<ConstantInt>(SplatV);
+ if (IsOPZExt) {
+ SplatVal = CI->getZExtValue();
+ } else {
+ SplatVal = CI->getSExtValue();
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+void HexagonGenWideningVecInstr::updateMPYConst(Intrinsic::ID IntId,
+ int64_t SplatVal, bool IsOPZExt,
+ Value *&OP, IRBuilder<> &IRB) {
+ if ((IntId == Intrinsic::hexagon_vmpy_uu ||
+ IntId == Intrinsic::hexagon_vmpy_us ||
+ IntId == Intrinsic::hexagon_vmpy_su ||
+ IntId == Intrinsic::hexagon_vmpy_ss) &&
+ OP->getType()->isVectorTy()) {
+ // Create a vector with all elements equal to SplatVal
+ Type *VecTy = OP->getType();
+ Value *splatVector =
+ ConstantInt::get(VecTy, static_cast<uint32_t>(SplatVal));
+ OP = IsOPZExt ? IRB.CreateZExt(splatVector, VecTy)
+ : IRB.CreateSExt(splatVector, VecTy);
+ } else {
+ packConstant(IntId, SplatVal, OP, IRB);
+ }
+}
+
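+// Pack the splat value into the 32-bit scalar immediate expected by the
+// byte/halfword vmpy intrinsics. Worked examples (hypothetical values):
+//   vmpy_ub_ub with SplatVal = 5 -> 0x05050505 (four copies of the byte)
+//   vmpy_uh_uh with SplatVal = 3 -> 0x00030003 (two copies of the halfword)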
+void HexagonGenWideningVecInstr::packConstant(Intrinsic::ID IntId,
+ int64_t SplatVal, Value *&OP,
+ IRBuilder<> &IRB) {
+ uint32_t Val32 = static_cast<uint32_t>(SplatVal);
+ if (IntId == Intrinsic::hexagon_vmpy_ub_ub) {
+ assert(SplatVal >= 0 && SplatVal <= UINT8_MAX);
+ uint32_t packed = (Val32 << 24) | (Val32 << 16) | (Val32 << 8) | Val32;
+ OP = IRB.getInt32(packed);
+ } else if (IntId == Intrinsic::hexagon_vmpy_ub_b) {
+ assert(SplatVal >= INT8_MIN && SplatVal <= INT8_MAX);
+ uint32_t packed = (Val32 << 24) | ((Val32 << 16) & ((1 << 24) - 1)) |
+ ((Val32 << 8) & ((1 << 16) - 1)) |
+ (Val32 & ((1 << 8) - 1));
+ OP = IRB.getInt32(packed);
+ } else if (IntId == Intrinsic::hexagon_vmpy_uh_uh) {
+ assert(SplatVal >= 0 && SplatVal <= UINT16_MAX);
+ uint32_t packed = (Val32 << 16) | Val32;
+ OP = IRB.getInt32(packed);
+ } else if (IntId == Intrinsic::hexagon_vmpy_h_h) {
+ assert(SplatVal >= INT16_MIN && SplatVal <= INT16_MAX);
+ uint32_t packed = (Val32 << 16) | (Val32 & ((1 << 16) - 1));
+ OP = IRB.getInt32(packed);
+ }
+}
+
+bool HexagonGenWideningVecInstr::replaceWithIntrinsic(Instruction *Inst,
+ OPKind OPK,
+ OPInfo &OP1Info,
+ OPInfo &OP2Info) {
+ Type *InstTy = Inst->getType();
+ Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
+ unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
+ unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
+
+ bool IsOP1ZExt = OP1Info.IsZExt;
+ bool IsOP2ZExt = OP2Info.IsZExt;
+
+ // The resulting values of 'add' and 'sub' are always sign-extended.
+ bool IsResZExt = (OPK == OP_Mul || OPK == OP_Shl)
+ ? (OP1Info.IsZExt && OP2Info.IsZExt)
+ : false;
+
+ unsigned MaxEltSize = std::max(OP1Info.ExtInSize, OP2Info.ExtInSize);
+ unsigned NewOpEltSize = MaxEltSize;
+ unsigned NewResEltSize = 2 * MaxEltSize;
+
+ // For Add and Sub, both the operands should be either zero extended
+ // or sign extended. In case of a mismatch, they are extended to the
+ // next size (ex: 8 bits -> 16 bits) so that the sign-extended vadd/vsub
+ // instructions can be used. Also, we don't support 8-bits signed vadd/vsub
+ // instructions. They are extended to 16-bits and then signed 16-bits
+ // non-widening vadd/vsub is used to perform the operation.
+ if (OPK != OP_Mul && OPK != OP_Shl &&
+ (IsOP1ZExt != IsOP2ZExt || (!IsOP1ZExt && NewOpEltSize == 8)))
+ NewOpEltSize = 2 * NewOpEltSize;
+
+ unsigned ResVLen = NewResEltSize * NumElts;
+ if (ResVLen < HwVLen && !WidenShortVector)
+ return false;
+ if (NewOpEltSize > 16 || ((ResVLen > HwVLen) && (ResVLen % HwVLen) != 0))
+ return false;
+
+ IRBuilder<> IRB(Inst);
+ Value *NewOP1 = adjustExtensionForOp(OP1Info, IRB, NewOpEltSize, NumElts);
+ Value *NewOP2 = adjustExtensionForOp(OP2Info, IRB, NewOpEltSize, NumElts);
+
+ if (NewOpEltSize == NewResEltSize) {
+ assert(OPK != OP_Mul && OPK != OP_Shl);
+ // Instead of intrinsics, use vector add/sub.
+ Value *NewIn = IRB.CreateBinOp(cast<BinaryOperator>(Inst)->getOpcode(),
+ NewOP1, NewOP2);
+ if (InstEltSize > NewResEltSize)
+ NewIn = IRB.CreateSExt(NewIn, InstTy);
+ Inst->replaceAllUsesWith(NewIn);
+ return true;
+ }
+
+ bool IsConstScalar = false;
+ int64_t SplatVal = 0;
+ int ConstOpNum = 1;
+ if (OPK == OP_Mul || OPK == OP_Shl) {
+ IsConstScalar = checkConstantVector(NewOP1, SplatVal, IsOP1ZExt);
+ if (!IsConstScalar) {
+ IsConstScalar = checkConstantVector(NewOP2, SplatVal, IsOP2ZExt);
+ ConstOpNum = 2;
+ }
+ }
+
+ if (IsConstScalar && OPK == OP_Shl) {
+ if (((NewOpEltSize == 8) && (SplatVal > 0) && (SplatVal < 8)) ||
+ ((NewOpEltSize == 16) && (SplatVal > 0) && (SplatVal < 16))) {
+ SplatVal = 1LL << SplatVal;
+ OPK = OP_Mul;
+ } else {
+ return false;
+ }
+ } else if (!IsConstScalar && OPK == OP_Shl) {
+ return false;
+ }
+
+ Intrinsic::ID IntId = getIntrinsic(OPK, IsOP1ZExt, IsOP2ZExt, NewOpEltSize,
+ NewResEltSize, IsConstScalar, ConstOpNum);
+
+ if (IsConstScalar) {
+ updateMPYConst(IntId, SplatVal, IsOP2ZExt, NewOP2, IRB);
+ }
+
+ // Split the node if it needs more than a vector pair for the result.
+ if (ResVLen > 2 * HwVLen) {
+ assert(ResVLen == 4 * HwVLen);
+ // Split the operands
+ unsigned HalfElts = NumElts / 2;
+ auto *NewOpType =
+ FixedVectorType::get(getElementTy(NewOpEltSize, IRB), HalfElts);
+ auto *ResType =
+ FixedVectorType::get(getElementTy(NewResEltSize, IRB), HalfElts);
+ std::pair<Value *, Value *> SplitOP1 = opSplit(NewOP1, Inst, NewOpType);
+ std::pair<Value *, Value *> SplitOP2;
+ if (IsConstScalar && (IntId == Intrinsic::hexagon_vmpy_h_h ||
+ IntId == Intrinsic::hexagon_vmpy_uh_uh)) {
+ SplitOP2 = std::pair<Value *, Value *>(NewOP2, NewOP2);
+ } else {
+ SplitOP2 = opSplit(NewOP2, Inst, NewOpType);
+ }
+ Value *NewInHi = createIntrinsic(IntId, Inst, SplitOP1.first,
+ SplitOP2.first, ResType, HalfElts, true);
+ Value *NewInLo = createIntrinsic(IntId, Inst, SplitOP1.second,
+ SplitOP2.second, ResType, HalfElts, true);
+ assert(InstEltSize == NewResEltSize);
+ SmallVector<Constant *, 8> ShuffleMask;
+ for (unsigned i = 0; i < NumElts; ++i)
+ ShuffleMask.push_back(IRB.getInt32(i));
+ // Concat Hi and Lo.
+ Value *NewIn = IRB.CreateShuffleVector(NewInLo, NewInHi,
+ ConstantVector::get(ShuffleMask));
+
+ Inst->replaceAllUsesWith(NewIn);
+ return true;
+ }
+
+ auto *ResType =
+ FixedVectorType::get(getElementTy(NewResEltSize, IRB), NumElts);
+ Value *NewIn =
+ createIntrinsic(IntId, Inst, NewOP1, NewOP2, ResType, NumElts, true);
+ if (InstEltSize > NewResEltSize)
+ NewIn = (IsResZExt) ? IRB.CreateZExt(NewIn, InstTy)
+ : IRB.CreateSExt(NewIn, InstTy);
+
+ Inst->replaceAllUsesWith(NewIn);
+
+ return true;
+}
+
+// Process the instruction and replace it with widening vector
+// intrinsics if possible.
+bool HexagonGenWideningVecInstr::processInstruction(Instruction *Inst) {
+ Type *InstTy = Inst->getType();
+ if (!InstTy->isVectorTy() ||
+ cast<FixedVectorType>(InstTy)->getNumElements() > 128)
+ return false;
+ unsigned InstLen = InstTy->getPrimitiveSizeInBits();
+ if (!HST->isTypeForHVX(cast<VectorType>(InstTy)) && InstLen != 4 * HwVLen)
+ return false;
+ if (InstLen < HwVLen && !WidenShortVector)
+ return false;
+
+ using namespace PatternMatch;
+
+ OPKind OPK;
+ Value *OP1 = nullptr, *OP2 = nullptr;
+ if (match(Inst, (m_Sub(m_Value(OP1), m_Value(OP2)))))
+ OPK = OP_Sub;
+ else if (match(Inst, (m_Add(m_Value(OP1), m_Value(OP2)))))
+ OPK = OP_Add;
+ else if (match(Inst, (m_Mul(m_Value(OP1), m_Value(OP2)))))
+ OPK = OP_Mul;
+ else if (match(Inst, (m_Shl(m_Value(OP1), m_Value(OP2)))))
+ OPK = OP_Shl;
+ else
+ return false;
+
+ OPInfo OP1Info, OP2Info;
+
+ if (!getOperandInfo(OP1, OP1Info) || !getOperandInfo(OP2, OP2Info))
+ return false;
+
+ // Proceed only if both input operands are extended.
+ if (!OP1Info.ExtInOP || !OP2Info.ExtInOP)
+ return false;
+
+ return replaceWithIntrinsic(Inst, OPK, OP1Info, OP2Info);
+}
+
+bool HexagonGenWideningVecInstr::getVmpaOperandInfo(Value *V, OPInfo &OPI) {
+ using namespace PatternMatch;
+ OPI.OP = V;
+ Value *ExtV, *OP1 = nullptr;
+
+ if (match(V,
+ m_ZExt(m_Shuffle(m_InsertElt(m_Poison(), m_Value(ExtV), m_Zero()),
+ m_Poison(), m_ZeroMask()))) ||
+ match(V,
+ m_Shuffle(m_InsertElt(m_Poison(), m_ZExt(m_Value(ExtV)), m_Zero()),
+ m_Poison(), m_ZeroMask()))) {
+ OPI.ExtInOP = ExtV;
+ OPI.IsZExt = true;
+ OPI.IsScalar = true;
+ OPI.ExtInSize = ExtV->getType()->getPrimitiveSizeInBits();
+ return true;
+ }
+
+ ConstantInt *I = nullptr;
+ if ((match(V, m_Shuffle(m_InsertElt(m_Poison(), m_Value(ExtV), m_Zero()),
+ m_Poison(), m_ZeroMask())))) {
+ if (match(ExtV, m_And(m_Value(OP1), m_ConstantInt(I)))) {
+ uint32_t IValue = I->getZExtValue();
+ if (IValue <= 255) {
+ OPI.ExtInOP = ExtV;
+ OPI.IsZExt = true;
+ OPI.ExtInSize = 8;
+ OPI.IsScalar = true;
+ return true;
+ }
+ }
+ }
+
+ // Match for non-scalar operands
+ return getOperandInfo(V, OPI);
+}
+
+// Process the instruction and replace it with the vmpa intrinsic if possible.
+bool HexagonGenWideningVecInstr::processInstructionForVMPA(Instruction *Inst) {
+ using namespace PatternMatch;
+ Type *InstTy = Inst->getType();
+ // TODO: Extend it to handle short vector instructions (< HwVLen).
+ // vmpa instructions produce a vector register pair.
+ if (!InstTy->isVectorTy() || InstTy->getPrimitiveSizeInBits() != 2 * HwVLen)
+ return false;
+
+ Value *OP1 = nullptr, *OP2 = nullptr;
+ if (!match(Inst, (m_Add(m_Value(OP1), m_Value(OP2)))))
+ return false;
+
+ Value *OP[4] = {nullptr, nullptr, nullptr, nullptr};
+ if (!match(OP1, m_Mul(m_Value(OP[0]), m_Value(OP[1]))) ||
+ !match(OP2, m_Mul(m_Value(OP[2]), m_Value(OP[3]))))
+ return false;
+
+ OPInfo OP_Info[4];
+ for (unsigned i = 0; i < 4; i++)
+ if (!getVmpaOperandInfo(OP[i], OP_Info[i]) || !OP_Info[i].ExtInOP)
+ return false;
+
+ return replaceWithVmpaIntrinsic(Inst, OP_Info);
+}
+
+// Reorder operand info in OPI so that the vector operands come before their
+// scalar counterparts.
+void HexagonGenWideningVecInstr::reorderVmpaOperands(OPInfo *OPI) {
+ for (unsigned i = 0; i < 2; i++)
+ if (!OPI[2 * i].ExtInOP->getType()->isVectorTy()) {
+ OPInfo Temp;
+ Temp = OPI[2 * i];
+ OPI[2 * i] = OPI[2 * i + 1];
+ OPI[2 * i + 1] = Temp;
+ }
+}
+
+// Only handles the case where one input to vmpa is a scalar and the
+// other is a vector. It can be easily extended to cover other types
+// of vmpa instructions.
+bool HexagonGenWideningVecInstr::replaceWithVmpaIntrinsic(Instruction *Inst,
+ OPInfo *OPI) {
+ reorderVmpaOperands(OPI);
+
+  // After reordering the operands in OPI, the odd elements must have the
+  // IsScalar flag set. Also, check that the even elements are non-scalars.
+ if (!OPI[1].IsScalar || !OPI[3].IsScalar || OPI[0].IsScalar ||
+ OPI[2].IsScalar)
+ return false;
+
+ OPInfo SOPI1 = OPI[1];
+ OPInfo SOPI2 = OPI[3];
+
+ // The scalar operand in the vmpa instructions needs to be an int8.
+ if (SOPI1.ExtInSize != SOPI2.ExtInSize || SOPI1.ExtInSize != 8)
+ return false;
+
+ Type *InstTy = Inst->getType();
+ Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
+ unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
+ unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
+
+ unsigned MaxVEltSize = std::max(OPI[0].ExtInSize, OPI[2].ExtInSize);
+ unsigned NewVOpEltSize = MaxVEltSize;
+ unsigned NewResEltSize = 2 * MaxVEltSize;
+
+ if (NumElts * NewVOpEltSize < HwVLen) {
+ // Extend the operand so that we don't end up with an invalid vector size.
+ NewVOpEltSize = 2 * NewVOpEltSize;
+ NewResEltSize = 2 * NewResEltSize;
+ }
+
+ IRBuilder<> IRB(Inst);
+
+ // Construct scalar operand
+ Value *NewSOP1 = SOPI1.ExtInOP;
+ Value *NewSOP2 = SOPI2.ExtInOP;
+
+ Type *S1Ty = NewSOP1->getType();
+ Type *S2Ty = NewSOP2->getType();
+ if (S1Ty->getPrimitiveSizeInBits() < 32)
+ NewSOP1 = IRB.CreateZExt(NewSOP1, IRB.getInt32Ty());
+ if (S2Ty->getPrimitiveSizeInBits() < 32)
+ NewSOP2 = IRB.CreateZExt(NewSOP2, IRB.getInt32Ty());
+
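+  // Pack the two u8 scalars into a 32-bit value as (S1 << 8) | S2 and
+  // replicate it into both halfwords with A2_combine_ll; this is assumed to
+  // match the per-byte scalar operand layout expected by vmpabus/vmpauhb.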
+ Value *SHL = IRB.CreateShl(NewSOP1, IRB.getInt32(8));
+ Value *OR = IRB.CreateOr(SHL, NewSOP2);
+ Intrinsic::ID CombineIntID = Intrinsic::hexagon_A2_combine_ll;
+ Function *ExtF = Intrinsic::getOrInsertDeclaration(M, CombineIntID);
+ Value *ScalarOP = IRB.CreateCall(ExtF, {OR, OR});
+
+ // Construct vector operand
+ Value *NewVOP1 = adjustExtensionForOp(OPI[0], IRB, NewVOpEltSize, NumElts);
+ Value *NewVOP2 = adjustExtensionForOp(OPI[2], IRB, NewVOpEltSize, NumElts);
+
+ // Combine both vector operands to form the vector-pair for vmpa
+ Intrinsic::ID VCombineIntID = Intrinsic::hexagon_V6_vcombine_128B;
+ ExtF = Intrinsic::getOrInsertDeclaration(M, VCombineIntID);
+ Type *InType = FixedVectorType::get(IRB.getInt32Ty(), 32);
+ NewVOP1 = IRB.CreateBitCast(NewVOP1, InType);
+ NewVOP2 = IRB.CreateBitCast(NewVOP2, InType);
+ Value *VecOP = IRB.CreateCall(ExtF, {NewVOP1, NewVOP2});
+
+ Intrinsic::ID VmpaIntID = (NewResEltSize == 16)
+ ? Intrinsic::hexagon_V6_vmpabus_128B
+ : Intrinsic::hexagon_V6_vmpauhb_128B;
+ ExtF = Intrinsic::getOrInsertDeclaration(M, VmpaIntID);
+ auto *ResType =
+ FixedVectorType::get(getElementTy(NewResEltSize, IRB), NumElts);
+ Value *NewIn = IRB.CreateCall(ExtF, {VecOP, ScalarOP});
+ NewIn = IRB.CreateBitCast(NewIn, ResType);
+
+ if (InstEltSize > NewResEltSize)
+ // Extend the output to match the original instruction type.
+ NewIn = IRB.CreateSExt(NewIn, InstTy);
+
+ // Interleave elements in the output vector.
+ SmallVector<Constant *, 16> ShuffleMask;
+ unsigned HalfElts = NumElts / 2;
+ for (unsigned i = 0; i < HalfElts; ++i) {
+ ShuffleMask.push_back(IRB.getInt32(i));
+ ShuffleMask.push_back(IRB.getInt32(HalfElts + i));
+ }
+ NewIn = IRB.CreateShuffleVector(NewIn, PoisonValue::get(ResType),
+ ConstantVector::get(ShuffleMask));
+
+ Inst->replaceAllUsesWith(NewIn);
+ return true;
+}
+
+bool HexagonGenWideningVecInstr::genSaturatingInst(Instruction *Inst) {
+ Type *InstTy = Inst->getType();
+ assert(InstTy->isVectorTy());
+ if (InstTy->getPrimitiveSizeInBits() > HwVLen)
+ return false;
+
+ using namespace PatternMatch;
+ CmpPredicate P1, P2;
+ Value *L1 = nullptr, *T1 = nullptr, *L2 = nullptr, *T2 = nullptr,
+ *L3 = nullptr;
+ Constant *RC1 = nullptr, *FC1 = nullptr, *RC2 = nullptr, *FC2 = nullptr,
+ *RC3 = nullptr;
+
+ // Pattern of interest: ashr -> llvm.smin -> llvm.smax -> trunc
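+  // e.g. trunc(smax(smin(ashr(x, n), 127), -128)) to i8 clamps the shifted
+  // value to the signed i8 range and maps to a saturating vasr.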
+ // Match trunc instruction
+ if (match(Inst, m_Trunc(m_Intrinsic<Intrinsic::smax>(m_Value(L1),
+ m_Constant(RC1))))) {
+ // Match llvm.smin instruction
+ if (match(L1, m_Intrinsic<Intrinsic::smin>(m_Value(L2), m_Constant(RC2)))) {
+ // Match ashr instruction
+ if (match(L2, m_AShr(m_Value(L3), m_Constant(RC3)))) {
+ std::pair<int, int> MinMax;
+        // Get the min and max values from the operands of smin and smax.
+ if (getMinMax(RC1, RC2, MinMax)) {
+ bool IsResSigned;
+ // Validate the saturating vasr pattern
+ if (isSaturatingVAsr(Inst, L2, MinMax.first, MinMax.second,
+ IsResSigned)) {
+            // Get the shift value from the ashr operand.
+            ConstantInt *ShiftVal =
+                dyn_cast<ConstantInt>(RC3->getSplatValue());
+            if (ShiftVal) {
+              Value *NewIn =
+                  createVAsrIntrinsic(Inst, L3, ShiftVal, IsResSigned);
+ Inst->replaceAllUsesWith(NewIn);
+ return true;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (!match(Inst, (m_Trunc(m_Select(m_ICmp(P1, m_Value(L1), m_Constant(RC1)),
+ m_Value(T1), m_Constant(FC1))))) ||
+ (T1 != L1 || FC1 != RC1))
+ return false;
+
+ if (!match(L1, m_Select(m_ICmp(P2, m_Value(L2), m_Constant(RC2)), m_Value(T2),
+ m_Constant(FC2))) ||
+ (T2 != L2 || FC2 != RC2))
+ return false;
+
+ if (!((P1 == CmpInst::ICMP_SGT && P2 == CmpInst::ICMP_SLT) ||
+ (P1 == CmpInst::ICMP_SLT && P2 == CmpInst::ICMP_SGT)))
+ return false;
+
+ std::pair<int, int> MinMax;
+ if ((P1 == CmpInst::ICMP_SGT) && (P2 == CmpInst::ICMP_SLT)) {
+ if (!getMinMax(RC1, RC2, MinMax))
+ return false;
+ } else if (!getMinMax(RC2, RC1, MinMax))
+ return false;
+
+ Value *S = L2; // Value being saturated
+
+ // Only AShr instructions are handled.
+ // Also, second operand to AShr must be a scalar.
+ Value *OP1 = nullptr, *ShiftByVal = nullptr;
+ if (!match(S, m_AShr(m_Value(OP1),
+ m_Shuffle(m_InsertElt(m_Poison(), m_Value(ShiftByVal),
+ m_Zero()),
+ m_Poison(), m_ZeroMask()))))
+ return false;
+
+ bool IsResSigned;
+ if (!isSaturatingVAsr(Inst, S, MinMax.first, MinMax.second, IsResSigned))
+ return false;
+
+ Value *NewIn = createVAsrIntrinsic(Inst, OP1, ShiftByVal, IsResSigned);
+ Inst->replaceAllUsesWith(NewIn);
+ return true;
+}
+
+Value *HexagonGenWideningVecInstr::extendShiftByVal(Value *ShiftByVal,
+ IRBuilder<> &IRB) {
+ using namespace PatternMatch;
+ Value *A = nullptr;
+ if (match(ShiftByVal, m_Trunc(m_Value(A))))
+ return A;
+ return IRB.CreateZExt(ShiftByVal, IRB.getInt32Ty());
+}
+
+bool HexagonGenWideningVecInstr::getMinMax(Constant *MinC, Constant *MaxC,
+ std::pair<int, int> &MinMax) {
+  Value *SplatV;
+  if (!(SplatV = MinC->getSplatValue()) || !isa<ConstantInt>(SplatV))
+    return false;
+  if (!(SplatV = MaxC->getSplatValue()) || !isa<ConstantInt>(SplatV))
+    return false;
+
+  auto *MinI = cast<ConstantInt>(MinC->getSplatValue());
+  auto *MaxI = cast<ConstantInt>(MaxC->getSplatValue());
+ MinMax = std::pair<int, int>(MinI->getSExtValue(), MaxI->getSExtValue());
+ return true;
+}
+
+bool HexagonGenWideningVecInstr::isSaturatingVAsr(Instruction *Inst, Value *S,
+ int MinV, int MaxV,
+ bool &IsResSigned) {
+ if (MinV >= MaxV)
+ return false;
+
+ IsResSigned = true;
+ Type *InstTy = Inst->getType();
+ Type *EltTy = cast<VectorType>(InstTy)->getElementType();
+ unsigned TruncSize = EltTy->getPrimitiveSizeInBits();
+
+ int MaxRange, MinRange;
+ if (MinV < 0) { // Saturate to a signed value
+ MaxRange = (1 << (TruncSize - 1)) - 1;
+ MinRange = -(1 << (TruncSize - 1));
+ } else if (MinV == 0) { // Saturate to an unsigned value
+ MaxRange = (1 << (TruncSize)) - 1;
+ MinRange = 0;
+ IsResSigned = false;
+ } else
+ return false;
+
+ if (MinV != MinRange || MaxV != MaxRange)
+ return false;
+
+  auto *SInst = cast<Instruction>(S);
+ if (SInst->getOpcode() == Instruction::AShr) {
+ Type *SInstTy = SInst->getType();
+ Type *SEltTy = cast<VectorType>(SInstTy)->getElementType();
+ unsigned SInstEltSize = SEltTy->getPrimitiveSizeInBits();
+ if (SInstEltSize != 2 * TruncSize || TruncSize > 16)
+ return false;
+ }
+ return true;
+}
+
+Intrinsic::ID HexagonGenWideningVecInstr::getVAsrIntrinsic(bool IsInSigned,
+ bool IsResSigned) {
+ if (!IsResSigned)
+ return (IsInSigned) ? Intrinsic::hexagon_vasrsat_su
+ : Intrinsic::hexagon_vasrsat_uu;
+ return Intrinsic::hexagon_vasrsat_ss;
+}
+
+Value *HexagonGenWideningVecInstr::createVAsrIntrinsic(Instruction *Inst,
+ Value *VecOP,
+ Value *ShiftByVal,
+ bool IsResSigned) {
+ IRBuilder<> IRB(Inst);
+ Type *ShiftByTy = ShiftByVal->getType();
+ if (ShiftByTy->getPrimitiveSizeInBits() < 32)
+ ShiftByVal = extendShiftByVal(ShiftByVal, IRB);
+
+ Type *InstTy = Inst->getType();
+ Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
+ unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
+ unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
+
+  // Replace the instruction with the saturating vasr intrinsic.
+  // Since vasr with saturation interleaves elements from both input vectors,
+  // the elements must be deinterleaved first for the output to end up in the
+  // right order.
+ SmallVector<Constant *, 16> ShuffleMask;
+ unsigned HalfElts = NumElts / 2;
+ // Even elements
+ for (unsigned i = 0; i < HalfElts; ++i)
+ ShuffleMask.push_back(IRB.getInt32(i * 2));
+ // Odd elements
+ for (unsigned i = 0; i < HalfElts; ++i)
+ ShuffleMask.push_back(IRB.getInt32(i * 2 + 1));
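+  // e.g. for 8 elements the mask is [0, 2, 4, 6, 1, 3, 5, 7]: even lanes
+  // first, then odd lanes.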
+
+ VecOP = IRB.CreateShuffleVector(VecOP, PoisonValue::get(VecOP->getType()),
+ ConstantVector::get(ShuffleMask));
+
+ auto *InVecOPTy =
+ FixedVectorType::get(getElementTy(InstEltSize * 2, IRB), HalfElts);
+ std::pair<Value *, Value *> HiLo = opSplit(VecOP, Inst, InVecOPTy);
+ Intrinsic::ID IntID = getVAsrIntrinsic(true, IsResSigned);
+ Function *F = Intrinsic::getOrInsertDeclaration(M, IntID, InVecOPTy);
+ Value *NewIn = IRB.CreateCall(F, {HiLo.first, HiLo.second, ShiftByVal});
+ return IRB.CreateBitCast(NewIn, InstTy);
+}
+
+// Generate vavg instruction.
+bool HexagonGenWideningVecInstr::genVAvg(Instruction *Inst) {
+ using namespace PatternMatch;
+ Type *InstTy = Inst->getType();
+ assert(InstTy->isVectorTy());
+
+ bool Match = false;
+ Value *OP1 = nullptr, *OP2 = nullptr;
+ bool IsSigned;
+ if ((Match = (match(Inst, m_Trunc(m_LShr(m_Add(m_ZExt(m_Value(OP1)),
+ m_ZExt(m_Value(OP2))),
+ m_SpecificInt(1)))))))
+ IsSigned = false;
+ if (!Match &&
+ (Match = (match(Inst, m_Trunc(m_LShr(m_Add(m_SExt(m_Value(OP1)),
+ m_SExt(m_Value(OP2))),
+ m_SpecificInt(1))))) ||
+ match(Inst, m_LShr(m_Add(m_Value(OP1), m_Value(OP2)),
+ m_SpecificInt(1)))))
+ IsSigned = true;
+
+ if (!Match)
+ return false;
+
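+  // The matched pattern computes the truncating average (a + b) >> 1; it is
+  // lowered to the hexagon_vavgs/hexagon_vavgu intrinsics below.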
+ unsigned OP1EltSize = getElementSizeInBits(OP1);
+ unsigned OP2EltSize = getElementSizeInBits(OP2);
+ unsigned NewEltSize = std::max(OP1EltSize, OP2EltSize);
+
+ Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
+ unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
+ unsigned InstLen = InstTy->getPrimitiveSizeInBits();
+
+  // Only vectors that are at most twice the hardware vector length are
+  // allowed.
+ if (InstEltSize < NewEltSize || (InstLen > 2 * HwVLen))
+ return false;
+
+ if ((InstLen > HwVLen) && (InstLen % HwVLen != 0))
+ return false;
+
+ IRBuilder<> IRB(Inst);
+ unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
+ auto *AvgInstTy =
+ FixedVectorType::get(getElementTy(NewEltSize, IRB), NumElts);
+ if (OP1EltSize < NewEltSize)
+ OP1 = (IsSigned) ? IRB.CreateSExt(OP1, AvgInstTy)
+ : IRB.CreateZExt(OP1, AvgInstTy);
+ if (OP2EltSize < NewEltSize)
+ OP2 = (IsSigned) ? IRB.CreateSExt(OP2, AvgInstTy)
+ : IRB.CreateZExt(OP2, AvgInstTy);
+
+ Intrinsic::ID AvgIntID =
+ (IsSigned) ? Intrinsic::hexagon_vavgs : Intrinsic::hexagon_vavgu;
+ Value *NewIn = nullptr;
+
+ // Split operands if they need more than a vector length.
+ if (NewEltSize * NumElts > HwVLen) {
+ unsigned HalfElts = NumElts / 2;
+ auto *ResType =
+ FixedVectorType::get(getElementTy(NewEltSize, IRB), HalfElts);
+ std::pair<Value *, Value *> SplitOP1 = opSplit(OP1, Inst, ResType);
+ std::pair<Value *, Value *> SplitOP2 = opSplit(OP2, Inst, ResType);
+ Value *NewHi = createIntrinsic(AvgIntID, Inst, SplitOP1.first,
+ SplitOP2.first, ResType, NumElts, false);
+ Value *NewLo = createIntrinsic(AvgIntID, Inst, SplitOP1.second,
+ SplitOP2.second, ResType, NumElts, false);
+ SmallVector<Constant *, 8> ShuffleMask;
+ for (unsigned i = 0; i < NumElts; ++i)
+ ShuffleMask.push_back(IRB.getInt32(i));
+ // Concat Hi and Lo.
+ NewIn =
+ IRB.CreateShuffleVector(NewLo, NewHi, ConstantVector::get(ShuffleMask));
+ } else
+ NewIn =
+ createIntrinsic(AvgIntID, Inst, OP1, OP2, AvgInstTy, NumElts, false);
+
+ if (InstEltSize > NewEltSize)
+ // Extend the output to match the original instruction type.
+ NewIn = (IsSigned) ? IRB.CreateSExt(NewIn, InstTy)
+ : IRB.CreateZExt(NewIn, InstTy);
+ Inst->replaceAllUsesWith(NewIn);
+ return true;
+}
+
+bool HexagonGenWideningVecInstr::visitBlock(BasicBlock *B) {
+ bool Changed = false;
+ for (auto &I : *B) {
+ Type *InstTy = I.getType();
+ if (!InstTy->isVectorTy() || !HST->isTypeForHVX(cast<VectorType>(InstTy)))
+ continue;
+
+ unsigned InstLen = InstTy->getPrimitiveSizeInBits();
+ if (InstLen < HwVLen && !WidenShortVector)
+ continue;
+
+ Changed |= processInstructionForVMPA(&I);
+ Changed |= genSaturatingInst(&I);
+ Changed |= genVAvg(&I);
+ }
+ // Generate widening instructions.
+ for (auto &I : *B)
+ Changed |= processInstruction(&I);
+ return Changed;
+}
+
+bool HexagonGenWideningVecInstr::runOnFunction(Function &F) {
+ M = F.getParent();
+ HST = TM->getSubtargetImpl(F);
+
+  // Return if useHVX128BOps is not set. The pass can be enabled for 64B mode,
+  // but that will require some changes. For example, the bitcasts for
+  // intrinsics assume 128B mode.
+ if (skipFunction(F) || !HST->useHVX128BOps())
+ return false;
+
+ HwVLen = HST->getVectorLength() * 8; // Vector Length in bits
+ bool Changed = false;
+ for (auto &B : F)
+ Changed |= visitBlock(&B);
+
+ return Changed;
+}
+
+FunctionPass *
+llvm::createHexagonGenWideningVecInstr(const HexagonTargetMachine &TM) {
+ return new HexagonGenWideningVecInstr(&TM);
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h
index 0528cbd..683feb1 100644
--- a/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h
+++ b/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h
@@ -50,10 +50,7 @@ public:
const HexagonSubtarget &ST)
: Resources(ST.createDFAPacketizer(II)), TII(HII) { }
- ~HexagonHazardRecognizer() override {
- if (Resources)
- delete Resources;
- }
+ ~HexagonHazardRecognizer() override { delete Resources; }
/// This callback is invoked when a new block of instructions is about to be
/// scheduled. The hazard state is set to an initialized state.
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 3cc146b..728ffef 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -811,8 +811,8 @@ ArrayRef<int> hi(ArrayRef<int> Vuu) { return Vuu.take_back(Vuu.size() / 2); }
MaskT vshuffvdd(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Rt) {
int Len = Vu.size();
MaskT Vdd(2 * Len);
- std::copy(Vv.begin(), Vv.end(), Vdd.begin());
- std::copy(Vu.begin(), Vu.end(), Vdd.begin() + Len);
+ llvm::copy(Vv, Vdd.begin());
+ llvm::copy(Vu, Vdd.begin() + Len);
auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len);
auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len);
@@ -831,8 +831,8 @@ MaskT vshuffvdd(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Rt) {
MaskT vdealvdd(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Rt) {
int Len = Vu.size();
MaskT Vdd(2 * Len);
- std::copy(Vv.begin(), Vv.end(), Vdd.begin());
- std::copy(Vu.begin(), Vu.end(), Vdd.begin() + Len);
+ llvm::copy(Vv, Vdd.begin());
+ llvm::copy(Vu, Vdd.begin() + Len);
auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len);
auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 526b4de..025e5b0 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1504,8 +1504,8 @@ HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
const HexagonSubtarget &ST)
- : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
- Subtarget(ST) {
+ : TargetLowering(TM, ST),
+ HTM(static_cast<const HexagonTargetMachine &>(TM)), Subtarget(ST) {
auto &HRI = *Subtarget.getRegisterInfo();
setPrefLoopAlignment(Align(16));
@@ -1677,6 +1677,8 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
}
// Turn FP truncstore into trunc + store.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
// Turn FP extload into load/fpextend.
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
@@ -1872,9 +1874,15 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
+ setOperationAction(ISD::BF16_TO_FP, MVT::f32, Expand);
+ setOperationAction(ISD::BF16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_BF16, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
+
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
@@ -2107,7 +2115,7 @@ static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
/// true and store the intrinsic information into the IntrinsicInfo that was
/// passed to the function.
bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I,
+ const CallBase &I,
MachineFunction &MF,
unsigned Intrinsic) const {
switch (Intrinsic) {
@@ -2519,7 +2527,7 @@ HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
// Make sure to always cast to IntTy.
if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
const ConstantInt *CI = CN->getConstantIntValue();
- Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
+ Consts[i] = ConstantInt::getSigned(IntTy, CI->getValue().getSExtValue());
} else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
const ConstantFP *CF = CN->getConstantFPValue();
APInt A = CF->getValueAPF().bitcastToAPInt();
@@ -3948,3 +3956,51 @@ HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
AtomicCmpXchgInst *AI) const {
return AtomicExpansionKind::LLSC;
}
+
+bool HexagonTargetLowering::isMaskAndCmp0FoldingBeneficial(
+ const Instruction &AndI) const {
+ // Only sink 'and' mask to cmp use block if it is masking a single bit since
+ // this will fold the and/cmp/br into a single tstbit instruction.
+ ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
+ if (!Mask)
+ return false;
+ return Mask->getValue().isPowerOf2();
+}
+
+// Check if the result of the node is only used as a return value, as
+// otherwise we can't perform a tail-call.
+bool HexagonTargetLowering::isUsedByReturnOnly(SDNode *N,
+ SDValue &Chain) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ SDNode *Copy = *N->user_begin();
+
+ if (Copy->getOpcode() == ISD::BITCAST) {
+ return isUsedByReturnOnly(Copy, Chain);
+ }
+
+ if (Copy->getOpcode() != ISD::CopyToReg) {
+ return false;
+ }
+
+ // If the ISD::CopyToReg has a glue operand, we conservatively assume it
+ // isn't safe to perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
+ return false;
+
+ // The copy must be used by a HexagonISD::RET_GLUE, and nothing else.
+ bool HasRet = false;
+ for (SDNode *Node : Copy->users()) {
+ if (Node->getOpcode() != HexagonISD::RET_GLUE)
+ return false;
+ HasRet = true;
+ }
+ if (!HasRet)
+ return false;
+
+ Chain = Copy->getOperand(0);
+ return true;
+}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 8d04edb..2d7e3c3 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -145,7 +145,7 @@ public:
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG& DAG) const;
- bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I,
MachineFunction &MF,
unsigned Intrinsic) const override;
@@ -160,6 +160,10 @@ public:
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
+ bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
+
+ bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
+
/// Return true if an FMA operation is faster than a pair of mul and add
/// instructions. fmuladd intrinsics will be expanded to FMAs when this
/// method returns true (and FMAs are legal), otherwise fmuladd is
@@ -588,6 +592,7 @@ private:
SDValue WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue WidenHvxStore(SDValue Op, SelectionDAG &DAG) const;
SDValue WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue WidenHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const;
SDValue LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const;
SDValue ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG) const;
SDValue EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index 0573f64..4bc8e74 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -31,6 +31,10 @@ static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
cl::Hidden, cl::init(16),
cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
+static cl::opt<bool>
+ EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false),
+ cl::desc("Enable FP fast conversion routine."));
+
static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
@@ -88,6 +92,10 @@ HexagonTargetLowering::initializeHVXLowering() {
addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
}
+ if (Subtarget.useHVXV81Ops()) {
+ addRegisterClass(MVT::v64bf16, &Hexagon::HvxVRRegClass);
+ addRegisterClass(MVT::v128bf16, &Hexagon::HvxWRRegClass);
+ }
}
// Set up operation actions.
@@ -162,6 +170,30 @@ HexagonTargetLowering::initializeHVXLowering() {
setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
+ if (Subtarget.useHVXV81Ops()) {
+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128bf16, ByteW);
+ setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64bf16, ByteV);
+ setPromoteTo(ISD::SETCC, MVT::v64bf16, MVT::v64f32);
+ setPromoteTo(ISD::FADD, MVT::v64bf16, MVT::v64f32);
+ setPromoteTo(ISD::FSUB, MVT::v64bf16, MVT::v64f32);
+ setPromoteTo(ISD::FMUL, MVT::v64bf16, MVT::v64f32);
+ setPromoteTo(ISD::FMINNUM, MVT::v64bf16, MVT::v64f32);
+ setPromoteTo(ISD::FMAXNUM, MVT::v64bf16, MVT::v64f32);
+
+ setOperationAction(ISD::SPLAT_VECTOR, MVT::v64bf16, Legal);
+ setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64bf16, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64bf16, Custom);
+
+ setOperationAction(ISD::MLOAD, MVT::v64bf16, Custom);
+ setOperationAction(ISD::MSTORE, MVT::v64bf16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v64bf16, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v64bf16, Custom);
+
+ setOperationAction(ISD::SPLAT_VECTOR, MVT::bf16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::bf16, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
+ }
+
for (MVT P : FloatW) {
setOperationAction(ISD::LOAD, P, Custom);
setOperationAction(ISD::STORE, P, Custom);
@@ -438,6 +470,7 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::ANY_EXTEND, VecTy, Custom);
setOperationAction(ISD::SIGN_EXTEND, VecTy, Custom);
setOperationAction(ISD::ZERO_EXTEND, VecTy, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, VecTy, Custom);
if (Subtarget.useHVXFloatingPoint()) {
setOperationAction(ISD::FP_TO_SINT, VecTy, Custom);
setOperationAction(ISD::FP_TO_UINT, VecTy, Custom);
@@ -462,6 +495,10 @@ HexagonTargetLowering::initializeHVXLowering() {
unsigned
HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
+ // Early exit for invalid input types
+ if (!VecTy.isVector())
+ return ~0u;
+
MVT ElemTy = VecTy.getVectorElementType();
unsigned VecLen = VecTy.getVectorNumElements();
unsigned HwLen = Subtarget.getVectorLength();
@@ -1667,14 +1704,15 @@ HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
// In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
// not a legal type, just bitcast the node to use i16
// types and bitcast the result back to f16
- if (VecTy.getVectorElementType() == MVT::f16) {
- SmallVector<SDValue,64> NewOps;
+ if (VecTy.getVectorElementType() == MVT::f16 ||
+ VecTy.getVectorElementType() == MVT::bf16) {
+ SmallVector<SDValue, 64> NewOps;
for (unsigned i = 0; i != Size; i++)
NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
- SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
- tyVector(VecTy, MVT::i16), NewOps);
- return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
+ SDValue T0 =
+ DAG.getNode(ISD::BUILD_VECTOR, dl, tyVector(VecTy, MVT::i16), NewOps);
+ return DAG.getBitcast(tyVector(VecTy, VecTy.getVectorElementType()), T0);
}
// First, split the BUILD_VECTOR for vector pairs. We could generate
@@ -1698,7 +1736,7 @@ HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
MVT VecTy = ty(Op);
MVT ArgTy = ty(Op.getOperand(0));
- if (ArgTy == MVT::f16) {
+ if (ArgTy == MVT::f16 || ArgTy == MVT::bf16) {
MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
@@ -1831,12 +1869,12 @@ HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
if (ElemTy == MVT::i1)
return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
- if (ElemTy == MVT::f16) {
+ if (ElemTy == MVT::f16 || ElemTy == MVT::bf16) {
SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
tyVector(VecTy, MVT::i16),
DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
DAG.getBitcast(MVT::i16, ValV), IdxV);
- return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
+ return DAG.getBitcast(tyVector(VecTy, ElemTy), T0);
}
return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
@@ -2334,6 +2372,25 @@ SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
MVT VecTy = ty(Op);
MVT ArgTy = ty(Op.getOperand(0));
const SDLoc &dl(Op);
+
+ if (ArgTy == MVT::v64bf16) {
+ MVT HalfTy = typeSplit(VecTy).first;
+ SDValue BF16Vec = Op.getOperand(0);
+ SDValue Zeroes =
+ getInstr(Hexagon::V6_vxor, dl, HalfTy, {BF16Vec, BF16Vec}, DAG);
+    // Interleave the zero vector with the bf16 vector, with zeroes in the
+    // lower half of each 32-bit lane, effectively extending the bf16 values
+    // to fp32 values.
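+    // A bf16 value is the upper 16 bits of the corresponding fp32 bit
+    // pattern, so pairing each bf16 halfword with a zero halfword below it
+    // yields the fp32 value directly.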
+ SDValue ShuffVec =
+ getInstr(Hexagon::V6_vshufoeh, dl, VecTy, {BF16Vec, Zeroes}, DAG);
+ VectorPair VecPair = opSplit(ShuffVec, dl, DAG);
+ SDValue Result = getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
+ {VecPair.second, VecPair.first,
+ DAG.getSignedConstant(-4, dl, MVT::i32)},
+ DAG);
+ return Result;
+ }
+
assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
SDValue F16Vec = Op.getOperand(0);
@@ -2918,6 +2975,32 @@ HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
MVT ResTy = ty(Op);
assert(InpTy.changeTypeToInteger() == ResTy);
+  // For now this is an experiment under a flag.
+  // In architectures before V81 the rounding mode is towards the nearest
+  // value.
+ // The C/C++ standard requires rounding towards zero:
+ // C (C99 and later): ISO/IEC 9899:2018 (C18), section 6.3.1.4 — "When a
+ // finite value of real floating type is converted to an integer type, the
+ // fractional part is discarded (i.e., the value is truncated toward zero)."
+ // C++: ISO/IEC 14882:2020 (C++20), section 7.3.7 — "A prvalue of a
+ // floating-point type can be converted to a prvalue of an integer type. The
+ // conversion truncates; that is, the fractional part is discarded."
+ if (InpTy == MVT::v64f16) {
+ if (Subtarget.useHVXV81Ops()) {
+      // This is C/C++ compliant.
+ SDValue ConvVec =
+ getInstr(Hexagon::V6_vconv_h_hf_rnd, dl, ResTy, {Op0}, DAG);
+ return ConvVec;
+ } else if (EnableFpFastConvert) {
+ // Vd32.h=Vu32.hf same as Q6_Vh_equals_Vhf
+ SDValue ConvVec = getInstr(Hexagon::V6_vconv_h_hf, dl, ResTy, {Op0}, DAG);
+ return ConvVec;
+ }
+ } else if (EnableFpFastConvert && InpTy == MVT::v32f32) {
+ // Vd32.w=Vu32.sf same as Q6_Vw_equals_Vsf
+ SDValue ConvVec = getInstr(Hexagon::V6_vconv_w_sf, dl, ResTy, {Op0}, DAG);
+ return ConvVec;
+ }
+
// int32_t conv_f32_to_i32(uint32_t inp) {
// // s | exp8 | frac23
//
@@ -3351,6 +3434,104 @@ HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
{SetCC, getZero(dl, MVT::i32, DAG)});
}
+SDValue HexagonTargetLowering::WidenHvxIntrinsic(SDValue Op,
+ SelectionDAG &DAG) const {
+ const SDLoc &dl(Op);
+ unsigned HwWidth = 8 * Subtarget.getVectorLength();
+ bool IsResInterleaved = false;
+
+ SDValue WideRes = SDValue();
+ SDValue Op1 = Op.getOperand(1);
+ MVT ResTy = ty(Op);
+ MVT OpTy = ty(Op1);
+ if (!Subtarget.isHVXElementType(OpTy) || !Subtarget.isHVXElementType(ResTy))
+ return SDValue();
+
+ auto getFactor = [HwWidth](MVT Ty) {
+ unsigned Width = Ty.getSizeInBits();
+ assert(HwWidth % Width == 0);
+ return HwWidth / Width;
+ };
+
+ auto getWideTy = [getFactor](MVT Ty) {
+ unsigned WideLen = Ty.getVectorNumElements() * getFactor(Ty);
+ return MVT::getVectorVT(Ty.getVectorElementType(), WideLen);
+ };
+
+ unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDValue Op2 = Op.getOperand(2);
+ SDValue WideOp1 = appendUndef(Op1, getWideTy(OpTy), DAG);
+ SDValue WideOp2;
+  if (isa<ConstantSDNode>(Op2.getNode())) {
+ WideOp2 = Op2;
+ } else {
+ WideOp2 = appendUndef(Op2, getWideTy(OpTy), DAG);
+ }
+ unsigned WidenFactor = getFactor(OpTy);
+ unsigned WideLen = ResTy.getVectorNumElements() * WidenFactor;
+ MVT WideResTy = MVT::getVectorVT(ResTy.getVectorElementType(), WideLen);
+
+ switch (IID) {
+ default:
+ return SDValue();
+ case Intrinsic::hexagon_vasrsat_su:
+ case Intrinsic::hexagon_vasrsat_uu:
+ case Intrinsic::hexagon_vasrsat_ss:
+ WideRes = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WideResTy,
+ DAG.getConstant(IID, dl, MVT::i32), WideOp1, WideOp2,
+ Op.getOperand(3));
+ break;
+ case Intrinsic::hexagon_vadd_su:
+ case Intrinsic::hexagon_vadd_uu:
+ case Intrinsic::hexagon_vadd_ss:
+ case Intrinsic::hexagon_vadd_us:
+
+ case Intrinsic::hexagon_vsub_su:
+ case Intrinsic::hexagon_vsub_uu:
+ case Intrinsic::hexagon_vsub_ss:
+ case Intrinsic::hexagon_vsub_us:
+
+ case Intrinsic::hexagon_vmpy_su:
+ case Intrinsic::hexagon_vmpy_uu:
+ case Intrinsic::hexagon_vmpy_ss:
+ case Intrinsic::hexagon_vmpy_us:
+ case Intrinsic::hexagon_vmpy_ub_ub:
+ case Intrinsic::hexagon_vmpy_ub_b:
+ case Intrinsic::hexagon_vmpy_uh_uh:
+ case Intrinsic::hexagon_vmpy_h_h:
+ IsResInterleaved = true;
+ WideRes = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WideResTy,
+ DAG.getConstant(IID, dl, MVT::i32), WideOp1, WideOp2);
+ break;
+ case Intrinsic::hexagon_vavgu:
+ case Intrinsic::hexagon_vavgs:
+ WideRes = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, WideResTy,
+ DAG.getConstant(IID, dl, MVT::i32), WideOp1, WideOp2);
+ break;
+ }
+ unsigned OrigLen = ResTy.getVectorNumElements();
+ assert(OrigLen % 2 == 0);
+ unsigned HalfOrigLen = OrigLen / 2;
+ unsigned SplitLen = WideLen / 2;
+ if (IsResInterleaved) {
+ // Get the valid odd and even elements from the widened vector-pair while
+ // maintaining their deinterleaved order. The following shuffle_vector will
+ // produce a vector-pair with all the valid elements (even followed by odd)
+ // accumulated together followed by undefs.
+ SmallVector<int, 128> ShuffV;
+ for (unsigned j = 0; j < WidenFactor; j++) {
+ for (unsigned i = 0; i < HalfOrigLen; i++)
+ ShuffV.push_back(j * HalfOrigLen + i);
+ for (unsigned i = 0; i < HalfOrigLen; i++)
+ ShuffV.push_back(SplitLen + j * HalfOrigLen + i);
+ }
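+    // For example, with OrigLen = 64 and WidenFactor = 2 (SplitLen = 64,
+    // HalfOrigLen = 32), ShuffV is [0..31, 64..95, 32..63, 96..127].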
+ WideRes = DAG.getVectorShuffle(WideResTy, dl, WideRes,
+ DAG.getUNDEF(WideResTy), ShuffV);
+ }
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResTy,
+ {WideRes, getZero(dl, MVT::i32, DAG)});
+}
+
SDValue
HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
@@ -3617,6 +3798,12 @@ HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
Results.push_back(S);
}
break;
+ case ISD::INTRINSIC_WO_CHAIN:
+ if (shouldWidenToHvx(ty(Op.getOperand(1)), DAG)) {
+ if (SDValue T = WidenHvxIntrinsic(Op, DAG))
+ Results.push_back(T);
+ }
+ break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::FP_TO_SINT:
@@ -3677,6 +3864,11 @@ HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
Results.push_back(C);
}
break;
+ case ISD::INTRINSIC_WO_CHAIN:
+ assert(shouldWidenToHvx(ty(N->getOperand(1)), DAG) && "Not widening?");
+ if (SDValue T = WidenHvxIntrinsic(Op, DAG))
+ Results.push_back(T);
+ break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 47726d6..7682af4 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -118,9 +118,9 @@ const int Hexagon_ADDI_OFFSET_MIN = -32768;
void HexagonInstrInfo::anchor() {}
HexagonInstrInfo::HexagonInstrInfo(const HexagonSubtarget &ST)
- : HexagonGenInstrInfo(ST, Hexagon::ADJCALLSTACKDOWN,
+ : HexagonGenInstrInfo(ST, RegInfo, Hexagon::ADJCALLSTACKDOWN,
Hexagon::ADJCALLSTACKUP),
- Subtarget(ST) {}
+ RegInfo(ST.getHwMode()), Subtarget(ST) {}
namespace llvm {
namespace HexagonFUnits {
@@ -964,7 +964,6 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
Register SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI,
Register VReg,
MachineInstr::MIFlag Flags) const {
DebugLoc DL = MBB.findDebugLoc(I);
@@ -1009,10 +1008,12 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
}
}
-void HexagonInstrInfo::loadRegFromStackSlot(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register DestReg,
- int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
- Register VReg, MachineInstr::MIFlag Flags) const {
+void HexagonInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ Register DestReg, int FI,
+ const TargetRegisterClass *RC,
+ Register VReg,
+ MachineInstr::MIFlag Flags) const {
DebugLoc DL = MBB.findDebugLoc(I);
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -4753,6 +4754,19 @@ bool HexagonInstrInfo::getBundleNoShuf(const MachineInstr &MIB) const {
return (Operand.isImm() && (Operand.getImm() & memShufDisabledMask) != 0);
}
+bool HexagonInstrInfo::isQFPMul(const MachineInstr *MI) const {
+  return (MI->getOpcode() == Hexagon::V6_vmpy_qf16_hf ||
+          MI->getOpcode() == Hexagon::V6_vmpy_qf16_mix_hf ||
+          MI->getOpcode() == Hexagon::V6_vmpy_qf16 ||
+          MI->getOpcode() == Hexagon::V6_vmpy_qf32_hf ||
+          MI->getOpcode() == Hexagon::V6_vmpy_qf32_mix_hf ||
+          MI->getOpcode() == Hexagon::V6_vmpy_qf32_sf ||
+          MI->getOpcode() == Hexagon::V6_vmpy_qf32_qf16 ||
+          MI->getOpcode() == Hexagon::V6_vmpy_qf32);
+}
+
// Addressing mode relations.
short HexagonInstrInfo::changeAddrMode_abs_io(short Opc) const {
return Opc >= 0 ? Hexagon::changeAddrMode_abs_io(Opc) : Opc;
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index c17e527..796b978 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -23,6 +23,8 @@
#include <cstdint>
#include <vector>
+#include "HexagonRegisterInfo.h"
+
#define GET_INSTRINFO_HEADER
#include "HexagonGenInstrInfo.inc"
@@ -36,6 +38,7 @@ class MachineOperand;
class TargetRegisterInfo;
class HexagonInstrInfo : public HexagonGenInstrInfo {
+ const HexagonRegisterInfo RegInfo;
const HexagonSubtarget &Subtarget;
enum BundleAttribute {
@@ -47,6 +50,8 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
public:
explicit HexagonInstrInfo(const HexagonSubtarget &ST);
+ const HexagonRegisterInfo &getRegisterInfo() const { return RegInfo; }
+
/// TargetInstrInfo overrides.
/// If the specified machine instruction is a direct
@@ -183,8 +188,7 @@ public:
/// is true, the register operand is the last use and must be marked kill.
void storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
- bool isKill, int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg,
+ bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg,
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
/// Load the specified register of the given register class from the specified
@@ -193,7 +197,7 @@ public:
void loadRegFromStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
Register DestReg, int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI, Register VReg,
+ Register VReg,
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
/// This function is called for all pseudo instructions
@@ -532,6 +536,7 @@ public:
}
MCInst getNop() const override;
+  bool isQFPMul(const MachineInstr *MI) const;
};
/// \brief Create RegSubRegPair from a register MachineOperand
diff --git a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index 25b81d8..7f16c3e 100644
--- a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -355,6 +355,120 @@ defm : T_VVI_inv_pat <V6_valignbi, int_hexagon_V6_vlalignbi>;
defm : T_VVI_inv_pat <V6_valignbi, int_hexagon_V6_vlalignb>;
defm : T_VVR_pat <V6_vlalignb, int_hexagon_V6_vlalignbi>;
+
+class VAccGenIntrin_pat<InstHexagon MI, Intrinsic IntID,
+ ValueType ResType, PatFrag VPred, PatFrag WPred>
+ : Pat<(add WPred:$Vx, (ResType (IntID VPred:$Vs, VPred:$Vt))),
+ (MI WPred:$Vx, VPred:$Vs, VPred:$Vt)>, Requires<[UseHVX128B]>;
+
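+// For example, the first pattern below folds an add of the widening multiply
+// result into the accumulating form "Vxx.h += vmpy(Vu.b,Vv.b)" instead of a
+// separate vmpy and vadd.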
+let AddedComplexity = 100 in {
+ def : VAccGenIntrin_pat<V6_vmpybv_acc,
+ int_hexagon_vmpy_ss, VecPI16, HVI8, HWI16>;
+ def : VAccGenIntrin_pat<V6_vmpyubv_acc,
+ int_hexagon_vmpy_uu, VecPI16, HVI8, HWI16>;
+ def : VAccGenIntrin_pat<V6_vmpyhv_acc,
+ int_hexagon_vmpy_ss, VecPI32, HVI16, HWI32>;
+ def : VAccGenIntrin_pat<V6_vmpyuhv_acc,
+ int_hexagon_vmpy_uu, VecPI32, HVI16, HWI32>;
+
+ // The second operand in V6_vmpybusv_acc is unsigned.
+ def : Pat<(add HWI16:$Vx, (VecPI16 (int_hexagon_vmpy_us HVI8:$Vs,
+ HVI8:$Vv))),
+ (V6_vmpybusv_acc HWI16:$Vx, HVI8:$Vs, HVI8:$Vv)>;
+
+ def : Pat<(add HWI16:$Vx, (VecPI16 (int_hexagon_vmpy_su HVI8:$Vs,
+ HVI8:$Vv))),
+ (V6_vmpybusv_acc HWI16:$Vx, HVI8:$Vv, HVI8:$Vs)>;
+
+ // The third operand in V6_vmpyhus_acc is unsigned.
+ def : Pat<(add HWI32:$Vx, (VecPI32 (int_hexagon_vmpy_us HVI16:$Vs,
+ HVI16:$Vv))),
+ (V6_vmpyhus_acc HWI32:$Vx, HVI16:$Vv, HVI16:$Vs)>;
+
+ def : Pat<(add HWI32:$Vx, (VecPI32 (int_hexagon_vmpy_su HVI16:$Vs,
+ HVI16:$Vv))),
+ (V6_vmpyhus_acc HWI32:$Vx, HVI16:$Vs, HVI16:$Vv)>;
+}
+
+class ExtIntrin_pat<InstHexagon MI, Intrinsic IntID,
+ ValueType ResType, PatFrag VPred>
+ : Pat<(ResType (IntID VPred:$Vs, VPred:$Vt)),
+ (MI VPred:$Vs, VPred:$Vt)>, Requires<[UseHVX128B]>;
+
+def : ExtIntrin_pat<V6_vaddubh, int_hexagon_vadd_uu, VecPI16, HVI8>;
+def : ExtIntrin_pat<V6_vadduhw, int_hexagon_vadd_uu, VecPI32, HVI16>;
+def : ExtIntrin_pat<V6_vaddhw, int_hexagon_vadd_ss, VecPI32, HVI16>;
+
+def : ExtIntrin_pat<V6_vsububh, int_hexagon_vsub_uu, VecPI16, HVI8>;
+def : ExtIntrin_pat<V6_vsubuhw, int_hexagon_vsub_uu, VecPI32, HVI16>;
+def : ExtIntrin_pat<V6_vsubhw, int_hexagon_vsub_ss, VecPI32, HVI16>;
+
+def : ExtIntrin_pat<V6_vmpybv, int_hexagon_vmpy_ss, VecPI16, HVI8>;
+def : ExtIntrin_pat<V6_vmpyhv, int_hexagon_vmpy_ss, VecPI32, HVI16>;
+def : ExtIntrin_pat<V6_vmpyubv, int_hexagon_vmpy_uu, VecPI16, HVI8>;
+def : ExtIntrin_pat<V6_vmpyuhv, int_hexagon_vmpy_uu, VecPI32, HVI16>;
+
+// The first operand in V6_vmpybusv is unsigned.
+def : Pat<(VecPI16 (int_hexagon_vmpy_us HVI8:$Vs, HVI8:$Vv)),
+ (V6_vmpybusv HVI8:$Vs, HVI8:$Vv)>;
+
+def : Pat<(VecPI16 (int_hexagon_vmpy_su HVI8:$Vs, HVI8:$Vv)),
+ (V6_vmpybusv HVI8:$Vv, HVI8:$Vs)>;
+
+// The second operand in V6_vmpyhus is unsigned.
+def : Pat<(VecPI32 (int_hexagon_vmpy_us HVI16:$Vs, HVI16:$Vv)),
+ (V6_vmpyhus HVI16:$Vv, HVI16:$Vs)>;
+
+def : Pat<(VecPI32 (int_hexagon_vmpy_su HVI16:$Vs, HVI16:$Vv)),
+ (V6_vmpyhus HVI16:$Vs, HVI16:$Vv)>;
+
+class VAvgInstr_pat<InstHexagon MI, Intrinsic IntID,
+ ValueType ResType, PatFrag VPred>
+ : Pat<(ResType (IntID VPred:$Vs, VPred:$Vt)),
+ (MI VPred:$Vs, VPred:$Vt)>, Requires<[UseHVX128B]>;
+
+def : VAvgInstr_pat<V6_vavgub, int_hexagon_vavgu, VecI8, HVI8>;
+def : VAvgInstr_pat<V6_vavgb, int_hexagon_vavgs, VecI8, HVI8>;
+def : VAvgInstr_pat<V6_vavguh, int_hexagon_vavgu, VecI16, HVI16>;
+def : VAvgInstr_pat<V6_vavgh, int_hexagon_vavgs, VecI16, HVI16>;
+def : VAvgInstr_pat<V6_vavguw, int_hexagon_vavgu, VecI32, HVI32>;
+def : VAvgInstr_pat<V6_vavgw, int_hexagon_vavgs, VecI32, HVI32>;
+
+class VAsrIntr_pat<InstHexagon MI, Intrinsic IntID,
+ ValueType ResType, PatFrag VPred>
+: Pat<(ResType (IntID VPred:$Vs, VPred:$Vt, IntRegsLow8:$Rt)),
+ (MI VPred:$Vs, VPred:$Vt, IntRegsLow8:$Rt)>, Requires<[UseHVX128B]>;
+
+def : VAsrIntr_pat<V6_vasruhubsat, int_hexagon_vasrsat_uu, VecI8, HVI16>;
+def : VAsrIntr_pat<V6_vasrhubsat, int_hexagon_vasrsat_su, VecI8, HVI16>;
+def : VAsrIntr_pat<V6_vasrhbsat, int_hexagon_vasrsat_ss, VecI8, HVI16>;
+def : VAsrIntr_pat<V6_vasruwuhsat, int_hexagon_vasrsat_uu, VecI16, HVI32>;
+def : VAsrIntr_pat<V6_vasrwuhsat, int_hexagon_vasrsat_su, VecI16, HVI32>;
+def : VAsrIntr_pat<V6_vasrwhsat, int_hexagon_vasrsat_ss, VecI16, HVI32>;
+
+class VMpyVSInstr_pat<InstHexagon MI, Intrinsic IntID,
+ ValueType ResType, PatFrag VPred>
+: Pat<(ResType (IntID VPred:$Vs, IntRegs:$Rt)),
+ (MI VPred:$Vs, IntRegs:$Rt)>, Requires<[UseHVX128B]>;
+
+def : VMpyVSInstr_pat<V6_vmpyub, int_hexagon_vmpy_ub_ub, VecPI16, HVI8>;
+def : VMpyVSInstr_pat<V6_vmpybus, int_hexagon_vmpy_ub_b, VecPI16, HVI8>;
+def : VMpyVSInstr_pat<V6_vmpyuh, int_hexagon_vmpy_uh_uh, VecPI32, HVI16>;
+def : VMpyVSInstr_pat<V6_vmpyh, int_hexagon_vmpy_h_h, VecPI32, HVI16>;
+
+class VAccIntrin_pat<InstHexagon MI, Intrinsic IntID>
+ : Pat<(add HvxWR:$Vx, (IntID HvxVR:$Vs, HvxVR:$Vt)),
+ (MI HvxWR:$Vx, HvxVR:$Vs, HvxVR:$Vt)>, Requires<[UseHVX128B]>;
+
+let AddedComplexity = 350 in {
+ def : VAccIntrin_pat<V6_vmpybv_acc, int_hexagon_V6_vmpybv_128B>;
+ def : VAccIntrin_pat<V6_vmpyubv_acc, int_hexagon_V6_vmpyubv_128B>;
+ def : VAccIntrin_pat<V6_vmpybusv_acc, int_hexagon_V6_vmpybusv_128B>;
+ def : VAccIntrin_pat<V6_vmpyhv_acc, int_hexagon_V6_vmpyhv_128B>;
+ def : VAccIntrin_pat<V6_vmpyuhv_acc, int_hexagon_V6_vmpyuhv_128B>;
+ def : VAccIntrin_pat<V6_vmpyhus_acc, int_hexagon_V6_vmpyhus_128B>;
+}
+
def: Pat<(int_hexagon_V6_vd0),
(V6_vd0)>, Requires<[UseHVXV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vd0_128B ),
diff --git a/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp b/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp
index 7cbd81f..54969b2 100644
--- a/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonLoadStoreWidening.cpp
@@ -646,7 +646,7 @@ bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
MachineInstr *CombI;
if (Acc != 0) {
const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
- const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0);
Register VReg = MF->getRegInfo().createVirtualRegister(RC);
MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(LowerAcc);
NG.push_back(TfrI);
@@ -677,7 +677,7 @@ bool HexagonLoadStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
} else {
// Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
- const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0);
Register VReg = MF->getRegInfo().createVirtualRegister(RC);
MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg).addImm(int(Acc));
NG.push_back(TfrI);
diff --git a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 5a1d5bc..c68b632 100644
--- a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -138,7 +138,7 @@ static bool canBeFeederToNewValueJump(const HexagonInstrInfo *QII,
return false;
// Make sure that the (unique) def operand is a register from IntRegs.
- bool HadDef = false;
+ [[maybe_unused]] bool HadDef = false;
for (const MachineOperand &Op : II->operands()) {
if (!Op.isReg() || !Op.isDef())
continue;
diff --git a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 6dd83c1..53afbc4 100644
--- a/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -107,7 +107,7 @@ private:
bool canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN, MachineInstr &MI,
const NodeList &UNodeList);
bool isSafeToExtLR(NodeAddr<StmtNode *> SN, MachineInstr *MI,
- unsigned LRExtReg, const NodeList &UNodeList);
+ Register LRExtReg, const NodeList &UNodeList);
void getAllRealUses(NodeAddr<StmtNode *> SN, NodeList &UNodeList);
bool allValidCandidates(NodeAddr<StmtNode *> SA, NodeList &UNodeList);
short getBaseWithLongOffset(const MachineInstr &MI) const;
@@ -177,7 +177,7 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN,
NodeId OffsetRegRD = 0;
for (NodeAddr<UseNode *> UA : AddAslSN.Addr->members_if(DFG->IsUse, *DFG)) {
RegisterRef RR = UA.Addr->getRegRef(*DFG);
- if (OffsetReg == RR.Reg) {
+ if (OffsetReg == RR.asMCReg()) {
OffsetRR = RR;
OffsetRegRD = UA.Addr->getReachingDef();
}
@@ -198,7 +198,7 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN,
// Reaching Def to an offset register can't be a phi.
if ((OffsetRegDN.Addr->getFlags() & NodeAttrs::PhiRef) &&
MI.getParent() != UseMI.getParent())
- return false;
+ return false;
const MCInstrDesc &UseMID = UseMI.getDesc();
if ((!UseMID.mayLoad() && !UseMID.mayStore()) ||
@@ -300,7 +300,7 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA,
}
bool HexagonOptAddrMode::isSafeToExtLR(NodeAddr<StmtNode *> SN,
- MachineInstr *MI, unsigned LRExtReg,
+ MachineInstr *MI, Register LRExtReg,
const NodeList &UNodeList) {
RegisterRef LRExtRR;
NodeId LRExtRegRD = 0;
@@ -308,7 +308,7 @@ bool HexagonOptAddrMode::isSafeToExtLR(NodeAddr<StmtNode *> SN,
// for the LRExtReg.
for (NodeAddr<UseNode *> UA : SN.Addr->members_if(DFG->IsUse, *DFG)) {
RegisterRef RR = UA.Addr->getRegRef(*DFG);
- if (LRExtReg == RR.Reg) {
+ if (LRExtReg == RR.asMCReg()) {
LRExtRR = RR;
LRExtRegRD = UA.Addr->getReachingDef();
}
@@ -552,7 +552,7 @@ bool HexagonOptAddrMode::processAddBases(NodeAddr<StmtNode *> AddSN,
// Find the UseNode that contains the base register and it's reachingDef
for (NodeAddr<UseNode *> UA : AddSN.Addr->members_if(DFG->IsUse, *DFG)) {
RegisterRef URR = UA.Addr->getRegRef(*DFG);
- if (BaseReg != URR.Reg)
+ if (BaseReg != URR.asMCReg())
continue;
UAReachingDefID = UA.Addr->getReachingDef();
@@ -740,7 +740,7 @@ bool HexagonOptAddrMode::processAddUses(NodeAddr<StmtNode *> AddSN,
// for the LRExtReg.
for (NodeAddr<UseNode *> UA : AddSN.Addr->members_if(DFG->IsUse, *DFG)) {
RegisterRef RR = UA.Addr->getRegRef(*DFG);
- if (BaseReg == RR.Reg)
+ if (BaseReg == RR.asMCReg())
LRExtRegRD = UA.Addr->getReachingDef();
}
diff --git a/llvm/lib/Target/Hexagon/HexagonOptShuffleVector.cpp b/llvm/lib/Target/Hexagon/HexagonOptShuffleVector.cpp
new file mode 100644
index 0000000..fcfae17
--- /dev/null
+++ b/llvm/lib/Target/Hexagon/HexagonOptShuffleVector.cpp
@@ -0,0 +1,713 @@
+//===---------------------- HexagonOptShuffleVector.cpp -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Optimize vector shuffles by postponing them as late as possible. The intent
+// here is to remove unnecessary shuffles and also to increase the
+// opportunities for adjacent shuffles to be merged together.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonTargetMachine.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicsHexagon.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+using namespace PatternMatch;
+
+#define DEBUG_TYPE "hex-shuff-vec"
+/// A command line argument to limit the search space along the def chain.
+static cl::opt<int> MaxDefSearchCount(
+ "shuffvec-max-search-count",
+ cl::desc("Maximum number of instructions traversed along def chain."),
+ cl::Hidden, cl::init(15));
+
+#ifndef NDEBUG
+static cl::opt<int>
+ ShuffVecLimit("shuff-vec-max",
+ cl::desc("Maximum number of shuffles to be relocated."),
+ cl::Hidden, cl::init(-1));
+#endif
+
+namespace llvm {
+void initializeHexagonOptShuffleVectorPass(PassRegistry &);
+FunctionPass *createHexagonOptShuffleVector(const HexagonTargetMachine &);
+} // end namespace llvm
+
+namespace {
+
+class HexagonOptShuffleVector : public FunctionPass {
+public:
+ static char ID;
+#ifndef NDEBUG
+ static int NumRelocated;
+#endif
+ HexagonOptShuffleVector() : FunctionPass(ID) {
+ initializeHexagonOptShuffleVectorPass(*PassRegistry::getPassRegistry());
+ }
+
+ HexagonOptShuffleVector(const HexagonTargetMachine *TM)
+ : FunctionPass(ID), TM(TM) {
+ initializeHexagonOptShuffleVectorPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override {
+ return "Hexagon Optimize Vector Shuffles";
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ FunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ using ValueVector = SmallVector<Value *, 8>;
+ const HexagonTargetMachine *TM = nullptr;
+ const HexagonSubtarget *HST = nullptr;
+ SmallPtrSet<Instruction *, 8> Visited;
+ using ShuffUseList =
+ SmallDenseMap<Instruction *, SmallVector<Instruction *, 2>>;
+ ShuffUseList ShuffUses;
+ int DefSearchCount;
+
+ bool visitBlock(BasicBlock *B);
+ bool findNewShuffLoc(Instruction *I, ArrayRef<int> &ShuffMask,
+ Value *&NewLoc);
+ bool isValidIntrinsic(IntrinsicInst *I);
+ bool relocateShuffVec(Instruction *I, ArrayRef<int> &M, Value *NewLoc,
+ std::list<Instruction *> &WorkList);
+ bool getUseList(Instruction *I, ValueVector &UseList);
+ bool analyzeHiLoUse(Instruction *HI, Instruction *LO,
+ ArrayRef<int> &ShuffMask, Value *&NewLoc,
+ ShuffUseList &CurShuffUses);
+ bool isHILo(Value *V, bool IsHI);
+ bool hasDefWithSameShuffMask(Value *V, SmallVector<Instruction *, 2> &ImmUse,
+ ArrayRef<int> &ShuffMask,
+ ShuffUseList &CurShuffUses);
+ void FindHiLoUse(ValueVector &UseList, Instruction *&HI, Instruction *&LO);
+ bool isConcatMask(ArrayRef<int> &Mask, Instruction *ShuffInst);
+ bool isValidUseInstr(ValueVector &UseList, Instruction *&UI);
+ bool areAllOperandsValid(Instruction *I, Instruction *UI,
+ ArrayRef<int> &ShuffMask,
+ ShuffUseList &CurShuffUses);
+ Value *getOperand(Instruction *I, unsigned i);
+ static iterator_range<User::op_iterator> getArgOperands(User *U);
+ static std::pair<Value *, Value *> stripCasts(Value *V);
+ static bool isConstantVectorSplat(Value *V);
+};
+
+} // end anonymous namespace
+
+#ifndef NDEBUG
+int HexagonOptShuffleVector::NumRelocated = 0;
+#endif
+char HexagonOptShuffleVector::ID = 0;
+
+INITIALIZE_PASS_BEGIN(HexagonOptShuffleVector, "shuff-vec",
+ "Hexagon Optimize Shuffle Vector", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(HexagonOptShuffleVector, "shuff-vec",
+ "Hexagon Optimize Shuffle Vector", false, false)
+
+bool HexagonOptShuffleVector::isConcatMask(ArrayRef<int> &Mask,
+ Instruction *ShuffInst) {
+ Type *ShuffTy = ShuffInst->getType();
+ int NumElts = cast<FixedVectorType>(ShuffTy)->getNumElements();
+ for (int i = 0; i < NumElts; i++) {
+ if (Mask[i] != i)
+ return false;
+ }
+ return true;
+}
+
+bool HexagonOptShuffleVector::isValidIntrinsic(IntrinsicInst *I) {
+ switch (I->getIntrinsicID()) {
+ default:
+ return false;
+ case Intrinsic::hexagon_V6_vaddubh_128B:
+ case Intrinsic::hexagon_V6_vadduhw_128B:
+ case Intrinsic::hexagon_V6_vaddhw_128B:
+ case Intrinsic::hexagon_V6_vaddh_dv_128B:
+ case Intrinsic::hexagon_V6_vsububh_128B:
+ case Intrinsic::hexagon_V6_vsubuhw_128B:
+ case Intrinsic::hexagon_V6_vsubhw_128B:
+ case Intrinsic::hexagon_V6_vsubh_dv_128B:
+ case Intrinsic::hexagon_V6_vmpyubv_128B:
+ case Intrinsic::hexagon_V6_vmpybv_128B:
+ case Intrinsic::hexagon_V6_vmpyuhv_128B:
+ case Intrinsic::hexagon_V6_vmpyhv_128B:
+ case Intrinsic::hexagon_V6_vmpybusv_128B:
+ case Intrinsic::hexagon_V6_vmpyhus_128B:
+ case Intrinsic::hexagon_V6_vavgb_128B:
+ case Intrinsic::hexagon_V6_vavgub_128B:
+ case Intrinsic::hexagon_V6_vavgh_128B:
+ case Intrinsic::hexagon_V6_vavguh_128B:
+ case Intrinsic::hexagon_V6_vavgw_128B:
+ case Intrinsic::hexagon_V6_vavguw_128B:
+ case Intrinsic::hexagon_V6_hi_128B:
+ case Intrinsic::hexagon_V6_lo_128B:
+ case Intrinsic::sadd_sat:
+ case Intrinsic::uadd_sat:
+    // Generic Hexagon vector intrinsics
+ case Intrinsic::hexagon_vadd_su:
+ case Intrinsic::hexagon_vadd_uu:
+ case Intrinsic::hexagon_vadd_ss:
+ case Intrinsic::hexagon_vadd_us:
+ case Intrinsic::hexagon_vsub_su:
+ case Intrinsic::hexagon_vsub_uu:
+ case Intrinsic::hexagon_vsub_ss:
+ case Intrinsic::hexagon_vsub_us:
+ case Intrinsic::hexagon_vmpy_su:
+ case Intrinsic::hexagon_vmpy_uu:
+ case Intrinsic::hexagon_vmpy_ss:
+ case Intrinsic::hexagon_vmpy_us:
+ case Intrinsic::hexagon_vavgu:
+ case Intrinsic::hexagon_vavgs:
+ case Intrinsic::hexagon_vmpy_ub_b:
+ case Intrinsic::hexagon_vmpy_ub_ub:
+ case Intrinsic::hexagon_vmpy_uh_uh:
+ case Intrinsic::hexagon_vmpy_h_h:
+ return true;
+ }
+ llvm_unreachable("Unsupported instruction!");
+}
+
+bool HexagonOptShuffleVector::getUseList(Instruction *I, ValueVector &UseList) {
+ for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
+ Instruction *J = dyn_cast<Instruction>(*UI);
+ if (!J)
+ return false;
+ if (auto *C = dyn_cast<CastInst>(*UI)) {
+ if (!getUseList(C, UseList))
+ return false;
+ } else
+ UseList.push_back(*UI);
+ ++UI;
+ }
+ return true;
+}
+
+bool HexagonOptShuffleVector::isHILo(Value *V, bool IsHI) {
+ if (!(dyn_cast<Instruction>(V)))
+ return false;
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!isa<CallInst>(I))
+ return false;
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+ if (!II)
+ return false;
+ if ((II->getIntrinsicID() == Intrinsic::hexagon_V6_hi_128B && IsHI) ||
+ (II->getIntrinsicID() == Intrinsic::hexagon_V6_lo_128B && !IsHI))
+ return true;
+ return false;
+}
+
+Value *HexagonOptShuffleVector::getOperand(Instruction *I, unsigned i) {
+ Value *V = I->getOperand(i);
+ if (auto *C = dyn_cast<CastInst>(V))
+ return C->getOperand(0);
+ return V;
+}
+
+iterator_range<User::op_iterator>
+HexagonOptShuffleVector::getArgOperands(User *U) {
+ if (auto *CB = dyn_cast<CallBase>(U))
+ return CB->args();
+ return U->operands();
+}
+
+// Strip out all the cast operations to find the first non-cast definition of a
+// value. The function also returns the last cast operation in the def-chain.
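+// For example (hypothetical values), given:
+//   %a = ...                                  ; non-cast definition
+//   %b = zext <64 x i8> %a to <64 x i16>
+//   %c = bitcast <64 x i16> %b to <128 x i8>
+// stripCasts(%c) returns {%a, %b}, i.e. the underlying definition and the
+// innermost cast in the chain.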
+std::pair<Value *, Value *> HexagonOptShuffleVector::stripCasts(Value *V) {
+ Value *LastCast = nullptr;
+ while (auto *C = dyn_cast<CastInst>(V)) {
+ LastCast = V;
+ V = C->getOperand(0);
+ }
+ return std::make_pair(V, LastCast);
+}
+
+bool HexagonOptShuffleVector::isConstantVectorSplat(Value *V) {
+ if (auto *CV = dyn_cast<ConstantVector>(V))
+ return CV->getSplatValue();
+ if (auto *CV = dyn_cast<ConstantDataVector>(V))
+ return CV->isSplat();
+ return false;
+}
+
+// Make sure all the operations on HI and LO counterparts are identical
+// until both halves are merged together. When a merge point (concat)
+// is found, set it as 'NewLoc' and return.
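+// An illustrative (hypothetical) use-chain that is accepted, where %w is,
+// e.g., a cast of the shuffle being relocated:
+//   %hi = call <32 x i32> @llvm.hexagon.V6.hi.128B(<64 x i32> %w)
+//   %lo = call <32 x i32> @llvm.hexagon.V6.lo.128B(<64 x i32> %w)
+//   %ah = add <32 x i32> %hi, %y
+//   %al = add <32 x i32> %lo, %y
+//   %cc = shufflevector <32 x i32> %al, <32 x i32> %ah, <64-wide concat mask>
+// Both halves see identical operations, and %cc (their merge point) is
+// reported as 'NewLoc'.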
+bool HexagonOptShuffleVector::analyzeHiLoUse(Instruction *HI, Instruction *LO,
+ ArrayRef<int> &ShuffMask,
+ Value *&NewLoc,
+ ShuffUseList &CurShuffUses) {
+ ValueVector HiUseList, LoUseList;
+ getUseList(HI, HiUseList);
+ getUseList(LO, LoUseList);
+
+  // To keep the analysis simple, only handle Hi and Lo with a single use.
+  // Also, it is not clear at this point whether this would be profitable due
+  // to multiple merge points.
+ if (HiUseList.size() != 1 || LoUseList.size() != 1)
+ return false;
+
+ Instruction *HiUse = dyn_cast<Instruction>(HiUseList[0]);
+ Instruction *LoUse = dyn_cast<Instruction>(LoUseList[0]);
+ if (!HiUse || !LoUse)
+ return false;
+
+ bool IsUseIntrinsic = false;
+ if (isa<CallInst>(HiUse)) {
+ if (!isa<CallInst>(LoUse))
+ return false;
+ // Continue only if both Hi and Lo uses are calls to the same intrinsic.
+ IntrinsicInst *HiUseII = dyn_cast<IntrinsicInst>(HiUse);
+ IntrinsicInst *LoUseII = dyn_cast<IntrinsicInst>(LoUse);
+ if (!HiUseII || !LoUseII ||
+ HiUseII->getIntrinsicID() != LoUseII->getIntrinsicID() ||
+ !isValidIntrinsic(HiUseII))
+ return false;
+ IsUseIntrinsic = true;
+ HiUse = HiUseII;
+ LoUse = LoUseII;
+ }
+ if (HiUse->getOpcode() != LoUse->getOpcode())
+ return false;
+
+  // If the Hi and Lo uses are the same instruction and it is a concat
+  // operation, set it as 'NewLoc'.
+ if (HiUse == LoUse) {
+ // Return true if use is a concat of Hi and Lo.
+ ArrayRef<int> M;
+ if (match(HiUse, (m_Shuffle(m_Value(), m_Value(), m_Mask(M))))) {
+ if (isConcatMask(M, HiUse)) {
+ NewLoc = HiUse;
+ return true;
+ }
+ }
+ return false;
+ }
+
+ // Check if HiUse and LoUse are shuffles with the same mask. If so, safe to
+ // continue the search.
+ ArrayRef<int> M1, M2;
+ if (match(HiUse, (m_Shuffle(m_Value(), m_Poison(), m_Mask(M1)))) &&
+ match(LoUse, (m_Shuffle(m_Value(), m_Poison(), m_Mask(M2)))) &&
+ M1.equals(M2))
+ return analyzeHiLoUse(HiUse, LoUse, ShuffMask, NewLoc, CurShuffUses);
+
+  // For now, only handle binary ops and some of the intrinsics
+  // which appear to be safe (hardcoded in isValidIntrinsic()).
+ if (!HiUse->isBinaryOp() && !IsUseIntrinsic)
+ return false;
+
+ ValueVector HiUseOperands, LoUseOperands;
+ int HiOpNum = -1, LoOpNum = -1;
+ for (unsigned i = 0; i < HiUse->getNumOperands(); i++) {
+ Value *V = getOperand(HiUse, i);
+ if (V == HI)
+ HiOpNum = i;
+ else
+ HiUseOperands.push_back(V);
+ }
+ for (unsigned i = 0; i < LoUse->getNumOperands(); i++) {
+ Value *V = getOperand(LoUse, i);
+ if (V == LO)
+ LoOpNum = i;
+ else
+ LoUseOperands.push_back(V);
+ }
+
+  // Enforce strict operand ordering. This is not necessary for commutative
+  // operations and may be relaxed in the future if needed.
+ if (HiOpNum < 0 || HiOpNum != LoOpNum ||
+ LoUseOperands.size() != HiUseOperands.size())
+ return false;
+
+ unsigned NumOperands = HiUseOperands.size();
+ for (unsigned i = 0; i < NumOperands; i++) {
+ if (HiUseOperands[i] == LoUseOperands[i])
+ continue;
+    // Only handle the case where the other operands to the Hi and Lo uses
+    // are coming from another Hi and Lo pair.
+ if (!isHILo(HiUseOperands[i], true) || !isHILo(LoUseOperands[i], false))
+ return false;
+
+ Value *DefHiUse = dyn_cast<Instruction>(HiUseOperands[i])->getOperand(0);
+ Value *DefLoUse = dyn_cast<Instruction>(LoUseOperands[i])->getOperand(0);
+ if (!DefHiUse || DefHiUse != DefLoUse)
+ return false;
+ SmallVector<Instruction *, 2> ImmUseList;
+ if (dyn_cast<CastInst>(DefHiUse))
+ ImmUseList.push_back(dyn_cast<Instruction>(DefHiUse));
+ else {
+ ImmUseList.push_back(HiUse);
+ ImmUseList.push_back(LoUse);
+ }
+
+ // Make sure that the Hi/Lo def has the same shuffle mask.
+ if (!hasDefWithSameShuffMask(DefHiUse, ImmUseList, ShuffMask, CurShuffUses))
+ return false;
+ }
+
+ // Continue the search along Hi/Lo use-chain.
+ return analyzeHiLoUse(HiUse, LoUse, ShuffMask, NewLoc, CurShuffUses);
+}
+
+bool HexagonOptShuffleVector::hasDefWithSameShuffMask(
+ Value *V, SmallVector<Instruction *, 2> &ImmUses, ArrayRef<int> &ShuffMask,
+ ShuffUseList &CurShuffUses) {
+  // Follow the def-chain until we find a shufflevector or run out of the
+  // maximum number of attempts (MaxDefSearchCount).
+ if (DefSearchCount >= MaxDefSearchCount)
+ return false;
+
+ ++DefSearchCount;
+ V = stripCasts(V).first;
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+ bool Found = true;
+ ArrayRef<int> M;
+ if (match(V, (m_Shuffle(m_Value(), m_Value(), m_Mask(M)))) &&
+ M.equals(ShuffMask)) {
+ CurShuffUses[I] = ImmUses;
+ return true;
+ }
+ if ((match(V, m_Shuffle(m_InsertElt(m_Poison(), m_Value(), m_Zero()),
+ m_Poison(), m_ZeroMask()))))
+ return true; // scalar converted to a vector
+
+ auto *II = dyn_cast<IntrinsicInst>(I);
+ if (!I->isBinaryOp() && (!II || !isValidIntrinsic(II)))
+ return false;
+
+ for (Value *OpV : getArgOperands(I)) {
+ std::pair<Value *, Value *> P = stripCasts(OpV);
+ OpV = P.first;
+
+ SmallVector<Instruction *, 2> ImmUseList;
+ if (P.second)
+ ImmUseList.push_back(dyn_cast<Instruction>(P.second));
+ else
+ ImmUseList.push_back(dyn_cast<Instruction>(I));
+
+ if (isa<PoisonValue>(OpV))
+ continue;
+ if (isConstantVectorSplat(OpV))
+ continue;
+ if (!dyn_cast<Instruction>(OpV))
+ return false;
+ if ((match(OpV, m_Shuffle(m_InsertElt(m_Poison(), m_Value(), m_Zero()),
+ m_Poison(), m_ZeroMask()))))
+ continue;
+ Found &= hasDefWithSameShuffMask(OpV, ImmUseList, ShuffMask, CurShuffUses);
+ }
+ return Found;
+}
+
+void HexagonOptShuffleVector::FindHiLoUse(ValueVector &UseList,
+ Instruction *&HI, Instruction *&LO) {
+
+ for (unsigned i = 0; i < UseList.size(); i++) {
+ auto *J = dyn_cast<Instruction>(UseList[i]);
+ auto *CI = dyn_cast<CallInst>(J);
+ if (CI) {
+ auto *II = dyn_cast<IntrinsicInst>(CI);
+ if (II) {
+ Intrinsic::ID IntID = II->getIntrinsicID();
+ if (IntID == Intrinsic::hexagon_V6_hi_128B)
+ HI = J;
+ if (IntID == Intrinsic::hexagon_V6_lo_128B)
+ LO = J;
+ }
+ }
+ }
+}
+
+bool HexagonOptShuffleVector::isValidUseInstr(ValueVector &UseList,
+ Instruction *&UI) {
+  // Don't allow multiple uses; those are only handled for a Hi/Lo pair.
+ if (UseList.size() != 1)
+ return false;
+ UI = dyn_cast<Instruction>(UseList[0]);
+ if (!UI)
+ return false;
+  // Should be either a binary op or one of the supported intrinsics.
+ if (auto *CI = dyn_cast<CallInst>(UI)) {
+ auto *II = dyn_cast<IntrinsicInst>(CI);
+ if (!II || !isValidIntrinsic(II))
+ return false;
+ UI = II;
+ } else if (!UI->isBinaryOp())
+ return false;
+ return true;
+}
+
+// Check all the operands of 'Use' to make sure that they are either:
+// 1) a constant
+// 2) a scalar
+// 3) a constant vector
+// 4) a vector using the same mask as I
+bool HexagonOptShuffleVector::areAllOperandsValid(Instruction *I,
+ Instruction *Use,
+ ArrayRef<int> &ShuffMask,
+ ShuffUseList &CurShuffUses) {
+ bool AllOperandsOK = true;
+ for (Value *OpV : getArgOperands(Use)) {
+ bool HasOneUse = OpV->hasOneUse();
+ std::pair<Value *, Value *> P = stripCasts(OpV);
+ OpV = P.first;
+
+ SmallVector<Instruction *, 2> ImmUseList;
+ if (P.second)
+ ImmUseList.push_back(dyn_cast<Instruction>(P.second));
+ else
+ ImmUseList.push_back(dyn_cast<Instruction>(Use));
+
+ if (OpV == I || isa<PoisonValue>(OpV))
+ continue;
+ if (isConstantVectorSplat(OpV))
+ continue;
+ if (!dyn_cast<Instruction>(OpV) || !HasOneUse)
+ return false;
+
+ if ((match(OpV, m_Shuffle(m_InsertElt(m_Poison(), m_Value(), m_Zero()),
+ m_Poison(), m_ZeroMask()))))
+ continue;
+ AllOperandsOK &=
+ hasDefWithSameShuffMask(OpV, ImmUseList, ShuffMask, CurShuffUses);
+ }
+ return AllOperandsOK;
+}
+
+// Find the new location where it's safe to relocate shuffle instruction 'I'.
+bool HexagonOptShuffleVector::findNewShuffLoc(Instruction *I,
+ ArrayRef<int> &ShuffMask,
+ Value *&NewLoc) {
+ DefSearchCount = 0;
+ ValueVector UseList;
+ if (!getUseList(I, UseList))
+ return false;
+
+ using ShuffUseList =
+ SmallDenseMap<Instruction *, SmallVector<Instruction *, 2>>;
+ ShuffUseList CurShuffUses;
+ // Check for Hi and Lo pair.
+ Instruction *HI = nullptr, *LO = nullptr;
+ FindHiLoUse(UseList, HI, LO);
+ if (UseList.size() == 2 && HI && LO) {
+    // If 'I' has a Hi and Lo use-pair, then it can be relocated only after the
+    // Hi/Lo use-chain's merge point, i.e., after a concat vector, provided it
+    // is safe to do so.
+ LLVM_DEBUG({
+      dbgs() << "\tFollowing the Hi/Lo pair :\n";
+ dbgs() << "\t\tHI - ";
+ HI->dump();
+ dbgs() << "\t\tLO - ";
+ LO->dump();
+ });
+ if (!analyzeHiLoUse(HI, LO, ShuffMask, NewLoc, CurShuffUses))
+ return false;
+ for (auto &it : CurShuffUses)
+ ShuffUses[it.first] = it.second;
+ return true;
+ } else { // Single use case
+ Instruction *UI = nullptr;
+ if (!isValidUseInstr(UseList, UI))
+ return false;
+ assert(UI && "Expected a valid use, but found none!!");
+
+ if (HI || LO) {
+ // If the single use case is either Hi or Lo, it is not safe to relocate
+ return false;
+ }
+
+ LLVM_DEBUG(dbgs() << "\tChecking operands in 'use' : \n\t\t"; UI->dump());
+ if (!areAllOperandsValid(I, UI, ShuffMask, CurShuffUses)) {
+ LLVM_DEBUG(dbgs() << "\t\tNOT SAFE -- Exiting!!\n");
+ return false;
+ }
+ for (auto &it : CurShuffUses)
+ ShuffUses[it.first] = it.second;
+ NewLoc = UI;
+    // Keep looking for a new location until we can't proceed any further.
+ findNewShuffLoc(UI, ShuffMask, NewLoc);
+ }
+ return true;
+}
+
+// Move shuffle instruction 'I' after 'NewLoc'.
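+// For example (hypothetical values), if the single use of 'I' is an add whose
+// other operand is a shuffle with the same mask M:
+//   %s = shufflevector <128 x i8> %a, <128 x i8> poison, <128 x i32> M
+//   %t = shufflevector <128 x i8> %b, <128 x i8> poison, <128 x i32> M
+//   %u = add <128 x i8> %s, %t
+// then both input shuffles are folded away and one shuffle is emitted after
+// the add:
+//   %u = add <128 x i8> %a, %b
+//   %r = shufflevector <128 x i8> %u, <128 x i8> poison, <128 x i32> M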
+bool HexagonOptShuffleVector::relocateShuffVec(
+ Instruction *I, ArrayRef<int> &M, Value *NewLoc,
+ std::list<Instruction *> &WorkList) {
+ // Remove original vector shuffles at the input operands.
+ // However, it can be done only if the replacements have the
+ // same number of vector elements as the original operands.
+ std::map<Instruction *, Value *> InstrMap;
+ bool CanReplace = true;
+ unsigned ShuffInstCount = ShuffUses.size();
+ for (auto &it : ShuffUses) {
+ Instruction *J = it.first;
+ Visited.insert(J);
+ Value *ShuffleOP = nullptr;
+ match(J, (m_Shuffle(m_Value(ShuffleOP), m_Poison(), m_Mask(M))));
+ VectorType *JTy = cast<FixedVectorType>(J->getType());
+ VectorType *ShuffTy = cast<FixedVectorType>(ShuffleOP->getType());
+ if (JTy->getElementCount() != ShuffTy->getElementCount())
+ CanReplace = false;
+
+    // Relocate a shufflevector past a widening instruction only if at least
+    // two shufflevectors are being relocated; otherwise the relocation is not
+    // profitable, since it would require additional shuffles.
+ VectorType *NewShuffTy = cast<FixedVectorType>(NewLoc->getType());
+ if (ShuffInstCount == 1 &&
+        NewShuffTy->getScalarSizeInBits() > ShuffTy->getScalarSizeInBits())
+ CanReplace = false;
+ InstrMap[J] = ShuffleOP;
+ }
+ if (!CanReplace) {
+ LLVM_DEBUG(dbgs() << "\tRelocation FAILED!! \n");
+ return false;
+ }
+ for (auto IM : InstrMap) {
+ Instruction *J = IM.first;
+ assert(ShuffUses.count(J));
+ SmallVector<Instruction *, 2> Uses = ShuffUses[J];
+ if (Uses.size() > 0) {
+ for (auto *U : Uses)
+ U->replaceUsesOfWith(IM.first, IM.second);
+ } else
+      // This is the shuffle we started with, and we have already made sure
+      // that it has either a single use or a Hi/Lo use pair, so it is okay
+      // to replace all of its uses with the input to the shuffle instruction.
+ IM.first->replaceAllUsesWith(IM.second);
+ }
+ // Shuffle the output of NewLoc based on the original mask.
+ Instruction *Pos = dyn_cast<Instruction>(NewLoc);
+ assert(Pos);
+ Pos = Pos->getNextNode();
+ IRBuilder<> IRB(Pos);
+ Value *NewShuffV =
+ IRB.CreateShuffleVector(NewLoc, PoisonValue::get(NewLoc->getType()), M);
+ Instruction *NewInst = dyn_cast<Instruction>(NewShuffV);
+ if (!NewInst) {
+ LLVM_DEBUG(dbgs() << "\tRelocation FAILED!! \n");
+ return false;
+ }
+ for (auto UI = NewLoc->user_begin(), UE = NewLoc->user_end(); UI != UE;) {
+ Use &TheUse = UI.getUse();
+ ++UI;
+ Instruction *J = dyn_cast<Instruction>(TheUse.getUser());
+ if (J && TheUse.getUser() != NewShuffV)
+ J->replaceUsesOfWith(NewLoc, NewShuffV);
+ }
+ WorkList.push_back(NewInst);
+  LLVM_DEBUG(dbgs() << "\tRelocation Successful!! \n");
+ LLVM_DEBUG(dbgs() << "\tAdded to Worklist :\n"; NewInst->dump());
+ return true;
+}
+
+bool HexagonOptShuffleVector::visitBlock(BasicBlock *B) {
+ bool Changed = false;
+ ArrayRef<int> M;
+ std::list<Instruction *> WorkList;
+ LLVM_DEBUG(dbgs() << "Preparing worklist for BB:\n");
+ LLVM_DEBUG(B->dump());
+ for (auto &I : *B) {
+ if (match(&I, (m_Shuffle(m_Value(), m_Value(), m_ZeroMask()))))
+ continue; // Skip - building vector from a scalar
+ if (match(&I, (m_Shuffle(m_Value(), m_Poison(), m_Mask(M))))) {
+ WorkList.push_back(&I);
+ LLVM_DEBUG(dbgs() << "\tAdded instr - "; I.dump());
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Processing worklist:\n");
+ while (!WorkList.empty()) {
+#ifndef NDEBUG
+ int Limit = ShuffVecLimit;
+ if (Limit >= 0) {
+ if (NumRelocated >= ShuffVecLimit) {
+ LLVM_DEBUG({
+ dbgs() << "Reached maximum limit!! \n";
+ dbgs() << "Can't process any more shuffles.... \n";
+ });
+ return Changed;
+ }
+ }
+#endif
+ Instruction *I = WorkList.front();
+ WorkList.pop_front();
+ LLVM_DEBUG(dbgs() << "\tProcessing instr - "; I->dump());
+ Value *NewLoc = nullptr;
+
+    // 'ShuffUses' is used to keep track of the vector shuffles that need to
+    // be relocated, along with their immediate uses that are known to satisfy
+    // all the safety requirements of the relocation.
+    // NOTE: The shuffle instr 'I', where the analysis starts, doesn't have
+    // its immediate uses set in 'ShuffUses'. This can be done but isn't
+    // necessary. At this point, only shuffles with a single use or a Hi/Lo
+    // pair are allowed, mostly because those with multiple uses aren't
+    // expected to be very profitable; this can be extended in the future if
+    // necessary. For now, all the uses in such cases can be safely updated
+    // when the corresponding vector shuffle is relocated.
+
+ ShuffUses.clear();
+ ShuffUses[I] = SmallVector<Instruction *, 2>();
+ // Skip if node already visited.
+ if (!Visited.insert(I).second) {
+ LLVM_DEBUG(dbgs() << "\t\tSKIPPING - Already visited ...\n");
+ continue;
+ }
+ if (!match(I, (m_Shuffle(m_Value(), m_Poison(), m_Mask(M))))) {
+ LLVM_DEBUG(dbgs() << "\t\tSKIPPING - Not a vector shuffle ...\n");
+ continue;
+ }
+ if (!findNewShuffLoc(I, M, NewLoc) || !NewLoc) {
+ LLVM_DEBUG(dbgs() << "\t\tSKIPPING - NewLoc not found ...\n");
+ continue;
+ }
+ LLVM_DEBUG(dbgs() << "\t\tRelocating after -- "; NewLoc->dump());
+ Changed |= relocateShuffVec(I, M, NewLoc, WorkList);
+#ifndef NDEBUG
+ NumRelocated++;
+#endif
+ }
+ return Changed;
+}
+
+bool HexagonOptShuffleVector::runOnFunction(Function &F) {
+ HST = TM->getSubtargetImpl(F);
+ // Works only for 128B mode but can be extended for 64B if needed.
+ if (skipFunction(F) || !HST->useHVX128BOps())
+ return false;
+
+ bool Changed = false;
+ for (auto &B : F)
+ Changed |= visitBlock(&B);
+
+ return Changed;
+}
+
+FunctionPass *
+llvm::createHexagonOptShuffleVector(const HexagonTargetMachine &TM) {
+ return new HexagonOptShuffleVector(&TM);
+}
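The legality of this relocation rests on element-wise operations commuting with a fixed permutation: shuffling each input with the same mask and then operating lane-wise gives the same result as operating first and shuffling the single result. A minimal standalone sketch of that identity, with hypothetical 8-lane values:

  #include <array>
  #include <cassert>
  #include <cstdint>

  int main() {
    constexpr int N = 8;
    // A fixed permutation, playing the role of the shuffle mask M.
    const std::array<int, N> M = {6, 4, 2, 0, 7, 5, 3, 1};
    const std::array<uint8_t, N> A = {1, 2, 3, 4, 5, 6, 7, 8};
    const std::array<uint8_t, N> B = {10, 20, 30, 40, 50, 60, 70, 80};

    auto Shuffle = [&](const std::array<uint8_t, N> &V) {
      std::array<uint8_t, N> R{};
      for (int I = 0; I < N; ++I)
        R[I] = V[M[I]];
      return R;
    };
    auto Add = [](const std::array<uint8_t, N> &X,
                  const std::array<uint8_t, N> &Y) {
      std::array<uint8_t, N> R{};
      for (int I = 0; I < N; ++I)
        R[I] = static_cast<uint8_t>(X[I] + Y[I]);
      return R;
    };

    // shuffle(A) + shuffle(B) == shuffle(A + B): the two shuffles feeding the
    // add can be replaced by one shuffle of the add's result.
    assert(Add(Shuffle(A), Shuffle(B)) == Shuffle(Add(A, B)));
    return 0;
  }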
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index e40dbd2..e84070f 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -391,7 +391,6 @@ def Fptoui: pf1<fp_to_uint>;
def Sitofp: pf1<sint_to_fp>;
def Uitofp: pf1<uint_to_fp>;
-
// --(1) Immediate -------------------------------------------------------
//
@@ -474,6 +473,18 @@ def: OpR_R_pat<F2_conv_df2uw_chop, pf1<fp_to_uint>, i32, F64>;
def: OpR_R_pat<F2_conv_sf2ud_chop, pf1<fp_to_uint>, i64, F32>;
def: OpR_R_pat<F2_conv_df2ud_chop, pf1<fp_to_uint>, i64, F64>;
+def: Pat<(i32 (fp_to_bf16 F32:$v)),
+         (C2_mux (F2_sfclass F32:$v, 0x10), (A2_tfrsi (i32 0x7fff)),
+ (C2_mux
+ (C2_cmpeq
+ (A2_and F32:$v, (A2_tfrsi (i32 0x1FFFF))),
+ (A2_tfrsi (i32 0x08000))),
+ (A2_and (A2_asrh F32:$v), (A2_tfrsi (i32 65535))),
+ (A2_and
+ (A2_asrh
+ (A2_add F32:$v, (A2_and F32:$v, (A2_tfrsi (i32 0x8000))))),
+ (A2_tfrsi (i32 65535))))
+ )>;
// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
def: Pat<(i32 (bitconvert F32:$v)), (I32:$v)>;
def: Pat<(f32 (bitconvert I32:$v)), (F32:$v)>;
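The fp_to_bf16 pattern above rounds to nearest-even and quiets NaNs to 0x7fff. A scalar reference sketch of the same arithmetic in C++ (assuming the 0x10 sfclass mask selects NaNs):

  #include <cmath>
  #include <cstdint>
  #include <cstring>

  // Illustrative reference for the rounding performed by the pattern above.
  uint16_t FloatToBF16(float F) {
    uint32_t Bits;
    std::memcpy(&Bits, &F, sizeof(Bits));
    if (std::isnan(F))                 // the sfclass(F, 0x10) branch
      return 0x7fff;
    if ((Bits & 0x1ffff) == 0x08000)   // exact tie and result already even
      return static_cast<uint16_t>(Bits >> 16);
    Bits += Bits & 0x8000;             // otherwise round up on the halfway bit
    return static_cast<uint16_t>(Bits >> 16);
  }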
diff --git a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
index d19920c..674d191 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatternsHVX.td
@@ -15,12 +15,14 @@ def HVI16: PatLeaf<(VecI16 HvxVR:$R)>;
def HVI32: PatLeaf<(VecI32 HvxVR:$R)>;
def HVF16: PatLeaf<(VecF16 HvxVR:$R)>;
def HVF32: PatLeaf<(VecF32 HvxVR:$R)>;
+def HVBF16: PatLeaf<(VecBF16 HvxVR:$R)>;
def HWI8: PatLeaf<(VecPI8 HvxWR:$R)>;
def HWI16: PatLeaf<(VecPI16 HvxWR:$R)>;
def HWI32: PatLeaf<(VecPI32 HvxWR:$R)>;
def HWF16: PatLeaf<(VecPF16 HvxWR:$R)>;
def HWF32: PatLeaf<(VecPF32 HvxWR:$R)>;
+def HWBF16: PatLeaf<(VecBF16 HvxWR:$R)>;
def SDTVecUnaryOp:
SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
@@ -182,12 +184,15 @@ let Predicates = [UseHVX] in {
}
let Predicates = [UseHVXV68] in {
- defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF16, IsVecOff>;
- defm: HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF32, IsVecOff>;
- defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecF16, IsVecOff>;
- defm: HvxLda_pat<V6_vL32b_ai, alignedload, VecF32, IsVecOff>;
- defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecF16, IsVecOff>;
- defm: HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecF32, IsVecOff>;
+ defm : HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecBF16, IsVecOff>;
+ defm : HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF16, IsVecOff>;
+ defm : HvxLda_pat<V6_vL32b_nt_ai, alignednontemporalload, VecF32, IsVecOff>;
+ defm : HvxLda_pat<V6_vL32b_ai, alignedload, VecBF16, IsVecOff>;
+ defm : HvxLda_pat<V6_vL32b_ai, alignedload, VecF16, IsVecOff>;
+ defm : HvxLda_pat<V6_vL32b_ai, alignedload, VecF32, IsVecOff>;
+ defm : HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecBF16, IsVecOff>;
+ defm : HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecF16, IsVecOff>;
+ defm : HvxLd_pat<V6_vL32Ub_ai, unalignedload, VecF32, IsVecOff>;
}
// HVX stores
@@ -233,10 +238,13 @@ let Predicates = [UseHVX] in {
}
let Predicates = [UseHVXV68] in {
+ defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVBF16, IsVecOff>;
defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVF16, IsVecOff>;
defm: HvxSt_pat<V6_vS32b_nt_ai, alignednontemporalstore, HVF32, IsVecOff>;
+ defm: HvxSt_pat<V6_vS32b_ai, alignedstore, HVBF16, IsVecOff>;
defm: HvxSt_pat<V6_vS32b_ai, alignedstore, HVF16, IsVecOff>;
defm: HvxSt_pat<V6_vS32b_ai, alignedstore, HVF32, IsVecOff>;
+ defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVBF16, IsVecOff>;
defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVF16, IsVecOff>;
defm: HvxSt_pat<V6_vS32Ub_ai, unalignedstore, HVF32, IsVecOff>;
}
@@ -253,20 +261,36 @@ let Predicates = [UseHVX] in {
defm: NopCast_pat<VecPI16, VecPI32, HvxWR>;
}
+let Predicates = [UseHVXV68] in {
+ defm: NopCast_pat<VecI8, VecF16, HvxVR>;
+ defm: NopCast_pat<VecI16, VecF16, HvxVR>;
+ defm: NopCast_pat<VecI32, VecF16, HvxVR>;
+ defm: NopCast_pat<VecF32, VecF16, HvxVR>;
+ defm: NopCast_pat<VecPI8, VecPF32, HvxWR>;
+ defm: NopCast_pat<VecPI16, VecPF32, HvxWR>;
+ defm: NopCast_pat<VecPI32, VecPF32, HvxWR>;
+}
+
let Predicates = [UseHVX, UseHVXFloatingPoint] in {
defm: NopCast_pat<VecI8, VecF16, HvxVR>;
+ defm: NopCast_pat<VecI8, VecBF16, HvxVR>;
defm: NopCast_pat<VecI8, VecF32, HvxVR>;
defm: NopCast_pat<VecI16, VecF16, HvxVR>;
+ defm: NopCast_pat<VecI16, VecBF16, HvxVR>;
defm: NopCast_pat<VecI16, VecF32, HvxVR>;
defm: NopCast_pat<VecI32, VecF16, HvxVR>;
+ defm: NopCast_pat<VecI32, VecBF16, HvxVR>;
defm: NopCast_pat<VecI32, VecF32, HvxVR>;
defm: NopCast_pat<VecF16, VecF32, HvxVR>;
defm: NopCast_pat<VecPI8, VecPF16, HvxWR>;
+ defm: NopCast_pat<VecPI8, VecPBF16, HvxWR>;
defm: NopCast_pat<VecPI8, VecPF32, HvxWR>;
defm: NopCast_pat<VecPI16, VecPF16, HvxWR>;
+ defm: NopCast_pat<VecPI16, VecPBF16, HvxWR>;
defm: NopCast_pat<VecPI16, VecPF32, HvxWR>;
defm: NopCast_pat<VecPI32, VecPF16, HvxWR>;
+ defm: NopCast_pat<VecPI32, VecPBF16, HvxWR>;
defm: NopCast_pat<VecPI32, VecPF32, HvxWR>;
defm: NopCast_pat<VecPF16, VecPF32, HvxWR>;
}
@@ -293,6 +317,8 @@ let Predicates = [UseHVX] in {
(Combinev HvxVR:$Vt, HvxVR:$Vs)>;
def: Pat<(VecPI32 (concat_vectors HVI32:$Vs, HVI32:$Vt)),
(Combinev HvxVR:$Vt, HvxVR:$Vs)>;
+ def: Pat<(VecPF32 (concat_vectors HVF32:$Vs, HVF32:$Vt)),
+ (Combinev HvxVR:$Vt, HvxVR:$Vs)>;
def: Pat<(VecQ8 (qcat HQ16:$Qs, HQ16:$Qt)), (Combineq $Qt, $Qs)>;
def: Pat<(VecQ16 (qcat HQ32:$Qs, HQ32:$Qt)), (Combineq $Qt, $Qs)>;
@@ -315,11 +341,14 @@ let Predicates = [UseHVX] in {
let Predicates = [UseHVX, UseHVXFloatingPoint] in {
let AddedComplexity = 100 in {
def: Pat<(VecF16 vzero), (V6_vd0)>;
+ def: Pat<(VecBF16 vzero), (V6_vd0)>;
def: Pat<(VecF32 vzero), (V6_vd0)>;
def: Pat<(VecPF16 vzero), (PS_vdd0)>;
+ def: Pat<(VecPBF16 vzero), (PS_vdd0)>;
def: Pat<(VecPF32 vzero), (PS_vdd0)>;
def: Pat<(concat_vectors (VecF16 vzero), (VecF16 vzero)), (PS_vdd0)>;
+ def : Pat<(concat_vectors (VecBF16 vzero), (VecBF16 vzero)), (PS_vdd0)>;
def: Pat<(concat_vectors (VecF32 vzero), (VecF32 vzero)), (PS_vdd0)>;
}
@@ -355,11 +384,13 @@ let Predicates = [UseHVX] in {
let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
let AddedComplexity = 30 in {
def: Pat<(VecF16 (splat_vector u16_0ImmPred:$V)), (PS_vsplatih imm:$V)>;
+ def: Pat<(VecBF16 (splat_vector u16_0ImmPred:$V)), (PS_vsplatih imm:$V)>;
def: Pat<(VecF32 (splat_vector anyint:$V)), (PS_vsplatiw imm:$V)>;
def: Pat<(VecF32 (splat_vector f32ImmPred:$V)), (PS_vsplatiw (ftoi $V))>;
}
let AddedComplexity = 20 in {
def: Pat<(VecF16 (splat_vector I32:$Rs)), (PS_vsplatrh $Rs)>;
+ def: Pat<(VecBF16 (splat_vector I32:$Rs)), (PS_vsplatrh $Rs)>;
def: Pat<(VecF32 (splat_vector I32:$Rs)), (PS_vsplatrw $Rs)>;
def: Pat<(VecF32 (splat_vector F32:$Rs)), (PS_vsplatrw $Rs)>;
}
@@ -519,6 +550,35 @@ let Predicates = [UseHVXV68, UseHVXIEEEFP] in {
def: Pat<(VecPF16 (Uitofp HVI8:$Vu)), (V6_vcvt_hf_ub HvxVR:$Vu)>;
}
+let Predicates = [UseHVXV81] in {
+ def : Pat<(VecBF16 (pf1<fpround> HWF32:$Vuu)),
+ (V6_vpackwuh_sat (V6_vmux
+ (V6_veqsf (HiVec HvxWR:$Vuu), (HiVec HvxWR:$Vuu)),
+ (V6_vlsrw (V6_vmux (V6_veqw (V6_vand (HiVec HvxWR:$Vuu),
+ (PS_vsplatiw (i32 0x1FFFF))),
+ (PS_vsplatiw (i32 0x08000))),
+ (HiVec HvxWR:$Vuu),
+ (V6_vaddw (HiVec HvxWR:$Vuu),
+ (V6_vand (HiVec HvxWR:$Vuu),
+ (PS_vsplatiw (i32 0x8000))))),
+ (A2_tfrsi 16)),
+ (PS_vsplatih (i32 0x7fff))),
+ (V6_vmux (V6_veqsf (LoVec HvxWR:$Vuu), (LoVec HvxWR:$Vuu)),
+ (V6_vlsrw (V6_vmux (V6_veqw (V6_vand (LoVec HvxWR:$Vuu),
+ (PS_vsplatiw (i32 0x1FFFF))),
+ (PS_vsplatiw (i32 0x08000))),
+ (LoVec HvxWR:$Vuu),
+ (V6_vaddw (LoVec HvxWR:$Vuu),
+ (V6_vand (LoVec HvxWR:$Vuu),
+ (PS_vsplatiw (i32 0x8000))))),
+ (A2_tfrsi 16)),
+ (PS_vsplatih (i32 0x7fff))))>;
+}
+
+let Predicates = [UseHVXV73, UseHVXQFloat] in {
+ def : Pat<(VecF32 (Sitofp HVI32:$Vu)), (V6_vconv_sf_w HvxVR:$Vu)>;
+}
+
let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
def: Pat<(vselect HQ16:$Qu, HVF16:$Vs, HVF16:$Vt),
(V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
@@ -531,6 +591,13 @@ let Predicates = [UseHVXV68, UseHVXFloatingPoint] in {
(V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
}
+let Predicates = [UseHVXV81, UseHVXFloatingPoint] in {
+ def : Pat<(vselect HQ16:$Qu, HVBF16:$Vs, HVBF16:$Vt),
+ (V6_vmux HvxQR:$Qu, HvxVR:$Vs, HvxVR:$Vt)>;
+ def : Pat<(vselect (qnot HQ16:$Qu), HVBF16:$Vs, HVBF16:$Vt),
+ (V6_vmux HvxQR:$Qu, HvxVR:$Vt, HvxVR:$Vs)>;
+}
+
let Predicates = [UseHVXV68, UseHVX128B, UseHVXQFloat] in {
let AddedComplexity = 220 in {
defm: MinMax_pats<V6_vmin_hf, V6_vmax_hf, vselect, setgt, VecQ16, HVF16>;
diff --git a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
index f29a739..c9cb449 100644
--- a/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonQFPOptimizer.cpp
@@ -58,7 +58,7 @@
// are PHI inst.
//
//===----------------------------------------------------------------------===//
-#include <unordered_set>
+
#define HEXAGON_QFP_OPTIMIZER "QFP optimizer pass"
#include "Hexagon.h"
@@ -77,7 +77,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
-#include <vector>
#define DEBUG_TYPE "hexagon-qfp-optimizer"
@@ -86,6 +85,9 @@ using namespace llvm;
cl::opt<bool>
DisableQFOptimizer("disable-qfp-opt", cl::init(false),
cl::desc("Disable optimization of Qfloat operations."));
+cl::opt<bool> DisableQFOptForMul(
+ "disable-qfp-opt-mul", cl::init(true),
+ cl::desc("Disable optimization of Qfloat operations for multiply."));
namespace {
const std::map<unsigned short, unsigned short> QFPInstMap{
@@ -101,11 +103,16 @@ const std::map<unsigned short, unsigned short> QFPInstMap{
{Hexagon::V6_vmpy_qf16_mix_hf, Hexagon::V6_vmpy_qf16},
{Hexagon::V6_vmpy_qf32_hf, Hexagon::V6_vmpy_qf32_mix_hf},
{Hexagon::V6_vmpy_qf32_mix_hf, Hexagon::V6_vmpy_qf32_qf16},
- {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32}};
+ {Hexagon::V6_vmpy_qf32_sf, Hexagon::V6_vmpy_qf32},
+ {Hexagon::V6_vilog2_sf, Hexagon::V6_vilog2_qf32},
+ {Hexagon::V6_vilog2_hf, Hexagon::V6_vilog2_qf16},
+ {Hexagon::V6_vabs_qf32_sf, Hexagon::V6_vabs_qf32_qf32},
+ {Hexagon::V6_vabs_qf16_hf, Hexagon::V6_vabs_qf16_qf16},
+ {Hexagon::V6_vneg_qf32_sf, Hexagon::V6_vneg_qf32_qf32},
+ {Hexagon::V6_vneg_qf16_hf, Hexagon::V6_vneg_qf16_qf16}};
} // namespace
namespace {
-
struct HexagonQFPOptimizer : public MachineFunctionPass {
public:
static char ID;
@@ -116,6 +123,10 @@ public:
bool optimizeQfp(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool optimizeQfpTwoOp(MachineInstr *MI, MachineBasicBlock *MBB);
+
+ bool optimizeQfpOneOp(MachineInstr *MI, MachineBasicBlock *MBB);
+
StringRef getPassName() const override { return HEXAGON_QFP_OPTIMIZER; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -142,19 +153,69 @@ FunctionPass *llvm::createHexagonQFPOptimizer() {
bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
MachineBasicBlock *MBB) {
- // Early exit:
- // - if instruction is invalid or has too few operands (QFP ops need 2 sources
- // + 1 dest),
- // - or does not have a transformation mapping.
- if (MI->getNumOperands() < 3)
+ if (MI->getNumOperands() == 2)
+ return optimizeQfpOneOp(MI, MBB);
+ else if (MI->getNumOperands() == 3)
+ return optimizeQfpTwoOp(MI, MBB);
+ else
return false;
+}
+
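+// Fold a qf-to-sf/hf conversion feeding a single-source op into the qf form of
+// that op. Illustrative (hypothetical vregs):
+//   %1 = V6_vconv_sf_qf32 %0
+//   %2 = V6_vabs_qf32_sf %1
+// becomes
+//   %2 = V6_vabs_qf32_qf32 %0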
+bool HexagonQFPOptimizer::optimizeQfpOneOp(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+
+ unsigned Op0F = 0;
auto It = QFPInstMap.find(MI->getOpcode());
if (It == QFPInstMap.end())
return false;
+
unsigned short InstTy = It->second;
+  // Get the reaching def of MI's operand.
+ MachineInstr *DefMI = MRI->getVRegDef(MI->getOperand(1).getReg());
+ MachineOperand &Res = MI->getOperand(0);
+ if (!Res.isReg())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI->dump());
+ MachineInstr *ReachDefDef = nullptr;
+
+ // Get the reaching def of the reaching def to check for W reg def
+ if (DefMI->getNumOperands() > 1 && DefMI->getOperand(1).isReg() &&
+ DefMI->getOperand(1).getReg().isVirtual())
+ ReachDefDef = MRI->getVRegDef(DefMI->getOperand(1).getReg());
+ unsigned ReachDefOp = DefMI->getOpcode();
+ MachineInstrBuilder MIB;
+
+ // Check if the reaching def is a conversion
+ if (ReachDefOp == Hexagon::V6_vconv_sf_qf32 ||
+ ReachDefOp == Hexagon::V6_vconv_hf_qf16) {
+
+  // Return if the reaching def of the reaching def is a W-type register.
+ if (ReachDefDef && MRI->getRegClass(ReachDefDef->getOperand(0).getReg()) ==
+ &Hexagon::HvxWRRegClass)
+ return false;
+
+ // Analyze the use operands of the conversion to get their KILL status
+ MachineOperand &SrcOp = DefMI->getOperand(1);
+ Op0F = getKillRegState(SrcOp.isKill());
+ SrcOp.setIsKill(false);
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(SrcOp.getReg(), Op0F, SrcOp.getSubReg());
+ LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
+ return true;
+ }
+ return false;
+}
+
+bool HexagonQFPOptimizer::optimizeQfpTwoOp(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
unsigned Op0F = 0;
unsigned Op1F = 0;
+ auto It = QFPInstMap.find(MI->getOpcode());
+ if (It == QFPInstMap.end())
+ return false;
+ unsigned short InstTy = It->second;
// Get the reaching defs of MI, DefMI1 and DefMI2
MachineInstr *DefMI1 = nullptr;
MachineInstr *DefMI2 = nullptr;
@@ -167,6 +228,9 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
return false;
MachineOperand &Res = MI->getOperand(0);
+ if (!Res.isReg())
+ return false;
+
MachineInstr *Inst1 = nullptr;
MachineInstr *Inst2 = nullptr;
LLVM_DEBUG(dbgs() << "\n[Reaching Defs of operands]: "; DefMI1->dump();
@@ -185,7 +249,8 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
unsigned Def2OP = DefMI2->getOpcode();
MachineInstrBuilder MIB;
- // Case 1: Both reaching defs of MI are qf to sf/hf conversions
+
+  // Check if both of the reaching defs of MI are qf to sf/hf conversions.
if ((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
Def2OP == Hexagon::V6_vconv_sf_qf32) ||
(Def1OP == Hexagon::V6_vconv_hf_qf16 &&
@@ -226,7 +291,7 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
return true;
- // Case 2: Left operand is conversion to sf/hf
+ // Check if left operand's reaching def is a conversion to sf/hf
} else if (((Def1OP == Hexagon::V6_vconv_sf_qf32 &&
Def2OP != Hexagon::V6_vconv_sf_qf32) ||
(Def1OP == Hexagon::V6_vconv_hf_qf16 &&
@@ -250,7 +315,7 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
return true;
- // Case 2: Left operand is conversion to sf/hf
+ // Check if right operand's reaching def is a conversion to sf/hf
} else if (((Def1OP != Hexagon::V6_vconv_sf_qf32 &&
Def2OP == Hexagon::V6_vconv_sf_qf32) ||
(Def1OP != Hexagon::V6_vconv_hf_qf16 &&
@@ -258,13 +323,6 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
!DefMI1->isPHI() &&
(MI->getOpcode() != Hexagon::V6_vmpy_qf32_sf)) {
// The second operand of original instruction is converted.
- // In "mix" instructions, "qf" operand is always the first operand.
-
- // Caveat: vsub is not commutative w.r.t operands.
- if (InstTy == Hexagon::V6_vsub_qf16_mix ||
- InstTy == Hexagon::V6_vsub_qf32_mix)
- return false;
-
if (Inst2 && MRI->getRegClass(Inst2->getOperand(0).getReg()) ==
&Hexagon::HvxWRRegClass)
return false;
@@ -275,10 +333,26 @@ bool HexagonQFPOptimizer::optimizeQfp(MachineInstr *MI,
Op1F = getKillRegState(Src2.isKill());
Src2.setIsKill(false);
Op0F = getKillRegState(Src1.isKill());
- MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
- .addReg(Src2.getReg(), Op1F,
- Src2.getSubReg()) // Notice the operands are flipped.
- .addReg(Src1.getReg(), Op0F, Src1.getSubReg());
+ if (InstTy == Hexagon::V6_vsub_qf16_mix ||
+ InstTy == Hexagon::V6_vsub_qf32_mix) {
+ if (!HST->useHVXV81Ops())
+        // vsub_(hf|sf)_mix instructions are only available on HVX v81+.
+ return false;
+ // vsub is not commutative w.r.t. operands -> treat it as a special case
+ // to choose the correct mix instruction.
+ if (Def2OP == Hexagon::V6_vconv_sf_qf32)
+ InstTy = Hexagon::V6_vsub_sf_mix;
+ else if (Def2OP == Hexagon::V6_vconv_hf_qf16)
+ InstTy = Hexagon::V6_vsub_hf_mix;
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg())
+ .addReg(Src2.getReg(), Op1F, Src2.getSubReg());
+ } else {
+ MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), HII->get(InstTy), Res.getReg())
+ .addReg(Src2.getReg(), Op1F,
+ Src2.getSubReg()) // Notice the operands are flipped.
+ .addReg(Src1.getReg(), Op0F, Src1.getSubReg());
+ }
LLVM_DEBUG(dbgs() << "\n[Inserting]: "; MIB.getInstr()->dump());
return true;
}
@@ -309,15 +383,18 @@ bool HexagonQFPOptimizer::runOnMachineFunction(MachineFunction &MF) {
while (MII != MBBI->instr_end()) {
MachineInstr *MI = &*MII;
++MII; // As MI might be removed.
-
- if (QFPInstMap.count(MI->getOpcode()) &&
- MI->getOpcode() != Hexagon::V6_vconv_sf_qf32 &&
- MI->getOpcode() != Hexagon::V6_vconv_hf_qf16) {
- LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
- if (optimizeQfp(MI, MBB)) {
- MI->eraseFromParent();
- LLVM_DEBUG(dbgs() << "\t....Removing....");
- Changed = true;
+ if (QFPInstMap.count(MI->getOpcode())) {
+ auto OpC = MI->getOpcode();
+ if (DisableQFOptForMul && HII->isQFPMul(MI))
+ continue;
+ if (OpC != Hexagon::V6_vconv_sf_qf32 &&
+ OpC != Hexagon::V6_vconv_hf_qf16) {
+ LLVM_DEBUG(dbgs() << "\n###Analyzing for removal: "; MI->dump());
+ if (optimizeQfp(MI, MBB)) {
+ MI->eraseFromParent();
+ LLVM_DEBUG(dbgs() << "\t....Removing....");
+ Changed = true;
+ }
}
}
}
diff --git a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
index 54f5608..f375b25 100644
--- a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -34,7 +34,6 @@
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <limits>
-#include <utility>
using namespace llvm;
using namespace rdf;
diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index 3a77fcd..1f1aebd 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -15,141 +15,126 @@ let Namespace = "Hexagon" in {
class HexagonReg<bits<5> num, string n, list<string> alt = [],
list<Register> alias = []> : Register<n, alt> {
let Aliases = alias;
- let HWEncoding{4-0} = num;
+ let HWEncoding{4 -0} = num;
}
// These registers are used to preserve a distinction between
// vector register pairs of differing order.
- class HexagonFakeReg<string n> : Register<n> {
- let isArtificial = 1;
- }
+ class HexagonFakeReg<string n> : Register<n> { let isArtificial = 1; }
class HexagonDoubleReg<bits<5> num, string n, list<Register> subregs,
- list<string> alt = []> :
- RegisterWithSubRegs<n, subregs> {
+ list<string> alt = []>
+ : RegisterWithSubRegs<n, subregs> {
let AltNames = alt;
- let HWEncoding{4-0} = num;
+ let HWEncoding{4 -0} = num;
}
class HexagonSys<bits<7> num, string n, list<string> alt = [],
list<Register> alias = []> : Register<n, alt> {
let Aliases = alias;
- let HWEncoding{6-0} = num;
+ let HWEncoding{6 -0} = num;
}
class HexagonDoubleSys<bits<7> num, string n, list<Register> subregs,
- list<string> alt = []> :
- RegisterWithSubRegs<n, subregs> {
+ list<string> alt = []>
+ : RegisterWithSubRegs<n, subregs> {
let AltNames = alt;
- let HWEncoding{6-0} = num;
+ let HWEncoding{6 -0} = num;
}
// Registers are identified with 5-bit ID numbers.
// Ri - 32-bit integer registers.
- class Ri<bits<5> num, string n, list<string> alt = []> :
- HexagonReg<num, n, alt>;
+ class Ri<bits<5> num, string n, list<string> alt = []>
+ : HexagonReg<num, n, alt>;
// Rp - false/pseudo registers. These registers are used
// to provide a distinct set of aliases for both styles of vector
// register pairs without encountering subregister indexing constraints.
- class R_fake<string n> :
- HexagonFakeReg<n>;
-
+ class R_fake<string n> : HexagonFakeReg<n>;
// Rf - 32-bit floating-point registers.
class Rf<bits<5> num, string n> : HexagonReg<num, n>;
// Rd - 64-bit registers.
- class Rd<bits<5> num, string n, list<Register> subregs,
- list<string> alt = []> :
- HexagonDoubleReg<num, n, subregs, alt> {
+ class Rd<bits<5> num, string n, list<Register> subregs, list<string> alt = []>
+ : HexagonDoubleReg<num, n, subregs, alt> {
let SubRegs = subregs;
}
// Rp - predicate registers
class Rp<bits<5> num, string n> : HexagonReg<num, n>;
-
// Rq - vector predicate registers
class Rq<bits<3> num, string n> : Register<n, []> {
- let HWEncoding{2-0} = num;
+ let HWEncoding{2 -0} = num;
}
// Rc - control registers
- class Rc<bits<5> num, string n,
- list<string> alt = [], list<Register> alias = []> :
- HexagonReg<num, n, alt, alias>;
+ class Rc<bits<5> num, string n, list<string> alt = [],
+ list<Register> alias = []> : HexagonReg<num, n, alt, alias>;
// Rcc - 64-bit control registers.
class Rcc<bits<5> num, string n, list<Register> subregs,
- list<string> alt = []> :
- HexagonDoubleReg<num, n, subregs, alt> {
+ list<string> alt = []> : HexagonDoubleReg<num, n, subregs, alt> {
let SubRegs = subregs;
}
// Rs - system registers
- class Rs<bits<7> num, string n,
- list<string> alt = [], list<Register> alias = []> :
- HexagonSys<num, n, alt, alias>;
+ class Rs<bits<7> num, string n, list<string> alt = [],
+ list<Register> alias = []> : HexagonSys<num, n, alt, alias>;
// Rss - 64-bit system registers.
class Rss<bits<7> num, string n, list<Register> subregs,
- list<string> alt = []> :
- HexagonDoubleSys<num, n, subregs, alt> {
+ list<string> alt = []> : HexagonDoubleSys<num, n, subregs, alt> {
let SubRegs = subregs;
}
// Mx - address modifier registers
- class Mx<bits<1> num, string n> : Register<n, []> {
- let HWEncoding{0} = num;
- }
+ class Mx<bits<1> num, string n> : Register<n, []> { let HWEncoding{0} = num; }
// Rg - Guest/Hypervisor registers
- class Rg<bits<5> num, string n,
- list<string> alt = [], list<Register> alias = []> :
- HexagonReg<num, n, alt, alias>;
+ class Rg<bits<5> num, string n, list<string> alt = [],
+ list<Register> alias = []> : HexagonReg<num, n, alt, alias>;
// Rgg - 64-bit Guest/Hypervisor registers
- class Rgg<bits<5> num, string n, list<Register> subregs> :
- HexagonDoubleReg<num, n, subregs> {
+ class Rgg<bits<5> num, string n, list<Register> subregs>
+ : HexagonDoubleReg<num, n, subregs> {
let SubRegs = subregs;
}
- def isub_lo : SubRegIndex<32>;
- def isub_hi : SubRegIndex<32, 32>;
- def vsub_lo : SubRegIndex<-1, -1>;
- def vsub_hi : SubRegIndex<-1, -1>;
- def vsub_fake: SubRegIndex<-1, -1>;
- def wsub_lo : SubRegIndex<-1, -1>;
- def wsub_hi : SubRegIndex<-1, -1>;
+ def isub_lo : SubRegIndex<32>;
+ def isub_hi : SubRegIndex<32, 32>;
+ def vsub_lo : SubRegIndex<-1, -1>;
+ def vsub_hi : SubRegIndex<-1, -1>;
+ def vsub_fake : SubRegIndex<-1, -1>;
+ def wsub_lo : SubRegIndex<-1, -1>;
+ def wsub_hi : SubRegIndex<-1, -1>;
def subreg_overflow : SubRegIndex<1, 0>;
// Integer registers.
- foreach i = 0-28 in {
- def R#i : Ri<i, "r"#i>, DwarfRegNum<[i]>;
- }
+ foreach i = 0 -28 in { def R#i : Ri<i, "r"#i>, DwarfRegNum<[i]>; }
def R29 : Ri<29, "r29", ["sp"]>, DwarfRegNum<[29]>;
def R30 : Ri<30, "r30", ["fp"]>, DwarfRegNum<[30]>;
def R31 : Ri<31, "r31", ["lr"]>, DwarfRegNum<[31]>;
// Aliases of the R* registers used to hold 64-bit int values (doubles).
let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in {
- def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>;
- def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>;
- def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>;
- def D3 : Rd< 6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>;
- def D4 : Rd< 8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>;
- def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>;
- def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>;
- def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>;
- def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>;
- def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>;
- def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>;
- def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>;
- def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>;
- def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>;
- def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>;
- def D15 : Rd<30, "r31:30", [R30, R31], ["lr:fp"]>, DwarfRegNum<[62]>;
+ def D0 : Rd<0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>;
+ def D1 : Rd<2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>;
+ def D2 : Rd<4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>;
+ def D3 : Rd<6, "r7:6", [R6, R7]>, DwarfRegNum<[38]>;
+ def D4 : Rd<8, "r9:8", [R8, R9]>, DwarfRegNum<[40]>;
+ def D5 : Rd<10, "r11:10", [R10, R11]>, DwarfRegNum<[42]>;
+ def D6 : Rd<12, "r13:12", [R12, R13]>, DwarfRegNum<[44]>;
+ def D7 : Rd<14, "r15:14", [R14, R15]>, DwarfRegNum<[46]>;
+ def D8 : Rd<16, "r17:16", [R16, R17]>, DwarfRegNum<[48]>;
+ def D9 : Rd<18, "r19:18", [R18, R19]>, DwarfRegNum<[50]>;
+ def D10 : Rd<20, "r21:20", [R20, R21]>, DwarfRegNum<[52]>;
+ def D11 : Rd<22, "r23:22", [R22, R23]>, DwarfRegNum<[54]>;
+ def D12 : Rd<24, "r25:24", [R24, R25]>, DwarfRegNum<[56]>;
+ def D13 : Rd<26, "r27:26", [R26, R27]>, DwarfRegNum<[58]>;
+ def D14 : Rd<28, "r29:28", [R28, R29]>, DwarfRegNum<[60]>;
+ def D15 : Rd<30, "r31:30", [R30, R31], ["lr:fp"]>, DwarfRegNum<[62]>;
}
// Predicate registers.
@@ -164,119 +149,118 @@ let Namespace = "Hexagon" in {
// on the entire USR.
def USR_OVF : Rc<?, "usr.ovf">;
- def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[75]> {
+ def USR : Rc<8, "usr", ["c8"]>, DwarfRegNum<[75]> {
let SubRegIndices = [subreg_overflow];
let SubRegs = [USR_OVF];
}
// Control registers.
- def SA0: Rc<0, "sa0", ["c0"]>, DwarfRegNum<[67]>;
- def LC0: Rc<1, "lc0", ["c1"]>, DwarfRegNum<[68]>;
- def SA1: Rc<2, "sa1", ["c2"]>, DwarfRegNum<[69]>;
- def LC1: Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>;
- def P3_0: Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>,
- DwarfRegNum<[71]>;
+ def SA0 : Rc<0, "sa0", ["c0"]>, DwarfRegNum<[67]>;
+ def LC0 : Rc<1, "lc0", ["c1"]>, DwarfRegNum<[68]>;
+ def SA1 : Rc<2, "sa1", ["c2"]>, DwarfRegNum<[69]>;
+ def LC1 : Rc<3, "lc1", ["c3"]>, DwarfRegNum<[70]>;
+ def P3_0 : Rc<4, "p3:0", ["c4"], [P0, P1, P2, P3]>, DwarfRegNum<[71]>;
// When defining more Cn registers, make sure to explicitly mark them
// as reserved in HexagonRegisterInfo.cpp.
- def C5: Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>;
- def M0: Rc<6, "m0", ["c6"]>, DwarfRegNum<[73]>;
- def M1: Rc<7, "m1", ["c7"]>, DwarfRegNum<[74]>;
+ def C5 : Rc<5, "c5", ["c5"]>, DwarfRegNum<[72]>;
+ def M0 : Rc<6, "m0", ["c6"]>, DwarfRegNum<[73]>;
+ def M1 : Rc<7, "m1", ["c7"]>, DwarfRegNum<[74]>;
// Define C8 separately and make it aliased with USR.
// The problem is that USR has subregisters (e.g. overflow). If USR was
// specified as a subregister of C9_8, it would imply that subreg_overflow
// and isub_lo can be composed, which leads to all kinds of issues
// with lane masks.
- def C8: Rc<8, "c8", [], [USR]>, DwarfRegNum<[75]>;
- def PC: Rc<9, "pc", ["c9"]>, DwarfRegNum<[76]>;
- def UGP: Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>;
- def GP: Rc<11, "gp", ["c11"]>, DwarfRegNum<[78]>;
- def CS0: Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>;
- def CS1: Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>;
- def UPCYCLELO: Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>;
- def UPCYCLEHI: Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>;
- def FRAMELIMIT: Rc<16, "framelimit", ["c16"]>, DwarfRegNum<[83]>;
- def FRAMEKEY: Rc<17, "framekey", ["c17"]>, DwarfRegNum<[84]>;
- def PKTCOUNTLO: Rc<18, "pktcountlo", ["c18"]>, DwarfRegNum<[85]>;
- def PKTCOUNTHI: Rc<19, "pktcounthi", ["c19"]>, DwarfRegNum<[86]>;
- def UTIMERLO: Rc<30, "utimerlo", ["c30"]>, DwarfRegNum<[97]>;
- def UTIMERHI: Rc<31, "utimerhi", ["c31"]>, DwarfRegNum<[98]>;
+ def C8 : Rc<8, "c8", [], [USR]>, DwarfRegNum<[75]>;
+ def PC : Rc<9, "pc", ["c9"]>, DwarfRegNum<[76]>;
+ def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>;
+ def GP : Rc<11, "gp", ["c11"]>, DwarfRegNum<[78]>;
+ def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>;
+ def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>;
+ def UPCYCLELO : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>;
+ def UPCYCLEHI : Rc<15, "upcyclehi", ["c15"]>, DwarfRegNum<[82]>;
+ def FRAMELIMIT : Rc<16, "framelimit", ["c16"]>, DwarfRegNum<[83]>;
+ def FRAMEKEY : Rc<17, "framekey", ["c17"]>, DwarfRegNum<[84]>;
+ def PKTCOUNTLO : Rc<18, "pktcountlo", ["c18"]>, DwarfRegNum<[85]>;
+ def PKTCOUNTHI : Rc<19, "pktcounthi", ["c19"]>, DwarfRegNum<[86]>;
+ def UTIMERLO : Rc<30, "utimerlo", ["c30"]>, DwarfRegNum<[97]>;
+ def UTIMERHI : Rc<31, "utimerhi", ["c31"]>, DwarfRegNum<[98]>;
// Control registers pairs.
let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in {
- def C1_0 : Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>;
- def C3_2 : Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>;
- def C5_4 : Rcc<4, "c5:4", [P3_0, C5]>, DwarfRegNum<[71]>;
- def C7_6 : Rcc<6, "c7:6", [M0, M1], ["m1:0"]>, DwarfRegNum<[72]>;
+ def C1_0 : Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>;
+ def C3_2 : Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>;
+ def C5_4 : Rcc<4, "c5:4", [P3_0, C5]>, DwarfRegNum<[71]>;
+ def C7_6 : Rcc<6, "c7:6", [M0, M1], ["m1:0"]>, DwarfRegNum<[72]>;
// Use C8 instead of USR as a subregister of C9_8.
- def C9_8 : Rcc<8, "c9:8", [C8, PC]>, DwarfRegNum<[74]>;
- def C11_10 : Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>;
- def CS : Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>;
- def UPCYCLE: Rcc<14, "c15:14", [UPCYCLELO, UPCYCLEHI], ["upcycle"]>,
- DwarfRegNum<[80]>;
- def C17_16 : Rcc<16, "c17:16", [FRAMELIMIT, FRAMEKEY]>, DwarfRegNum<[83]>;
+ def C9_8 : Rcc<8, "c9:8", [C8, PC]>, DwarfRegNum<[74]>;
+ def C11_10 : Rcc<10, "c11:10", [UGP, GP]>, DwarfRegNum<[76]>;
+ def CS : Rcc<12, "c13:12", [CS0, CS1], ["cs1:0"]>, DwarfRegNum<[78]>;
+ def UPCYCLE : Rcc<14, "c15:14", [UPCYCLELO, UPCYCLEHI], ["upcycle"]>,
+ DwarfRegNum<[80]>;
+ def C17_16 : Rcc<16, "c17:16", [FRAMELIMIT, FRAMEKEY]>, DwarfRegNum<[83]>;
def PKTCOUNT : Rcc<18, "c19:18", [PKTCOUNTLO, PKTCOUNTHI], ["pktcount"]>,
- DwarfRegNum<[85]>;
- def UTIMER : Rcc<30, "c31:30", [UTIMERLO, UTIMERHI], ["utimer"]>,
- DwarfRegNum<[97]>;
+ DwarfRegNum<[85]>;
+ def UTIMER : Rcc<30, "c31:30", [UTIMERLO, UTIMERHI], ["utimer"]>,
+ DwarfRegNum<[97]>;
}
- foreach i = 0-31 in {
- def V#i : Ri<i, "v"#i>, DwarfRegNum<[!add(i, 99)]>;
- def VF#i : R_fake<"__"#!add(i,999999)>, DwarfRegNum<[!add(i, 999999)]>;
- def VFR#i : R_fake<"__"#!add(i,9999999)>, DwarfRegNum<[!add(i, 9999999)]>;
+ foreach i = 0 -31 in {
+ def V#i : Ri<i, "v"#i>, DwarfRegNum<[!add(i, 99)]>;
+ def VF#i : R_fake<"__"#!add(i, 999999)>, DwarfRegNum<[!add(i, 999999)]>;
+ def VFR#i : R_fake<"__"#!add(i, 9999999)>, DwarfRegNum<[!add(i, 9999999)]>;
}
def VTMP : Ri<0, "vtmp">, DwarfRegNum<[131]>;
// Aliases of the V* registers used to hold double vec values.
let SubRegIndices = [vsub_lo, vsub_hi, vsub_fake], CoveredBySubRegs = 1 in {
- def W0 : Rd< 0, "v1:0", [V0, V1, VF0]>, DwarfRegNum<[99]>;
- def W1 : Rd< 2, "v3:2", [V2, V3, VF1]>, DwarfRegNum<[101]>;
- def W2 : Rd< 4, "v5:4", [V4, V5, VF2]>, DwarfRegNum<[103]>;
- def W3 : Rd< 6, "v7:6", [V6, V7, VF3]>, DwarfRegNum<[105]>;
- def W4 : Rd< 8, "v9:8", [V8, V9, VF4]>, DwarfRegNum<[107]>;
- def W5 : Rd<10, "v11:10", [V10, V11, VF5]>, DwarfRegNum<[109]>;
- def W6 : Rd<12, "v13:12", [V12, V13, VF6]>, DwarfRegNum<[111]>;
- def W7 : Rd<14, "v15:14", [V14, V15, VF7]>, DwarfRegNum<[113]>;
- def W8 : Rd<16, "v17:16", [V16, V17, VF8]>, DwarfRegNum<[115]>;
- def W9 : Rd<18, "v19:18", [V18, V19, VF9]>, DwarfRegNum<[117]>;
- def W10 : Rd<20, "v21:20", [V20, V21, VF10]>, DwarfRegNum<[119]>;
- def W11 : Rd<22, "v23:22", [V22, V23, VF11]>, DwarfRegNum<[121]>;
- def W12 : Rd<24, "v25:24", [V24, V25, VF12]>, DwarfRegNum<[123]>;
- def W13 : Rd<26, "v27:26", [V26, V27, VF13]>, DwarfRegNum<[125]>;
- def W14 : Rd<28, "v29:28", [V28, V29, VF14]>, DwarfRegNum<[127]>;
- def W15 : Rd<30, "v31:30", [V30, V31, VF15]>, DwarfRegNum<[129]>;
+ def W0 : Rd<0, "v1:0", [V0, V1, VF0]>, DwarfRegNum<[99]>;
+ def W1 : Rd<2, "v3:2", [V2, V3, VF1]>, DwarfRegNum<[101]>;
+ def W2 : Rd<4, "v5:4", [V4, V5, VF2]>, DwarfRegNum<[103]>;
+ def W3 : Rd<6, "v7:6", [V6, V7, VF3]>, DwarfRegNum<[105]>;
+ def W4 : Rd<8, "v9:8", [V8, V9, VF4]>, DwarfRegNum<[107]>;
+ def W5 : Rd<10, "v11:10", [V10, V11, VF5]>, DwarfRegNum<[109]>;
+ def W6 : Rd<12, "v13:12", [V12, V13, VF6]>, DwarfRegNum<[111]>;
+ def W7 : Rd<14, "v15:14", [V14, V15, VF7]>, DwarfRegNum<[113]>;
+ def W8 : Rd<16, "v17:16", [V16, V17, VF8]>, DwarfRegNum<[115]>;
+ def W9 : Rd<18, "v19:18", [V18, V19, VF9]>, DwarfRegNum<[117]>;
+ def W10 : Rd<20, "v21:20", [V20, V21, VF10]>, DwarfRegNum<[119]>;
+ def W11 : Rd<22, "v23:22", [V22, V23, VF11]>, DwarfRegNum<[121]>;
+ def W12 : Rd<24, "v25:24", [V24, V25, VF12]>, DwarfRegNum<[123]>;
+ def W13 : Rd<26, "v27:26", [V26, V27, VF13]>, DwarfRegNum<[125]>;
+ def W14 : Rd<28, "v29:28", [V28, V29, VF14]>, DwarfRegNum<[127]>;
+ def W15 : Rd<30, "v31:30", [V30, V31, VF15]>, DwarfRegNum<[129]>;
}
// Reverse Aliases of the V* registers used to hold double vec values.
let SubRegIndices = [vsub_lo, vsub_hi, vsub_fake], CoveredBySubRegs = 1 in {
- def WR0 : Rd< 1, "v0:1", [V0, V1, VFR0]>, DwarfRegNum<[161]>;
- def WR1 : Rd< 3, "v2:3", [V2, V3, VFR1]>, DwarfRegNum<[162]>;
- def WR2 : Rd< 5, "v4:5", [V4, V5, VFR2]>, DwarfRegNum<[163]>;
- def WR3 : Rd< 7, "v6:7", [V6, V7, VFR3]>, DwarfRegNum<[164]>;
- def WR4 : Rd< 9, "v8:9", [V8, V9, VFR4]>, DwarfRegNum<[165]>;
- def WR5 : Rd<11, "v10:11", [V10, V11, VFR5]>, DwarfRegNum<[166]>;
- def WR6 : Rd<13, "v12:13", [V12, V13, VFR6]>, DwarfRegNum<[167]>;
- def WR7 : Rd<15, "v14:15", [V14, V15, VFR7]>, DwarfRegNum<[168]>;
- def WR8 : Rd<17, "v16:17", [V16, V17, VFR8]>, DwarfRegNum<[169]>;
- def WR9 : Rd<19, "v18:19", [V18, V19, VFR9]>, DwarfRegNum<[170]>;
- def WR10: Rd<21, "v20:21", [V20, V21, VFR10]>, DwarfRegNum<[171]>;
- def WR11: Rd<23, "v22:23", [V22, V23, VFR11]>, DwarfRegNum<[172]>;
- def WR12: Rd<25, "v24:25", [V24, V25, VFR12]>, DwarfRegNum<[173]>;
- def WR13: Rd<27, "v26:27", [V26, V27, VFR13]>, DwarfRegNum<[174]>;
- def WR14: Rd<29, "v28:29", [V28, V29, VFR14]>, DwarfRegNum<[175]>;
- def WR15: Rd<31, "v30:31", [V30, V31, VFR15]>, DwarfRegNum<[176]>;
+ def WR0 : Rd<1, "v0:1", [V0, V1, VFR0]>, DwarfRegNum<[161]>;
+ def WR1 : Rd<3, "v2:3", [V2, V3, VFR1]>, DwarfRegNum<[162]>;
+ def WR2 : Rd<5, "v4:5", [V4, V5, VFR2]>, DwarfRegNum<[163]>;
+ def WR3 : Rd<7, "v6:7", [V6, V7, VFR3]>, DwarfRegNum<[164]>;
+ def WR4 : Rd<9, "v8:9", [V8, V9, VFR4]>, DwarfRegNum<[165]>;
+ def WR5 : Rd<11, "v10:11", [V10, V11, VFR5]>, DwarfRegNum<[166]>;
+ def WR6 : Rd<13, "v12:13", [V12, V13, VFR6]>, DwarfRegNum<[167]>;
+ def WR7 : Rd<15, "v14:15", [V14, V15, VFR7]>, DwarfRegNum<[168]>;
+ def WR8 : Rd<17, "v16:17", [V16, V17, VFR8]>, DwarfRegNum<[169]>;
+ def WR9 : Rd<19, "v18:19", [V18, V19, VFR9]>, DwarfRegNum<[170]>;
+ def WR10 : Rd<21, "v20:21", [V20, V21, VFR10]>, DwarfRegNum<[171]>;
+ def WR11 : Rd<23, "v22:23", [V22, V23, VFR11]>, DwarfRegNum<[172]>;
+ def WR12 : Rd<25, "v24:25", [V24, V25, VFR12]>, DwarfRegNum<[173]>;
+ def WR13 : Rd<27, "v26:27", [V26, V27, VFR13]>, DwarfRegNum<[174]>;
+ def WR14 : Rd<29, "v28:29", [V28, V29, VFR14]>, DwarfRegNum<[175]>;
+ def WR15 : Rd<31, "v30:31", [V30, V31, VFR15]>, DwarfRegNum<[176]>;
}
// Aliases of the V* registers used to hold quad vec values.
let SubRegIndices = [wsub_lo, wsub_hi], CoveredBySubRegs = 1 in {
- def VQ0 : Rd< 0, "v3:0", [W0, W1]>, DwarfRegNum<[252]>;
- def VQ1 : Rd< 4, "v7:4", [W2, W3]>, DwarfRegNum<[253]>;
- def VQ2 : Rd< 8, "v11:8", [W4, W5]>, DwarfRegNum<[254]>;
- def VQ3 : Rd<12, "v15:12", [W6, W7]>, DwarfRegNum<[255]>;
- def VQ4 : Rd<16, "v19:16", [W8, W9]>, DwarfRegNum<[256]>;
- def VQ5 : Rd<20, "v23:20", [W10, W11]>, DwarfRegNum<[257]>;
- def VQ6 : Rd<24, "v27:24", [W12, W13]>, DwarfRegNum<[258]>;
- def VQ7 : Rd<28, "v31:28", [W14, W15]>, DwarfRegNum<[259]>;
+ def VQ0 : Rd<0, "v3:0", [W0, W1]>, DwarfRegNum<[252]>;
+ def VQ1 : Rd<4, "v7:4", [W2, W3]>, DwarfRegNum<[253]>;
+ def VQ2 : Rd<8, "v11:8", [W4, W5]>, DwarfRegNum<[254]>;
+ def VQ3 : Rd<12, "v15:12", [W6, W7]>, DwarfRegNum<[255]>;
+ def VQ4 : Rd<16, "v19:16", [W8, W9]>, DwarfRegNum<[256]>;
+ def VQ5 : Rd<20, "v23:20", [W10, W11]>, DwarfRegNum<[257]>;
+ def VQ6 : Rd<24, "v27:24", [W12, W13]>, DwarfRegNum<[258]>;
+ def VQ7 : Rd<28, "v31:28", [W14, W15]>, DwarfRegNum<[259]>;
}
// Vector Predicate registers.
@@ -286,359 +270,357 @@ let Namespace = "Hexagon" in {
def Q3 : Rq<3, "q3">, DwarfRegNum<[134]>;
// System registers.
- def SGP0 : Rs<0, "sgp0", ["s0"]>, DwarfRegNum<[144]>;
- def SGP1 : Rs<1, "sgp1", ["s1"]>, DwarfRegNum<[145]>;
- def STID : Rs<2, "stid", ["s2"]>, DwarfRegNum<[146]>;
- def ELR : Rs<3, "elr", ["s3"]>, DwarfRegNum<[147]>;
- def BADVA0 : Rs<4, "badva0", ["s4"]>, DwarfRegNum<[148]>;
- def BADVA1 : Rs<5, "badva1", ["s5"]>, DwarfRegNum<[149]>;
- def SSR : Rs<6, "ssr", ["s6"]>, DwarfRegNum<[150]>;
- def CCR : Rs<7, "ccr", ["s7"]>, DwarfRegNum<[151]>;
- def HTID : Rs<8, "htid", ["s8"]>, DwarfRegNum<[152]>;
- def BADVA : Rs<9, "badva", ["s9"]>, DwarfRegNum<[153]>;
- def IMASK : Rs<10, "imask", ["s10"]>, DwarfRegNum<[154]>;
- def S11 : Rs<11, "s11">, DwarfRegNum<[155]>;
- def S12 : Rs<12, "s12">, DwarfRegNum<[156]>;
- def S13 : Rs<13, "s13">, DwarfRegNum<[157]>;
- def S14 : Rs<14, "s14">, DwarfRegNum<[158]>;
- def S15 : Rs<15, "s15">, DwarfRegNum<[159]>;
- def EVB : Rs<16, "evb", ["s16"]>, DwarfRegNum<[160]>;
- def MODECTL : Rs<17, "modectl", ["s17"]>, DwarfRegNum<[161]>;
- def SYSCFG : Rs<18, "syscfg", ["s18"]>, DwarfRegNum<[162]>;
- def S19 : Rs<19, "s19", ["s19"]>, DwarfRegNum<[163]>;
- def S20 : Rs<20, "s20", ["s20"]>, DwarfRegNum<[164]>;
- def VID : Rs<21, "vid", ["s21"]>, DwarfRegNum<[165]>;
- def S22 : Rs<22, "s22", ["s22"]>, DwarfRegNum<[166]>;
- def S23 : Rs<23, "s23">, DwarfRegNum<[167]>;
- def S24 : Rs<24, "s24">, DwarfRegNum<[168]>;
- def S25 : Rs<25, "s25">, DwarfRegNum<[169]>;
- def S26 : Rs<26, "s26">, DwarfRegNum<[170]>;
- def CFGBASE : Rs<27, "cfgbase", ["s27"]>, DwarfRegNum<[171]>;
- def DIAG : Rs<28, "diag", ["s28"]>, DwarfRegNum<[172]>;
- def REV : Rs<29, "rev", ["s29"]>, DwarfRegNum<[173]>;
- def PCYCLELO : Rs<30, "pcyclelo", ["s30"]>, DwarfRegNum<[174]>;
- def PCYCLEHI : Rs<31, "pcyclehi", ["s31"]>, DwarfRegNum<[175]>;
- def ISDBST : Rs<32, "isdbst", ["s32"]>, DwarfRegNum<[176]>;
- def ISDBCFG0 : Rs<33, "isdbcfg0", ["s33"]>, DwarfRegNum<[177]>;
- def ISDBCFG1 : Rs<34, "isdbcfg1", ["s34"]>, DwarfRegNum<[178]>;
- def S35 : Rs<35, "s35">, DwarfRegNum<[179]>;
- def BRKPTPC0 : Rs<36, "brkptpc0", ["s36"]>, DwarfRegNum<[180]>;
- def BRKPTCFG0: Rs<37, "brkptcfg0", ["s37"]>, DwarfRegNum<[181]>;
- def BRKPTPC1 : Rs<38, "brkptpc1", ["s38"]>, DwarfRegNum<[182]>;
- def BRKPTCFG1: Rs<39, "brkptcfg1", ["s39"]>, DwarfRegNum<[183]>;
- def ISDBMBXIN: Rs<40, "isdbmbxin", ["s40"]>, DwarfRegNum<[184]>;
- def ISDBMBXOUT: Rs<41, "isdbmbxout", ["s41"]>, DwarfRegNum<[185]>;
- def ISDBEN: Rs<42, "isdben", ["s42"]>, DwarfRegNum<[186]>;
- def ISDBGPR: Rs<43, "isdbgpr", ["s43"]>, DwarfRegNum<[187]>;
- def S44: Rs<44, "s44">, DwarfRegNum<[188]>;
- def S45: Rs<45, "s45">, DwarfRegNum<[189]>;
- def S46: Rs<46, "s46">, DwarfRegNum<[190]>;
- def S47: Rs<47, "s47">, DwarfRegNum<[191]>;
- def PMUCNT0: Rs<48, "pmucnt0", ["s48"]>, DwarfRegNum<[192]>;
- def PMUCNT1: Rs<49, "pmucnt1", ["s49"]>, DwarfRegNum<[193]>;
- def PMUCNT2: Rs<50, "pmucnt2", ["s50"]>, DwarfRegNum<[194]>;
- def PMUCNT3: Rs<51, "pmucnt3", ["s51"]>, DwarfRegNum<[195]>;
- def PMUEVTCFG: Rs<52, "pmuevtcfg", ["s52"]>, DwarfRegNum<[196]>;
- def PMUCFG: Rs<53, "pmucfg", ["s53"]>, DwarfRegNum<[197]>;
- def S54: Rs<54, "s54">, DwarfRegNum<[198]>;
- def S55: Rs<55, "s55">, DwarfRegNum<[199]>;
- def S56: Rs<56, "s56">, DwarfRegNum<[200]>;
- def S57: Rs<57, "s57">, DwarfRegNum<[201]>;
- def S58: Rs<58, "s58">, DwarfRegNum<[202]>;
- def S59: Rs<59, "s59">, DwarfRegNum<[203]>;
- def S60: Rs<60, "s60">, DwarfRegNum<[204]>;
- def S61: Rs<61, "s61">, DwarfRegNum<[205]>;
- def S62: Rs<62, "s62">, DwarfRegNum<[206]>;
- def S63: Rs<63, "s63">, DwarfRegNum<[207]>;
- def S64: Rs<64, "s64">, DwarfRegNum<[208]>;
- def S65: Rs<65, "s65">, DwarfRegNum<[209]>;
- def S66: Rs<66, "s66">, DwarfRegNum<[210]>;
- def S67: Rs<67, "s67">, DwarfRegNum<[211]>;
- def S68: Rs<68, "s68">, DwarfRegNum<[212]>;
- def S69: Rs<69, "s69">, DwarfRegNum<[213]>;
- def S70: Rs<70, "s70">, DwarfRegNum<[214]>;
- def S71: Rs<71, "s71">, DwarfRegNum<[215]>;
- def S72: Rs<72, "s72">, DwarfRegNum<[216]>;
- def S73: Rs<73, "s73">, DwarfRegNum<[217]>;
- def S74: Rs<74, "s74">, DwarfRegNum<[218]>;
- def S75: Rs<75, "s75">, DwarfRegNum<[219]>;
- def S76: Rs<76, "s76">, DwarfRegNum<[220]>;
- def S77: Rs<77, "s77">, DwarfRegNum<[221]>;
- def S78: Rs<78, "s78">, DwarfRegNum<[222]>;
- def S79: Rs<79, "s79">, DwarfRegNum<[223]>;
- def S80: Rs<80, "s80">, DwarfRegNum<[224]>;
+ def SGP0 : Rs<0, "sgp0", ["s0"]>, DwarfRegNum<[144]>;
+ def SGP1 : Rs<1, "sgp1", ["s1"]>, DwarfRegNum<[145]>;
+ def STID : Rs<2, "stid", ["s2"]>, DwarfRegNum<[146]>;
+ def ELR : Rs<3, "elr", ["s3"]>, DwarfRegNum<[147]>;
+ def BADVA0 : Rs<4, "badva0", ["s4"]>, DwarfRegNum<[148]>;
+ def BADVA1 : Rs<5, "badva1", ["s5"]>, DwarfRegNum<[149]>;
+ def SSR : Rs<6, "ssr", ["s6"]>, DwarfRegNum<[150]>;
+ def CCR : Rs<7, "ccr", ["s7"]>, DwarfRegNum<[151]>;
+ def HTID : Rs<8, "htid", ["s8"]>, DwarfRegNum<[152]>;
+ def BADVA : Rs<9, "badva", ["s9"]>, DwarfRegNum<[153]>;
+ def IMASK : Rs<10, "imask", ["s10"]>, DwarfRegNum<[154]>;
+ def S11 : Rs<11, "s11">, DwarfRegNum<[155]>;
+ def S12 : Rs<12, "s12">, DwarfRegNum<[156]>;
+ def S13 : Rs<13, "s13">, DwarfRegNum<[157]>;
+ def S14 : Rs<14, "s14">, DwarfRegNum<[158]>;
+ def S15 : Rs<15, "s15">, DwarfRegNum<[159]>;
+ def EVB : Rs<16, "evb", ["s16"]>, DwarfRegNum<[160]>;
+ def MODECTL : Rs<17, "modectl", ["s17"]>, DwarfRegNum<[161]>;
+ def SYSCFG : Rs<18, "syscfg", ["s18"]>, DwarfRegNum<[162]>;
+ def S19 : Rs<19, "s19", ["s19"]>, DwarfRegNum<[163]>;
+ def S20 : Rs<20, "s20", ["s20"]>, DwarfRegNum<[164]>;
+ def VID : Rs<21, "vid", ["s21"]>, DwarfRegNum<[165]>;
+ def S22 : Rs<22, "s22", ["s22"]>, DwarfRegNum<[166]>;
+ def S23 : Rs<23, "s23">, DwarfRegNum<[167]>;
+ def S24 : Rs<24, "s24">, DwarfRegNum<[168]>;
+ def S25 : Rs<25, "s25">, DwarfRegNum<[169]>;
+ def S26 : Rs<26, "s26">, DwarfRegNum<[170]>;
+ def CFGBASE : Rs<27, "cfgbase", ["s27"]>, DwarfRegNum<[171]>;
+ def DIAG : Rs<28, "diag", ["s28"]>, DwarfRegNum<[172]>;
+ def REV : Rs<29, "rev", ["s29"]>, DwarfRegNum<[173]>;
+ def PCYCLELO : Rs<30, "pcyclelo", ["s30"]>, DwarfRegNum<[174]>;
+ def PCYCLEHI : Rs<31, "pcyclehi", ["s31"]>, DwarfRegNum<[175]>;
+ def ISDBST : Rs<32, "isdbst", ["s32"]>, DwarfRegNum<[176]>;
+ def ISDBCFG0 : Rs<33, "isdbcfg0", ["s33"]>, DwarfRegNum<[177]>;
+ def ISDBCFG1 : Rs<34, "isdbcfg1", ["s34"]>, DwarfRegNum<[178]>;
+ def S35 : Rs<35, "s35">, DwarfRegNum<[179]>;
+ def BRKPTPC0 : Rs<36, "brkptpc0", ["s36"]>, DwarfRegNum<[180]>;
+ def BRKPTCFG0 : Rs<37, "brkptcfg0", ["s37"]>, DwarfRegNum<[181]>;
+ def BRKPTPC1 : Rs<38, "brkptpc1", ["s38"]>, DwarfRegNum<[182]>;
+ def BRKPTCFG1 : Rs<39, "brkptcfg1", ["s39"]>, DwarfRegNum<[183]>;
+ def ISDBMBXIN : Rs<40, "isdbmbxin", ["s40"]>, DwarfRegNum<[184]>;
+ def ISDBMBXOUT : Rs<41, "isdbmbxout", ["s41"]>, DwarfRegNum<[185]>;
+ def ISDBEN : Rs<42, "isdben", ["s42"]>, DwarfRegNum<[186]>;
+ def ISDBGPR : Rs<43, "isdbgpr", ["s43"]>, DwarfRegNum<[187]>;
+ def S44 : Rs<44, "s44">, DwarfRegNum<[188]>;
+ def S45 : Rs<45, "s45">, DwarfRegNum<[189]>;
+ def S46 : Rs<46, "s46">, DwarfRegNum<[190]>;
+ def S47 : Rs<47, "s47">, DwarfRegNum<[191]>;
+ def PMUCNT0 : Rs<48, "pmucnt0", ["s48"]>, DwarfRegNum<[192]>;
+ def PMUCNT1 : Rs<49, "pmucnt1", ["s49"]>, DwarfRegNum<[193]>;
+ def PMUCNT2 : Rs<50, "pmucnt2", ["s50"]>, DwarfRegNum<[194]>;
+ def PMUCNT3 : Rs<51, "pmucnt3", ["s51"]>, DwarfRegNum<[195]>;
+ def PMUEVTCFG : Rs<52, "pmuevtcfg", ["s52"]>, DwarfRegNum<[196]>;
+ def PMUCFG : Rs<53, "pmucfg", ["s53"]>, DwarfRegNum<[197]>;
+ def S54 : Rs<54, "s54">, DwarfRegNum<[198]>;
+ def S55 : Rs<55, "s55">, DwarfRegNum<[199]>;
+ def S56 : Rs<56, "s56">, DwarfRegNum<[200]>;
+ def S57 : Rs<57, "s57">, DwarfRegNum<[201]>;
+ def S58 : Rs<58, "s58">, DwarfRegNum<[202]>;
+ def S59 : Rs<59, "s59">, DwarfRegNum<[203]>;
+ def S60 : Rs<60, "s60">, DwarfRegNum<[204]>;
+ def S61 : Rs<61, "s61">, DwarfRegNum<[205]>;
+ def S62 : Rs<62, "s62">, DwarfRegNum<[206]>;
+ def S63 : Rs<63, "s63">, DwarfRegNum<[207]>;
+ def S64 : Rs<64, "s64">, DwarfRegNum<[208]>;
+ def S65 : Rs<65, "s65">, DwarfRegNum<[209]>;
+ def S66 : Rs<66, "s66">, DwarfRegNum<[210]>;
+ def S67 : Rs<67, "s67">, DwarfRegNum<[211]>;
+ def S68 : Rs<68, "s68">, DwarfRegNum<[212]>;
+ def S69 : Rs<69, "s69">, DwarfRegNum<[213]>;
+ def S70 : Rs<70, "s70">, DwarfRegNum<[214]>;
+ def S71 : Rs<71, "s71">, DwarfRegNum<[215]>;
+ def S72 : Rs<72, "s72">, DwarfRegNum<[216]>;
+ def S73 : Rs<73, "s73">, DwarfRegNum<[217]>;
+ def S74 : Rs<74, "s74">, DwarfRegNum<[218]>;
+ def S75 : Rs<75, "s75">, DwarfRegNum<[219]>;
+ def S76 : Rs<76, "s76">, DwarfRegNum<[220]>;
+ def S77 : Rs<77, "s77">, DwarfRegNum<[221]>;
+ def S78 : Rs<78, "s78">, DwarfRegNum<[222]>;
+ def S79 : Rs<79, "s79">, DwarfRegNum<[223]>;
+ def S80 : Rs<80, "s80">, DwarfRegNum<[224]>;
// System Register Pair
let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in {
- def SGP1_0 : Rss<0, "s1:0", [SGP0, SGP1], ["sgp1:0"]>, DwarfRegNum<[144]>;
- def S3_2 : Rss<2, "s3:2", [STID, ELR]>, DwarfRegNum<[146]>;
- def S5_4 : Rss<4, "s5:4", [BADVA0, BADVA1], ["badva1:0"]>,
- DwarfRegNum<[148]>;
- def S7_6 : Rss<6, "s7:6", [SSR, CCR], ["ccr:ssr"]>, DwarfRegNum<[150]>;
- def S9_8 : Rss<8, "s9:8", [HTID, BADVA]>, DwarfRegNum<[152]>;
- def S11_10 : Rss<10, "s11:10", [IMASK, S11]>, DwarfRegNum<[154]>;
- def S13_12 : Rss<12, "s13:12", [S12, S13]>, DwarfRegNum<[156]>;
- def S15_14 : Rss<14, "s15:14", [S14, S15]>, DwarfRegNum<[158]>;
- def S17_16 : Rss<16, "s17:16", [EVB, MODECTL]>, DwarfRegNum<[160]>;
- def S19_18 : Rss<18, "s19:18", [SYSCFG, S19]>, DwarfRegNum<[162]>;
- def S21_20 : Rss<20, "s21:20", [S20, VID]>, DwarfRegNum<[164]>;
- def S23_22 : Rss<22, "s23:22", [S22, S23]>, DwarfRegNum<[166]>;
- def S25_24 : Rss<24, "s25:24", [S24, S25]>, DwarfRegNum<[168]>;
- def S27_26 : Rss<26, "s27:26", [S26, CFGBASE]>, DwarfRegNum<[170]>;
- def S29_28 : Rss<28, "s29:28", [DIAG, REV]>, DwarfRegNum<[172]>;
- def S31_30 : Rss<30, "s31:30", [PCYCLELO, PCYCLEHI], ["pcycle"]>, DwarfRegNum<[174]>;
- def S33_32 : Rss<32, "s33:32", [ISDBST, ISDBCFG0]>, DwarfRegNum<[176]>;
- def S35_34 : Rss<34, "s35:34", [ISDBCFG1, S35]>, DwarfRegNum<[178]>;
- def S37_36 : Rss<36, "s37:36", [BRKPTPC0, BRKPTCFG0]>, DwarfRegNum<[180]>;
- def S39_38 : Rss<38, "s39:38", [BRKPTPC1, BRKPTCFG1]>, DwarfRegNum<[182]>;
+ def SGP1_0 : Rss<0, "s1:0", [SGP0, SGP1], ["sgp1:0"]>, DwarfRegNum<[144]>;
+ def S3_2 : Rss<2, "s3:2", [STID, ELR]>, DwarfRegNum<[146]>;
+ def S5_4 : Rss<4, "s5:4", [BADVA0, BADVA1], ["badva1:0"]>,
+ DwarfRegNum<[148]>;
+ def S7_6 : Rss<6, "s7:6", [SSR, CCR], ["ccr:ssr"]>, DwarfRegNum<[150]>;
+ def S9_8 : Rss<8, "s9:8", [HTID, BADVA]>, DwarfRegNum<[152]>;
+ def S11_10 : Rss<10, "s11:10", [IMASK, S11]>, DwarfRegNum<[154]>;
+ def S13_12 : Rss<12, "s13:12", [S12, S13]>, DwarfRegNum<[156]>;
+ def S15_14 : Rss<14, "s15:14", [S14, S15]>, DwarfRegNum<[158]>;
+ def S17_16 : Rss<16, "s17:16", [EVB, MODECTL]>, DwarfRegNum<[160]>;
+ def S19_18 : Rss<18, "s19:18", [SYSCFG, S19]>, DwarfRegNum<[162]>;
+ def S21_20 : Rss<20, "s21:20", [S20, VID]>, DwarfRegNum<[164]>;
+ def S23_22 : Rss<22, "s23:22", [S22, S23]>, DwarfRegNum<[166]>;
+ def S25_24 : Rss<24, "s25:24", [S24, S25]>, DwarfRegNum<[168]>;
+ def S27_26 : Rss<26, "s27:26", [S26, CFGBASE]>, DwarfRegNum<[170]>;
+ def S29_28 : Rss<28, "s29:28", [DIAG, REV]>, DwarfRegNum<[172]>;
+ def S31_30 : Rss<30, "s31:30", [PCYCLELO, PCYCLEHI], ["pcycle"]>,
+ DwarfRegNum<[174]>;
+ def S33_32 : Rss<32, "s33:32", [ISDBST, ISDBCFG0]>, DwarfRegNum<[176]>;
+ def S35_34 : Rss<34, "s35:34", [ISDBCFG1, S35]>, DwarfRegNum<[178]>;
+ def S37_36 : Rss<36, "s37:36", [BRKPTPC0, BRKPTCFG0]>, DwarfRegNum<[180]>;
+ def S39_38 : Rss<38, "s39:38", [BRKPTPC1, BRKPTCFG1]>, DwarfRegNum<[182]>;
def S41_40 : Rss<40, "s41:40", [ISDBMBXIN, ISDBMBXOUT]>, DwarfRegNum<[184]>;
- def S43_42 : Rss<42, "s43:42", [ISDBEN, ISDBGPR]>, DwarfRegNum<[186]>;
- def S45_44 : Rss<44, "s45:44", [S44, S45]>, DwarfRegNum<[188]>;
- def S47_46 : Rss<46, "s47:46", [S46, S47]>, DwarfRegNum<[190]>;
- def S49_48 : Rss<48, "s49:48", [PMUCNT0, PMUCNT1]>, DwarfRegNum<[192]>;
- def S51_50 : Rss<50, "s51:50", [PMUCNT2, PMUCNT3]>, DwarfRegNum<[194]>;
- def S53_52 : Rss<52, "s53:52", [PMUEVTCFG, PMUCFG]>, DwarfRegNum<[196]>;
- def S55_54 : Rss<54, "s55:54", [S54, S55]>, DwarfRegNum<[198]>;
- def S57_56 : Rss<56, "s57:56", [S56, S57]>, DwarfRegNum<[200]>;
- def S59_58 : Rss<58, "s59:58", [S58, S59]>, DwarfRegNum<[202]>;
- def S61_60 : Rss<60, "s61:60", [S60, S61]>, DwarfRegNum<[204]>;
- def S63_62 : Rss<62, "s63:62", [S62, S63]>, DwarfRegNum<[206]>;
- def S65_64 : Rss<64, "s65:64", [S64, S65]>, DwarfRegNum<[208]>;
- def S67_66 : Rss<66, "s67:66", [S66, S67]>, DwarfRegNum<[210]>;
- def S69_68 : Rss<68, "s69:68", [S68, S69]>, DwarfRegNum<[212]>;
- def S71_70 : Rss<70, "s71:70", [S70, S71]>, DwarfRegNum<[214]>;
- def S73_72 : Rss<72, "s73:72", [S72, S73]>, DwarfRegNum<[216]>;
- def S75_74 : Rss<74, "s75:74", [S74, S75]>, DwarfRegNum<[218]>;
- def S77_76 : Rss<76, "s77:76", [S77, S76]>, DwarfRegNum<[219]>;
- def S79_78 : Rss<78, "s79:78", [S79, S78]>, DwarfRegNum<[220]>;
+ def S43_42 : Rss<42, "s43:42", [ISDBEN, ISDBGPR]>, DwarfRegNum<[186]>;
+ def S45_44 : Rss<44, "s45:44", [S44, S45]>, DwarfRegNum<[188]>;
+ def S47_46 : Rss<46, "s47:46", [S46, S47]>, DwarfRegNum<[190]>;
+ def S49_48 : Rss<48, "s49:48", [PMUCNT0, PMUCNT1]>, DwarfRegNum<[192]>;
+ def S51_50 : Rss<50, "s51:50", [PMUCNT2, PMUCNT3]>, DwarfRegNum<[194]>;
+ def S53_52 : Rss<52, "s53:52", [PMUEVTCFG, PMUCFG]>, DwarfRegNum<[196]>;
+ def S55_54 : Rss<54, "s55:54", [S54, S55]>, DwarfRegNum<[198]>;
+ def S57_56 : Rss<56, "s57:56", [S56, S57]>, DwarfRegNum<[200]>;
+ def S59_58 : Rss<58, "s59:58", [S58, S59]>, DwarfRegNum<[202]>;
+ def S61_60 : Rss<60, "s61:60", [S60, S61]>, DwarfRegNum<[204]>;
+ def S63_62 : Rss<62, "s63:62", [S62, S63]>, DwarfRegNum<[206]>;
+ def S65_64 : Rss<64, "s65:64", [S64, S65]>, DwarfRegNum<[208]>;
+ def S67_66 : Rss<66, "s67:66", [S66, S67]>, DwarfRegNum<[210]>;
+ def S69_68 : Rss<68, "s69:68", [S68, S69]>, DwarfRegNum<[212]>;
+ def S71_70 : Rss<70, "s71:70", [S70, S71]>, DwarfRegNum<[214]>;
+ def S73_72 : Rss<72, "s73:72", [S72, S73]>, DwarfRegNum<[216]>;
+ def S75_74 : Rss<74, "s75:74", [S74, S75]>, DwarfRegNum<[218]>;
+ def S77_76 : Rss<76, "s77:76", [S77, S76]>, DwarfRegNum<[219]>;
+ def S79_78 : Rss<78, "s79:78", [S79, S78]>, DwarfRegNum<[220]>;
}
// Guest Registers
- def GELR: Rg<0, "gelr", ["g0"]>, DwarfRegNum<[220]>;
- def GSR: Rg<1, "gsr", ["g1"]>, DwarfRegNum<[221]>;
- def GOSP: Rg<2, "gosp", ["g2"]>, DwarfRegNum<[222]>;
- def G3: Rg<3, "gbadva", ["g3"]>, DwarfRegNum<[223]>;
- def G4: Rg<4, "g4">, DwarfRegNum<[224]>;
- def G5: Rg<5, "g5">, DwarfRegNum<[225]>;
- def G6: Rg<6, "g6">, DwarfRegNum<[226]>;
- def G7: Rg<7, "g7">, DwarfRegNum<[227]>;
- def G8: Rg<8, "g8">, DwarfRegNum<[228]>;
- def G9: Rg<9, "g9">, DwarfRegNum<[229]>;
- def G10: Rg<10, "g10">, DwarfRegNum<[230]>;
- def G11: Rg<11, "g11">, DwarfRegNum<[231]>;
- def G12: Rg<12, "g12">, DwarfRegNum<[232]>;
- def G13: Rg<13, "g13">, DwarfRegNum<[233]>;
- def G14: Rg<14, "g14">, DwarfRegNum<[234]>;
- def G15: Rg<15, "g15">, DwarfRegNum<[235]>;
- def GPMUCNT4: Rg<16, "gpmucnt4", ["g16"]>, DwarfRegNum<[236]>;
- def GPMUCNT5: Rg<17, "gpmucnt5", ["g17"]>, DwarfRegNum<[237]>;
- def GPMUCNT6: Rg<18, "gpmucnt6", ["g18"]>, DwarfRegNum<[238]>;
- def GPMUCNT7: Rg<19, "gpmucnt7", ["g19"]>, DwarfRegNum<[239]>;
- def G20: Rg<20, "g20">, DwarfRegNum<[240]>;
- def G21: Rg<21, "g21">, DwarfRegNum<[241]>;
- def G22: Rg<22, "g22">, DwarfRegNum<[242]>;
- def G23: Rg<23, "g23">, DwarfRegNum<[243]>;
- def GPCYCLELO: Rg<24, "gpcyclelo", ["g24"]>, DwarfRegNum<[244]>;
- def GPCYCLEHI: Rg<25, "gpcyclehi", ["g25"]>, DwarfRegNum<[245]>;
- def GPMUCNT0: Rg<26, "gpmucnt0", ["g26"]>, DwarfRegNum<[246]>;
- def GPMUCNT1: Rg<27, "gpmucnt1", ["g27"]>, DwarfRegNum<[247]>;
- def GPMUCNT2: Rg<28, "gpmucnt2", ["g28"]>, DwarfRegNum<[248]>;
- def GPMUCNT3: Rg<29, "gpmucnt3", ["g29"]>, DwarfRegNum<[249]>;
- def G30: Rg<30, "g30">, DwarfRegNum<[250]>;
- def G31: Rg<31, "g31">, DwarfRegNum<[251]>;
+ def GELR : Rg<0, "gelr", ["g0"]>, DwarfRegNum<[220]>;
+ def GSR : Rg<1, "gsr", ["g1"]>, DwarfRegNum<[221]>;
+ def GOSP : Rg<2, "gosp", ["g2"]>, DwarfRegNum<[222]>;
+ def G3 : Rg<3, "gbadva", ["g3"]>, DwarfRegNum<[223]>;
+ def G4 : Rg<4, "g4">, DwarfRegNum<[224]>;
+ def G5 : Rg<5, "g5">, DwarfRegNum<[225]>;
+ def G6 : Rg<6, "g6">, DwarfRegNum<[226]>;
+ def G7 : Rg<7, "g7">, DwarfRegNum<[227]>;
+ def G8 : Rg<8, "g8">, DwarfRegNum<[228]>;
+ def G9 : Rg<9, "g9">, DwarfRegNum<[229]>;
+ def G10 : Rg<10, "g10">, DwarfRegNum<[230]>;
+ def G11 : Rg<11, "g11">, DwarfRegNum<[231]>;
+ def G12 : Rg<12, "g12">, DwarfRegNum<[232]>;
+ def G13 : Rg<13, "g13">, DwarfRegNum<[233]>;
+ def G14 : Rg<14, "g14">, DwarfRegNum<[234]>;
+ def G15 : Rg<15, "g15">, DwarfRegNum<[235]>;
+ def GPMUCNT4 : Rg<16, "gpmucnt4", ["g16"]>, DwarfRegNum<[236]>;
+ def GPMUCNT5 : Rg<17, "gpmucnt5", ["g17"]>, DwarfRegNum<[237]>;
+ def GPMUCNT6 : Rg<18, "gpmucnt6", ["g18"]>, DwarfRegNum<[238]>;
+ def GPMUCNT7 : Rg<19, "gpmucnt7", ["g19"]>, DwarfRegNum<[239]>;
+ def G20 : Rg<20, "g20">, DwarfRegNum<[240]>;
+ def G21 : Rg<21, "g21">, DwarfRegNum<[241]>;
+ def G22 : Rg<22, "g22">, DwarfRegNum<[242]>;
+ def G23 : Rg<23, "g23">, DwarfRegNum<[243]>;
+ def GPCYCLELO : Rg<24, "gpcyclelo", ["g24"]>, DwarfRegNum<[244]>;
+ def GPCYCLEHI : Rg<25, "gpcyclehi", ["g25"]>, DwarfRegNum<[245]>;
+ def GPMUCNT0 : Rg<26, "gpmucnt0", ["g26"]>, DwarfRegNum<[246]>;
+ def GPMUCNT1 : Rg<27, "gpmucnt1", ["g27"]>, DwarfRegNum<[247]>;
+ def GPMUCNT2 : Rg<28, "gpmucnt2", ["g28"]>, DwarfRegNum<[248]>;
+ def GPMUCNT3 : Rg<29, "gpmucnt3", ["g29"]>, DwarfRegNum<[249]>;
+ def G30 : Rg<30, "g30">, DwarfRegNum<[250]>;
+ def G31 : Rg<31, "g31">, DwarfRegNum<[251]>;
// Guest Register Pairs
let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in {
- def G1_0 : Rgg<0, "g1:0", [GELR, GSR]>, DwarfRegNum<[220]>;
- def G3_2 : Rgg<2, "g3:2", [GOSP, G3]>, DwarfRegNum<[222]>;
- def G5_4 : Rgg<4, "g5:4", [G4, G5]>, DwarfRegNum<[224]>;
- def G7_6 : Rgg<6, "g7:6", [G6, G7]>, DwarfRegNum<[226]>;
- def G9_8 : Rgg<8, "g9:8", [G8, G9]>, DwarfRegNum<[228]>;
- def G11_10 : Rgg<10, "g11:10", [G10, G11]>, DwarfRegNum<[230]>;
- def G13_12 : Rgg<12, "g13:12", [G12, G13]>, DwarfRegNum<[232]>;
- def G15_14 : Rgg<14, "g15:14", [G14, G15]>, DwarfRegNum<[234]>;
- def G17_16 : Rgg<16, "g17:16", [GPMUCNT4, GPMUCNT5]>, DwarfRegNum<[236]>;
- def G19_18 : Rgg<18, "g19:18", [GPMUCNT6, GPMUCNT7]>, DwarfRegNum<[238]>;
- def G21_20 : Rgg<20, "g21:20", [G20, G21]>, DwarfRegNum<[240]>;
- def G23_22 : Rgg<22, "g23:22", [G22, G23]>, DwarfRegNum<[242]>;
+ def G1_0 : Rgg<0, "g1:0", [GELR, GSR]>, DwarfRegNum<[220]>;
+ def G3_2 : Rgg<2, "g3:2", [GOSP, G3]>, DwarfRegNum<[222]>;
+ def G5_4 : Rgg<4, "g5:4", [G4, G5]>, DwarfRegNum<[224]>;
+ def G7_6 : Rgg<6, "g7:6", [G6, G7]>, DwarfRegNum<[226]>;
+ def G9_8 : Rgg<8, "g9:8", [G8, G9]>, DwarfRegNum<[228]>;
+ def G11_10 : Rgg<10, "g11:10", [G10, G11]>, DwarfRegNum<[230]>;
+ def G13_12 : Rgg<12, "g13:12", [G12, G13]>, DwarfRegNum<[232]>;
+ def G15_14 : Rgg<14, "g15:14", [G14, G15]>, DwarfRegNum<[234]>;
+ def G17_16 : Rgg<16, "g17:16", [GPMUCNT4, GPMUCNT5]>, DwarfRegNum<[236]>;
+ def G19_18 : Rgg<18, "g19:18", [GPMUCNT6, GPMUCNT7]>, DwarfRegNum<[238]>;
+ def G21_20 : Rgg<20, "g21:20", [G20, G21]>, DwarfRegNum<[240]>;
+ def G23_22 : Rgg<22, "g23:22", [G22, G23]>, DwarfRegNum<[242]>;
def G25_24 : Rgg<24, "g25:24", [GPCYCLELO, GPCYCLEHI]>, DwarfRegNum<[244]>;
- def G27_26 : Rgg<26, "g27:26", [GPMUCNT0, GPMUCNT1]>, DwarfRegNum<[246]>;
- def G29_28 : Rgg<28, "g29:28", [GPMUCNT2, GPMUCNT3]>, DwarfRegNum<[248]>;
- def G31_30 : Rgg<30, "g31:30", [G30, G31]>, DwarfRegNum<[250]>;
+ def G27_26 : Rgg<26, "g27:26", [GPMUCNT0, GPMUCNT1]>, DwarfRegNum<[246]>;
+ def G29_28 : Rgg<28, "g29:28", [GPMUCNT2, GPMUCNT3]>, DwarfRegNum<[248]>;
+ def G31_30 : Rgg<30, "g31:30", [G30, G31]>, DwarfRegNum<[250]>;
}
-
}
// HVX types
-def VecI1: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v64i1, v128i1, v64i1]>;
-def VecI8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v64i8, v128i8, v64i8]>;
-def VecI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v32i16, v64i16, v32i16]>;
-def VecI32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v16i32, v32i32, v16i32]>;
-def VecF16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v32f16, v64f16, v32f16]>;
-def VecF32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v16f32, v32f32, v16f32]>;
-
-def VecPI8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v128i8, v256i8, v128i8]>;
-def VecPI16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v64i16, v128i16, v64i16]>;
-def VecPI32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v32i32, v64i32, v32i32]>;
-def VecPF16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v64f16, v128f16, v64f16]>;
-def VecPF32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v32f32, v64f32, v32f32]>;
-
-def VecQ8: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v64i1, v128i1, v64i1]>;
-def VecQ16: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v32i1, v64i1, v32i1]>;
-def VecQ32: ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode],
- [v16i1, v32i1, v16i1]>;
+def VecI1
+ : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v64i1, v128i1, v64i1]>;
+def VecI8
+ : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v64i8, v128i8, v64i8]>;
+def VecI16
+ : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v32i16, v64i16, v32i16]>;
+def VecI32
+ : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v16i32, v32i32, v16i32]>;
+def VecF16
+ : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v32f16, v64f16, v32f16]>;
+def VecF32
+ : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v16f32, v32f32, v16f32]>;
+def VecBF16 : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v32bf16, v64bf16,
+ v32bf16]>;
+
+def VecPI8
+ : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v128i8, v256i8, v128i8]>;
+def VecPI16 : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v64i16, v128i16,
+ v64i16]>;
+def VecPI32
+ : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v32i32, v64i32, v32i32]>;
+def VecPF16 : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v64f16, v128f16,
+ v64f16]>;
+def VecPF32
+ : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v32f32, v64f32, v32f32]>;
+def VecPBF16
+ : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v64bf16, v128bf16,
+ v64bf16]>;
+
+def VecQ8
+ : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v64i1, v128i1, v64i1]>;
+def VecQ16
+ : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v32i1, v64i1, v32i1]>;
+def VecQ32
+ : ValueTypeByHwMode<[Hvx64, Hvx128, DefaultMode], [v16i1, v32i1, v16i1]>;
// HVX register classes
-def HvxVR : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32, VecF16, VecF32], 512,
- (add (sequence "V%u", 0, 31), VTMP)> {
- let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
- [RegInfo<512,512,512>, RegInfo<1024,1024,1024>, RegInfo<512,512,512>]>;
+def HvxVR
+ : RegisterClass<"Hexagon", [VecI8, VecI16, VecI32, VecF16, VecBF16, VecF32],
+ 512, (add (sequence "V%u", 0, 31), VTMP)> {
+ let RegInfos =
+ RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], [RegInfo<512, 512, 512>,
+ RegInfo<1024, 1024, 1024>,
+ RegInfo<512, 512, 512>]>;
}
-def HvxWR : RegisterClass<"Hexagon", [VecPI8, VecPI16, VecPI32, VecPF16, VecPF32], 1024,
- (add (sequence "W%u", 0, 15), (sequence "WR%u", 0, 15))> {
- let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
- [RegInfo<1024,1024,512>, RegInfo<2048,2048,1024>, RegInfo<1024,1024,512>]>;
+def HvxWR
+ : RegisterClass<
+ "Hexagon", [VecPI8, VecPI16, VecPI32, VecPF16, VecPBF16, VecPF32],
+ 1024, (add (sequence "W%u", 0, 15), (sequence "WR%u", 0, 15))> {
+ let RegInfos =
+ RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], [RegInfo<1024, 1024, 512>,
+ RegInfo<2048, 2048, 1024>,
+ RegInfo<1024, 1024, 512>]>;
}
def HvxQR : RegisterClass<"Hexagon", [VecI1, VecQ8, VecQ16, VecQ32], 128,
- (add Q0, Q1, Q2, Q3)> {
- let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
- [RegInfo<64,512,512>, RegInfo<128,1024,1024>, RegInfo<64,512,512>]>;
+ (add Q0, Q1, Q2, Q3)> {
+ let RegInfos =
+ RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], [RegInfo<64, 512, 512>,
+ RegInfo<128, 1024, 1024>,
+ RegInfo<64, 512, 512>]>;
}
-def HvxVQR : RegisterClass<"Hexagon", [untyped], 2048,
- (add (sequence "VQ%u", 0, 7))> {
- let RegInfos = RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode],
- [RegInfo<2048,2048,512>, RegInfo<4096,4096,1024>, RegInfo<2048,2048,512>]>;
+def HvxVQR
+ : RegisterClass<"Hexagon", [untyped], 2048, (add (sequence "VQ%u", 0, 7))> {
+ let RegInfos =
+ RegInfoByHwMode<[Hvx64, Hvx128, DefaultMode], [RegInfo<2048, 2048, 512>,
+ RegInfo<4096, 4096, 1024>,
+ RegInfo<2048, 2048, 512>]>;
}
// Core register classes
-def IntRegs : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
- (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28),
- R10, R11, R29, R30, R31)>;
+def IntRegs
+ : RegisterClass<"Hexagon", [i32, f32, v4i8, v2i16], 32,
+ (add (sequence "R%u", 0, 9), (sequence "R%u", 12, 28), R10,
+ R11, R29, R30, R31)>;
// Registers are listed in reverse order for allocation preference reasons.
def GeneralSubRegs : RegisterClass<"Hexagon", [i32], 32,
- (add R23, R22, R21, R20, R19, R18, R17, R16,
- R7, R6, R5, R4, R3, R2, R1, R0)>;
+ (add R23, R22, R21, R20, R19, R18, R17, R16,
+ R7, R6, R5, R4, R3, R2, R1, R0)>;
-def IntRegsLow8 : RegisterClass<"Hexagon", [i32], 32,
- (add R7, R6, R5, R4, R3, R2, R1, R0)> ;
+def IntRegsLow8
+ : RegisterClass<"Hexagon", [i32], 32, (add R7, R6, R5, R4, R3, R2, R1, R0)>;
def DoubleRegs : RegisterClass<"Hexagon", [i64, f64, v8i8, v4i16, v2i32], 64,
- (add (sequence "D%u", 0, 4), (sequence "D%u", 6, 13), D5, D14, D15)>;
-
-def GeneralDoubleLow8Regs : RegisterClass<"Hexagon", [i64], 64,
- (add D11, D10, D9, D8, D3, D2, D1, D0)>;
-
-let Size = 32 in
-def PredRegs : RegisterClass<"Hexagon",
- [i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32, (add P0, P1, P2, P3)>;
-
-let Size = 32 in
-def ModRegs : RegisterClass<"Hexagon", [i32], 32, (add M0, M1)>;
-
-let Size = 32, isAllocatable = 0 in
-def CtrRegs : RegisterClass<"Hexagon", [i32], 32,
- (add LC0, SA0, LC1, SA1, P3_0, C5, C8, PC, UGP, GP, CS0, CS1,
- UPCYCLELO, UPCYCLEHI,
- FRAMELIMIT, FRAMEKEY, PKTCOUNTLO, PKTCOUNTHI, UTIMERLO, UTIMERHI,
- M0, M1, USR)>;
-
-let Size = 64 in
-def VectRegRev : RegisterClass<"Hexagon", [i64], 64,
- (add (sequence "WR%u", 0, 15))>;
-
-let isAllocatable = 0 in
-def UsrBits : RegisterClass<"Hexagon", [i1], 0, (add USR_OVF)>;
-
-let Size = 64, isAllocatable = 0 in
-def CtrRegs64 : RegisterClass<"Hexagon", [i64], 64,
- (add C1_0, C3_2, C5_4, C7_6, C9_8, C11_10, CS, UPCYCLE, C17_16,
- PKTCOUNT, UTIMER)>;
-
-let Size = 32, isAllocatable = 0 in
-def GuestRegs : RegisterClass<"Hexagon", [i32], 32,
- (add GELR, GSR, GOSP,
- (sequence "G%u", 3, 15),
- GPMUCNT4, GPMUCNT5, GPMUCNT6, GPMUCNT7,
- G20, G21, G22, G23,
- GPCYCLELO, GPCYCLEHI, GPMUCNT0, GPMUCNT1,
- GPMUCNT2, GPMUCNT3,
- G30, G31)>;
-
-let Size = 64, isAllocatable = 0 in
-def GuestRegs64 : RegisterClass<"Hexagon", [i64], 64,
- (add G1_0, G3_2,
- G5_4, G7_6, G9_8, G11_10, G13_12, G15_14,
- G17_16, G19_18,
- G21_20, G23_22,
- G25_24, G27_26, G29_28,
- G31_30)>;
-
-let Size = 32, isAllocatable = 0 in
-def SysRegs : RegisterClass<"Hexagon", [i32], 32,
- (add SGP0, SGP1, STID, ELR, BADVA0, BADVA1,
- SSR, CCR, HTID, BADVA, IMASK,
- S11, S12, S13, S14, S15,
- S19, S23, S25,
- EVB, MODECTL, SYSCFG, S20, VID, S22, S24,
- S26, CFGBASE, DIAG, REV, PCYCLEHI,
- PCYCLELO, ISDBST, ISDBCFG0, ISDBCFG1, S35,
- BRKPTPC0, BRKPTCFG0, BRKPTPC1, BRKPTCFG1,
- ISDBMBXIN, ISDBMBXOUT, ISDBEN, ISDBGPR,
- S44, S45, S46, S47,
- PMUCNT0, PMUCNT1, PMUCNT2, PMUCNT3,
- PMUEVTCFG, PMUCFG, S54, S55, S56, S57,
- S58, S59, S60, S61, S62, S63, S64, S65, S66, S67,
- S68, S69, S70, S71, S72, S73, S74, S75, S76, S77,
- S78, S79, S80
- )>;
-
-let Size = 64, isAllocatable = 0 in
-def SysRegs64 : RegisterClass<"Hexagon", [i64], 64,
- (add SGP1_0,
- S3_2, S5_4, S7_6, S9_8,
- S11_10, S13_12, S15_14,
- S17_16, S19_18, S21_20,
- S23_22, S25_24,
- S27_26, S29_28, S31_30, S33_32, S35_34,
- S37_36, S39_38, S41_40, S43_42, S45_44,
- S47_46, S49_48, S51_50, S53_52,
- S55_54, S57_56, S59_58,
- S61_60, S63_62, S65_64, S67_66, S69_68,
- S71_70, S73_72, S75_74, S77_76, S79_78
- )>;
+ (add (sequence "D%u", 0, 4),
+ (sequence "D%u", 6, 13), D5, D14, D15)>;
+
+def GeneralDoubleLow8Regs
+ : RegisterClass<"Hexagon", [i64], 64,
+ (add D11, D10, D9, D8, D3, D2, D1, D0)>;
+
+let Size = 32 in def PredRegs
+ : RegisterClass<"Hexagon", [i1, v2i1, v4i1, v8i1, v4i8, v2i16, i32], 32,
+ (add P0, P1, P2, P3)>;
+
+let Size =
+ 32 in def ModRegs : RegisterClass<"Hexagon", [i32], 32, (add M0, M1)>;
+
+let Size = 32, isAllocatable = 0 in def CtrRegs
+ : RegisterClass<"Hexagon", [i32], 32,
+ (add LC0, SA0, LC1, SA1, P3_0, C5, C8, PC, UGP, GP, CS0,
+ CS1, UPCYCLELO, UPCYCLEHI, FRAMELIMIT, FRAMEKEY,
+ PKTCOUNTLO, PKTCOUNTHI, UTIMERLO, UTIMERHI, M0, M1,
+ USR)>;
+
+let Size = 64 in def VectRegRev
+ : RegisterClass<"Hexagon", [i64], 64, (add (sequence "WR%u", 0, 15))>;
+
+let isAllocatable =
+ 0 in def UsrBits : RegisterClass<"Hexagon", [i1], 0, (add USR_OVF)>;
+
+let Size = 64, isAllocatable = 0 in def CtrRegs64
+ : RegisterClass<"Hexagon", [i64], 64,
+ (add C1_0, C3_2, C5_4, C7_6, C9_8, C11_10, CS, UPCYCLE,
+ C17_16, PKTCOUNT, UTIMER)>;
+
+let Size = 32, isAllocatable = 0 in def GuestRegs
+ : RegisterClass<"Hexagon", [i32], 32,
+ (add GELR, GSR, GOSP, (sequence "G%u", 3, 15), GPMUCNT4,
+ GPMUCNT5, GPMUCNT6, GPMUCNT7, G20, G21, G22, G23,
+ GPCYCLELO, GPCYCLEHI, GPMUCNT0, GPMUCNT1, GPMUCNT2,
+ GPMUCNT3, G30, G31)>;
+
+let Size = 64, isAllocatable = 0 in def GuestRegs64
+ : RegisterClass<"Hexagon", [i64], 64,
+ (add G1_0, G3_2, G5_4, G7_6, G9_8, G11_10, G13_12, G15_14,
+ G17_16, G19_18, G21_20, G23_22, G25_24, G27_26, G29_28,
+ G31_30)>;
+
+let Size = 32, isAllocatable = 0 in def SysRegs
+ : RegisterClass<"Hexagon", [i32], 32,
+ (add SGP0, SGP1, STID, ELR, BADVA0, BADVA1, SSR, CCR, HTID,
+ BADVA, IMASK, S11, S12, S13, S14, S15, S19, S23, S25,
+ EVB, MODECTL, SYSCFG, S20, VID, S22, S24, S26, CFGBASE,
+ DIAG, REV, PCYCLEHI, PCYCLELO, ISDBST, ISDBCFG0,
+ ISDBCFG1, S35, BRKPTPC0, BRKPTCFG0, BRKPTPC1, BRKPTCFG1,
+ ISDBMBXIN, ISDBMBXOUT, ISDBEN, ISDBGPR, S44, S45, S46,
+ S47, PMUCNT0, PMUCNT1, PMUCNT2, PMUCNT3, PMUEVTCFG,
+ PMUCFG, S54, S55, S56, S57, S58, S59, S60, S61, S62,
+ S63, S64, S65, S66, S67, S68, S69, S70, S71, S72, S73,
+ S74, S75, S76, S77, S78, S79, S80)>;
+
+let Size = 64, isAllocatable = 0 in def SysRegs64
+ : RegisterClass<"Hexagon", [i64], 64,
+ (add SGP1_0, S3_2, S5_4, S7_6, S9_8, S11_10, S13_12, S15_14,
+ S17_16, S19_18, S21_20, S23_22, S25_24, S27_26, S29_28,
+ S31_30, S33_32, S35_34, S37_36, S39_38, S41_40, S43_42,
+ S45_44, S47_46, S49_48, S51_50, S53_52, S55_54, S57_56,
+ S59_58, S61_60, S63_62, S65_64, S67_66, S69_68, S71_70,
+ S73_72, S75_74, S77_76, S79_78)>;
// These registers are new for v62 and onward.
// The function RegisterMatchesArch() uses this list for validation.
-let isAllocatable = 0 in
-def V62Regs : RegisterClass<"Hexagon", [i32], 32,
- (add FRAMELIMIT, FRAMEKEY, C17_16, PKTCOUNTLO, PKTCOUNTHI, PKTCOUNT,
- UTIMERLO, UTIMERHI, UTIMER)>;
+let isAllocatable = 0 in def V62Regs
+ : RegisterClass<"Hexagon", [i32], 32,
+ (add FRAMELIMIT, FRAMEKEY, C17_16, PKTCOUNTLO, PKTCOUNTHI,
+ PKTCOUNT, UTIMERLO, UTIMERHI, UTIMER)>;
// These registers are new for v65 and onward.
-let Size = 32, isAllocatable = 0 in
-def V65Regs : RegisterClass<"Hexagon", [i32], 32, (add VTMP)>;
-
+let Size = 32, isAllocatable = 0 in def V65Regs
+ : RegisterClass<"Hexagon", [i32], 32, (add VTMP)>;
-def HexagonCSR
- : CalleeSavedRegs<(add R16, R17, R18, R19, R20, R21, R22, R23,
- R24, R25, R26, R27)>;
+def HexagonCSR : CalleeSavedRegs<(add R16, R17, R18, R19, R20, R21, R22, R23,
+ R24, R25, R26, R27)>;
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index ce2de75..66c8b0a 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -28,7 +28,6 @@
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
-#include <map>
#include <optional>
using namespace llvm;
@@ -77,8 +76,7 @@ HexagonSubtarget::HexagonSubtarget(const Triple &TT, StringRef CPU,
OptLevel(TM.getOptLevel()),
CPUString(std::string(Hexagon_MC::selectHexagonCPU(CPU))),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
- RegInfo(getHwMode()), TLInfo(TM, *this),
- InstrItins(getInstrItineraryForCPU(CPUString)) {
+ TLInfo(TM, *this), InstrItins(getInstrItineraryForCPU(CPUString)) {
Hexagon_MC::addArchSubtarget(this, FS);
// Beware of the default constructor of InstrItineraryData: it will
// reset all members to 0.
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index 995f66d..dde3229 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -100,7 +100,6 @@ private:
// The following objects can use the TargetTriple, so they must be
// declared after it.
HexagonInstrInfo InstrInfo;
- HexagonRegisterInfo RegInfo;
HexagonTargetLowering TLInfo;
HexagonSelectionDAGInfo TSInfo;
HexagonFrameLowering FrameLowering;
@@ -122,7 +121,7 @@ public:
}
const HexagonInstrInfo *getInstrInfo() const override { return &InstrInfo; }
const HexagonRegisterInfo *getRegisterInfo() const override {
- return &RegInfo;
+ return &InstrInfo.getRegisterInfo();
}
const HexagonTargetLowering *getTargetLowering() const override {
return &TLInfo;
@@ -345,7 +344,11 @@ public:
ArrayRef<MVT> getHVXElementTypes() const {
static MVT Types[] = {MVT::i8, MVT::i16, MVT::i32};
static MVT TypesV68[] = {MVT::i8, MVT::i16, MVT::i32, MVT::f16, MVT::f32};
+ static MVT TypesV81[] = {MVT::i8, MVT::i16, MVT::i32,
+ MVT::f16, MVT::bf16, MVT::f32};
+ if (useHVXV81Ops() && useHVXFloatingPoint())
+ return ArrayRef(TypesV81);
if (useHVXV68Ops() && useHVXFloatingPoint())
return ArrayRef(TypesV68);
return ArrayRef(Types);
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index d9824a31..d98fe80 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -48,6 +48,14 @@ static cl::opt<bool>
cl::desc("Disable Hardware Loops for Hexagon target"));
static cl::opt<bool>
+ EnableGenWideningVec("hexagon-widening-vectors", cl::init(true), cl::Hidden,
+ cl::desc("Generate widening vector instructions"));
+
+static cl::opt<bool>
+ EnableOptShuffleVec("hexagon-opt-shuffvec", cl::init(true), cl::Hidden,
+ cl::desc("Enable optimization of shuffle vectors"));
+
+static cl::opt<bool>
DisableAModeOpt("disable-hexagon-amodeopt", cl::Hidden,
cl::desc("Disable Hexagon Addressing Mode Optimization"));
@@ -321,6 +329,8 @@ TargetPassConfig *HexagonTargetMachine::createPassConfig(PassManagerBase &PM) {
}
void HexagonPassConfig::addIRPasses() {
+ HexagonTargetMachine &HTM = getHexagonTargetMachine();
+
TargetPassConfig::addIRPasses();
bool NoOpt = (getOptLevel() == CodeGenOptLevel::None);
@@ -350,6 +360,13 @@ void HexagonPassConfig::addIRPasses() {
// Replace certain combinations of shifts and ands with extracts.
if (EnableGenExtract)
addPass(createHexagonGenExtract());
+ if (EnableGenWideningVec) {
+ addPass(createHexagonGenWideningVecInstr(HTM));
+ addPass(createHexagonGenWideningVecFloatInstr(HTM));
+ addPass(createDeadCodeEliminationPass());
+ }
+ if (EnableOptShuffleVec)
+ addPass(createHexagonOptShuffleVector(HTM));
}
}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index e925e04..59c6201 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -224,14 +224,6 @@ InstructionCost HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
}
InstructionCost
-HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
- Align Alignment, unsigned AddressSpace,
- TTI::TargetCostKind CostKind) const {
- return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
- CostKind);
-}
-
-InstructionCost
HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
VectorType *SrcTy, ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
@@ -240,13 +232,6 @@ HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy,
return 1;
}
-InstructionCost HexagonTTIImpl::getGatherScatterOpCost(
- unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
- Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
- return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
- Alignment, CostKind, I);
-}
-
InstructionCost HexagonTTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
@@ -345,14 +330,16 @@ InstructionCost HexagonTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
}
bool HexagonTTIImpl::isLegalMaskedStore(Type *DataType, Align /*Alignment*/,
- unsigned /*AddressSpace*/) const {
+ unsigned /*AddressSpace*/,
+ TTI::MaskKind /*MaskKind*/) const {
// This function is called from scalarize-masked-mem-intrin, which runs
// in pre-isel. Use ST directly instead of calling isHVXVectorType.
return HexagonMaskedVMem && ST.isTypeForHVX(DataType);
}
bool HexagonTTIImpl::isLegalMaskedLoad(Type *DataType, Align /*Alignment*/,
- unsigned /*AddressSpace*/) const {
+ unsigned /*AddressSpace*/,
+ TTI::MaskKind /*MaskKind*/) const {
// This function is called from scalarize-masked-mem-intrin, which runs
// in pre-isel. Use ST directly instead of calling isHVXVectorType.
return HexagonMaskedVMem && ST.isTypeForHVX(DataType);
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index cec2bf9..edf88cf 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -120,19 +120,10 @@ public:
TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
const Instruction *I = nullptr) const override;
InstructionCost
- getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
- unsigned AddressSpace,
- TTI::TargetCostKind CostKind) const override;
- InstructionCost
getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy,
ArrayRef<int> Mask, TTI::TargetCostKind CostKind, int Index,
VectorType *SubTp, ArrayRef<const Value *> Args = {},
const Instruction *CxtI = nullptr) const override;
- InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
- const Value *Ptr, bool VariableMask,
- Align Alignment,
- TTI::TargetCostKind CostKind,
- const Instruction *I) const override;
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
@@ -166,9 +157,10 @@ public:
}
bool isLegalMaskedStore(Type *DataType, Align Alignment,
- unsigned AddressSpace) const override;
- bool isLegalMaskedLoad(Type *DataType, Align Alignment,
- unsigned AddressSpace) const override;
+ unsigned AddressSpace,
+ TTI::MaskKind MaskKind) const override;
+ bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddressSpace,
+ TTI::MaskKind MaskKind) const override;
bool isLegalMaskedGather(Type *Ty, Align Alignment) const override;
bool isLegalMaskedScatter(Type *Ty, Align Alignment) const override;
bool forceScalarizeMaskedGather(VectorType *VTy,
diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index cb88d1a..d39b79a 100644
--- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -653,7 +653,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI,
const MCInstrDesc& MCID = PacketMI.getDesc();
// First operand is always the result.
- const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI);
+ const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0);
// Double regs can not feed into new value store: PRM section: 5.4.2.2.
if (PacketRC == &Hexagon::DoubleRegsRegClass)
return false;
@@ -866,7 +866,7 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
return false;
const MCInstrDesc& MCID = PI.getDesc();
- const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI);
+ const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0);
if (DisableVecDblNVStores && VecRC == &Hexagon::HvxWRRegClass)
return false;
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 5c50ec2..2813b1d 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -120,10 +120,6 @@ public:
size_t length(Value *Val) const;
size_t length(Type *Ty) const;
- Constant *getNullValue(Type *Ty) const;
- Constant *getFullValue(Type *Ty) const;
- Constant *getConstSplat(Type *Ty, int Val) const;
-
Value *simplify(Value *Val) const;
Value *insertb(IRBuilderBase &Builder, Value *Dest, Value *Src, int Start,
@@ -368,8 +364,8 @@ private:
const HexagonVectorCombine &HVC;
};
-[[maybe_unused]]
-raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+ const AlignVectors::AddrInfo &AI) {
OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
OS << "Addr: " << *AI.Addr << '\n';
OS << "Type: " << *AI.ValTy << '\n';
@@ -379,8 +375,8 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
return OS;
}
-[[maybe_unused]]
-raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+ const AlignVectors::MoveGroup &MG) {
OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");
OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';
OS << "Main\n";
@@ -398,9 +394,8 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
return OS;
}
-[[maybe_unused]]
-raw_ostream &operator<<(raw_ostream &OS,
- const AlignVectors::ByteSpan::Block &B) {
+[[maybe_unused]] raw_ostream &
+operator<<(raw_ostream &OS, const AlignVectors::ByteSpan::Block &B) {
OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";
if (B.Seg.Val == reinterpret_cast<const Value *>(&B)) {
OS << "(self:" << B.Seg.Val << ')';
@@ -412,8 +407,8 @@ raw_ostream &operator<<(raw_ostream &OS,
return OS;
}
-[[maybe_unused]]
-raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
+[[maybe_unused]] raw_ostream &operator<<(raw_ostream &OS,
+ const AlignVectors::ByteSpan &BS) {
OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
for (const AlignVectors::ByteSpan::Block &B : BS)
OS << B << '\n';
@@ -683,8 +678,8 @@ auto AlignVectors::getMask(Value *Val) const -> Value * {
Type *ValTy = getPayload(Val)->getType();
if (auto *VecTy = dyn_cast<VectorType>(ValTy))
- return HVC.getFullValue(HVC.getBoolTy(HVC.length(VecTy)));
- return HVC.getFullValue(HVC.getBoolTy());
+ return Constant::getAllOnesValue(HVC.getBoolTy(HVC.length(VecTy)));
+ return Constant::getAllOnesValue(HVC.getBoolTy());
}
auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
@@ -1123,7 +1118,7 @@ auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
BasicBlock *BaseBlock = Builder.GetInsertBlock();
ByteSpan ASpan;
- auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
+ auto *True = Constant::getAllOnesValue(HVC.getBoolTy(ScLen));
auto *Undef = UndefValue::get(SecTy);
// Created load does not have to be "Instruction" (e.g. "undef").
@@ -1350,7 +1345,7 @@ auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
ByteSpan VSection =
VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
Value *Undef = UndefValue::get(SecTy);
- Value *Zero = HVC.getNullValue(SecTy);
+ Value *Zero = Constant::getNullValue(SecTy);
Value *AccumV = Undef;
Value *AccumM = Zero;
for (ByteSpan::Block &S : VSection) {
@@ -2475,19 +2470,19 @@ Value *HvxIdioms::processVGather(Instruction &In) const {
Dst->eraseFromParent();
} else if (Qual == HvxIdioms::LLVM_Scatter) {
// Gather feeds directly into scatter.
- LLVM_DEBUG({
- auto *DstInpTy = cast<VectorType>(Dst->getOperand(1)->getType());
- assert(DstInpTy && "Cannot handle no vector type for llvm.scatter");
- unsigned DstInpSize = HVC.getSizeOf(DstInpTy);
- unsigned DstElements = HVC.length(DstInpTy);
- auto *DstElemTy = cast<PointerType>(DstInpTy->getElementType());
- assert(DstElemTy && "llvm.scatter needs vector of ptr argument");
- dbgs() << " Gather feeds into scatter\n Values to scatter : "
- << *Dst->getOperand(0) << "\n";
- dbgs() << " Dst type(" << *DstInpTy << ") elements(" << DstElements
- << ") VecLen(" << DstInpSize << ") type(" << *DstElemTy
- << ") Access alignment(" << *Dst->getOperand(2) << ")\n";
- });
+ auto *DstInpTy = cast<VectorType>(Dst->getOperand(1)->getType());
+ assert(DstInpTy && "Cannot handle no vector type for llvm.scatter");
+ [[maybe_unused]] unsigned DstInpSize = HVC.getSizeOf(DstInpTy);
+ [[maybe_unused]] unsigned DstElements = HVC.length(DstInpTy);
+ [[maybe_unused]] auto *DstElemTy =
+ cast<PointerType>(DstInpTy->getElementType());
+ assert(DstElemTy && "llvm.scatter needs vector of ptr argument");
+ LLVM_DEBUG(dbgs() << " Gather feeds into scatter\n Values to scatter : "
+ << *Dst->getOperand(0) << "\n");
+ LLVM_DEBUG(dbgs() << " Dst type(" << *DstInpTy << ") elements("
+ << DstElements << ") VecLen(" << DstInpSize << ") type("
+ << *DstElemTy << ") Access alignment("
+ << *Dst->getOperand(2) << ")\n");
// Address of source
auto *Src = getPointer(IndexLoad);
if (!Src)
@@ -2700,11 +2695,11 @@ auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
// Do full-precision multiply and shift.
Value *Prod32 = createMul16(Builder, Op.X, Op.Y);
if (Rounding) {
- Value *RoundVal = HVC.getConstSplat(Prod32->getType(), 1 << *Op.RoundAt);
+ Value *RoundVal = ConstantInt::get(Prod32->getType(), 1 << *Op.RoundAt);
Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add");
}
- Value *ShiftAmt = HVC.getConstSplat(Prod32->getType(), Op.Frac);
+ Value *ShiftAmt = ConstantInt::get(Prod32->getType(), Op.Frac);
Value *Shifted = Op.X.Sgn == Signed || Op.Y.Sgn == Signed
? Builder.CreateAShr(Prod32, ShiftAmt, "asr")
: Builder.CreateLShr(Prod32, ShiftAmt, "lsr");
@@ -2723,10 +2718,10 @@ auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
// Add the optional rounding to the proper word.
if (Op.RoundAt.has_value()) {
- Value *Zero = HVC.getNullValue(WordX[0]->getType());
+ Value *Zero = Constant::getNullValue(WordX[0]->getType());
SmallVector<Value *> RoundV(WordP.size(), Zero);
RoundV[*Op.RoundAt / 32] =
- HVC.getConstSplat(HvxWordTy, 1 << (*Op.RoundAt % 32));
+ ConstantInt::get(HvxWordTy, 1 << (*Op.RoundAt % 32));
WordP = createAddLong(Builder, WordP, RoundV);
}
@@ -2734,7 +2729,7 @@ auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
// Shift all products right by Op.Frac.
unsigned SkipWords = Op.Frac / 32;
- Constant *ShiftAmt = HVC.getConstSplat(HvxWordTy, Op.Frac % 32);
+ Constant *ShiftAmt = ConstantInt::get(HvxWordTy, Op.Frac % 32);
for (int Dst = 0, End = WordP.size() - SkipWords; Dst != End; ++Dst) {
int Src = Dst + SkipWords;
@@ -2803,7 +2798,7 @@ auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
} else {
AddCarry = HVC.HST.getIntrinsicId(Hexagon::V6_vaddcarry);
if (CarryIn == nullptr)
- CarryIn = HVC.getNullValue(HVC.getBoolTy(HVC.length(VecTy)));
+ CarryIn = Constant::getNullValue(HVC.getBoolTy(HVC.length(VecTy)));
Args.push_back(CarryIn);
}
Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
@@ -2951,7 +2946,7 @@ auto HvxIdioms::createMulLong(IRBuilderBase &Builder, ArrayRef<Value *> WordX,
}
}
- Value *Zero = HVC.getNullValue(WordX[0]->getType());
+ Value *Zero = Constant::getNullValue(WordX[0]->getType());
auto pop_back_or_zero = [Zero](auto &Vector) -> Value * {
if (Vector.empty())
@@ -3147,33 +3142,6 @@ auto HexagonVectorCombine::length(Type *Ty) const -> size_t {
return VecTy->getElementCount().getFixedValue();
}
-auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {
- assert(Ty->isIntOrIntVectorTy());
- auto Zero = ConstantInt::get(Ty->getScalarType(), 0);
- if (auto *VecTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::getSplat(VecTy->getElementCount(), Zero);
- return Zero;
-}
-
-auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {
- assert(Ty->isIntOrIntVectorTy());
- auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1);
- if (auto *VecTy = dyn_cast<VectorType>(Ty))
- return ConstantVector::getSplat(VecTy->getElementCount(), Minus1);
- return Minus1;
-}
-
-auto HexagonVectorCombine::getConstSplat(Type *Ty, int Val) const
- -> Constant * {
- assert(Ty->isVectorTy());
- auto VecTy = cast<VectorType>(Ty);
- Type *ElemTy = VecTy->getElementType();
- // Add support for floats if needed.
- auto *Splat = ConstantVector::getSplat(VecTy->getElementCount(),
- ConstantInt::get(ElemTy, Val));
- return Splat;
-}
-
auto HexagonVectorCombine::simplify(Value *V) const -> Value * {
if (auto *In = dyn_cast<Instruction>(V)) {
SimplifyQuery Q(DL, &TLI, &DT, &AC, In);
@@ -3581,7 +3549,7 @@ auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
// If there are too few, fill them with the sign bit.
Value *Last = Inputs.back();
Value *Sign = Builder.CreateAShr(
- Last, getConstSplat(Last->getType(), Width - 1), "asr");
+ Last, ConstantInt::get(Last->getType(), Width - 1), "asr");
Inputs.resize(NeedInputs, Sign);
}
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 1a0f1ab..5a187d2 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -569,9 +569,9 @@ public:
return true;
}
- bool finishLayout(const MCAssembler &Asm) const override {
+ bool finishLayout() const override {
SmallVector<MCFragment *> Frags;
- for (MCSection &Sec : Asm) {
+ for (MCSection &Sec : *Asm) {
Frags.clear();
for (MCFragment &F : Sec)
Frags.push_back(&F);
@@ -580,7 +580,7 @@ public:
default:
break;
case MCFragment::FT_Align: {
- auto Size = Asm.computeFragmentSize(*Frags[J]);
+ auto Size = Asm->computeFragmentSize(*Frags[J]);
for (auto K = J; K != 0 && Size >= HEXAGON_PACKET_SIZE;) {
--K;
switch (Frags[K]->getKind()) {
@@ -597,14 +597,14 @@ public:
MCInst Inst = RF.getInst();
const bool WouldTraverseLabel = llvm::any_of(
- Asm.symbols(), [&Asm, &RF, &Inst](MCSymbol const &sym) {
+ Asm->symbols(), [&RF, &Inst, Asm = Asm](MCSymbol const &sym) {
uint64_t Offset = 0;
- const bool HasOffset = Asm.getSymbolOffset(sym, Offset);
+ const bool HasOffset = Asm->getSymbolOffset(sym, Offset);
const unsigned PacketSizeBytes =
HexagonMCInstrInfo::bundleSize(Inst) *
HEXAGON_INSTR_SIZE;
const bool OffsetPastSym =
- Offset <= (Asm.getFragmentOffset(RF) + PacketSizeBytes);
+ Offset <= Asm->getFragmentOffset(RF) + PacketSizeBytes;
return !sym.isVariable() && Offset != 0 && HasOffset &&
OffsetPastSym;
});
@@ -631,7 +631,7 @@ public:
*RF.getSubtargetInfo(), Inst);
//assert(!Error);
(void)Error;
- ReplaceInstruction(Asm.getEmitter(), RF, Inst);
+ ReplaceInstruction(Asm->getEmitter(), RF, Inst);
Size = 0; // Only look back one instruction
break;
}
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
index 9b6bc5a..0b2279b 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -385,7 +385,7 @@ bool HexagonMCChecker::checkSlots() {
bool HexagonMCChecker::checkPredicates() {
// Check for proper use of new predicate registers.
for (const auto &I : NewPreds) {
- unsigned P = I;
+ MCRegister P = I;
if (!Defs.count(P) || LatePreds.count(P) || Defs.count(Hexagon::P3_0)) {
// Error out if the new predicate register is not defined,
@@ -398,7 +398,7 @@ bool HexagonMCChecker::checkPredicates() {
// Check for proper use of auto-anded of predicate registers.
for (const auto &I : LatePreds) {
- unsigned P = I;
+ MCRegister P = I;
if (LatePreds.count(P) > 1 || Defs.count(P)) {
// Error out if predicate register defined "late" multiple times or
@@ -607,7 +607,7 @@ void HexagonMCChecker::checkRegisterCurDefs() {
bool HexagonMCChecker::checkRegisters() {
// Check for proper register definitions.
for (const auto &I : Defs) {
- unsigned R = I.first;
+ MCRegister R = I.first;
if (isLoopRegister(R) && Defs.count(R) > 1 &&
(HexagonMCInstrInfo::isInnerLoop(MCB) ||
@@ -620,8 +620,8 @@ bool HexagonMCChecker::checkRegisters() {
if (SoftDefs.count(R)) {
// Error out for explicit changes to registers also weakly defined
// (e.g., "{ usr = r0; r0 = sfadd(...) }").
- unsigned UsrR = Hexagon::USR; // Silence warning about mixed types in ?:.
- unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
+ MCRegister UsrR = Hexagon::USR;
+ MCRegister BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
reportErrorRegisters(BadR);
return false;
}
@@ -633,8 +633,8 @@ bool HexagonMCChecker::checkRegisters() {
if (PM.count(Unconditional)) {
// Error out on an unconditional change when there are any other
// changes, conditional or not.
- unsigned UsrR = Hexagon::USR;
- unsigned BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
+ MCRegister UsrR = Hexagon::USR;
+ MCRegister BadR = RI.isSubRegister(Hexagon::USR, R) ? UsrR : R;
reportErrorRegisters(BadR);
return false;
}
@@ -664,7 +664,7 @@ bool HexagonMCChecker::checkRegisters() {
// Check for use of temporary definitions.
for (const auto &I : TmpDefs) {
- unsigned R = I;
+ MCRegister R = I;
if (!Uses.count(R)) {
// special case for vhist
@@ -765,12 +765,12 @@ void HexagonMCChecker::compoundRegisterMap(unsigned &Register) {
}
}
-void HexagonMCChecker::reportErrorRegisters(unsigned Register) {
+void HexagonMCChecker::reportErrorRegisters(MCRegister Register) {
reportError("register `" + Twine(RI.getName(Register)) +
"' modified more than once");
}
-void HexagonMCChecker::reportErrorNewValue(unsigned Register) {
+void HexagonMCChecker::reportErrorNewValue(MCRegister Register) {
reportError("register `" + Twine(RI.getName(Register)) +
"' used with `.new' "
"but not validly modified in the same packet");
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
index e9b87c5..8beee8d 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.h
@@ -39,41 +39,41 @@ class HexagonMCChecker {
bool ReportErrors;
/// Set of definitions: register #, if predicated, if predicated true.
- using PredSense = std::pair<unsigned, bool>;
+ using PredSense = std::pair<MCRegister, bool>;
static const PredSense Unconditional;
using PredSet = std::multiset<PredSense>;
using PredSetIterator = std::multiset<PredSense>::iterator;
- using DefsIterator = DenseMap<unsigned, PredSet>::iterator;
- DenseMap<unsigned, PredSet> Defs;
+ using DefsIterator = DenseMap<MCRegister, PredSet>::iterator;
+ DenseMap<MCRegister, PredSet> Defs;
/// Set of weak definitions whose clashes should be enforced selectively.
- using SoftDefsIterator = std::set<unsigned>::iterator;
- std::set<unsigned> SoftDefs;
+ using SoftDefsIterator = std::set<MCRegister>::iterator;
+ std::set<MCRegister> SoftDefs;
/// Set of temporary definitions not committed to the register file.
- using TmpDefsIterator = std::set<unsigned>::iterator;
- std::set<unsigned> TmpDefs;
+ using TmpDefsIterator = std::set<MCRegister>::iterator;
+ std::set<MCRegister> TmpDefs;
/// Set of new predicates used.
- using NewPredsIterator = std::set<unsigned>::iterator;
- std::set<unsigned> NewPreds;
+ using NewPredsIterator = std::set<MCRegister>::iterator;
+ std::set<MCRegister> NewPreds;
/// Set of predicates defined late.
- using LatePredsIterator = std::multiset<unsigned>::iterator;
- std::multiset<unsigned> LatePreds;
+ using LatePredsIterator = std::multiset<MCRegister>::iterator;
+ std::multiset<MCRegister> LatePreds;
/// Set of uses.
- using UsesIterator = std::set<unsigned>::iterator;
- std::set<unsigned> Uses;
+ using UsesIterator = std::set<MCRegister>::iterator;
+ std::set<MCRegister> Uses;
/// Pre-defined set of read-only registers.
- using ReadOnlyIterator = std::set<unsigned>::iterator;
- std::set<unsigned> ReadOnly;
+ using ReadOnlyIterator = std::set<MCRegister>::iterator;
+ std::set<MCRegister> ReadOnly;
// Contains the vector-pair-registers with the even number
// first ("v0:1", e.g.) used/def'd in this packet.
- std::set<unsigned> ReversePairs;
+ std::set<MCRegister> ReversePairs;
void init();
void init(MCInst const &);
@@ -107,7 +107,7 @@ class HexagonMCChecker {
static void compoundRegisterMap(unsigned &);
- bool isLoopRegister(unsigned R) const {
+ bool isLoopRegister(MCRegister R) const {
return (Hexagon::SA0 == R || Hexagon::LC0 == R || Hexagon::SA1 == R ||
Hexagon::LC1 == R);
}
@@ -120,8 +120,8 @@ public:
MCSubtargetInfo const &STI, bool CopyReportErrors);
bool check(bool FullCheck = true);
- void reportErrorRegisters(unsigned Register);
- void reportErrorNewValue(unsigned Register);
+ void reportErrorRegisters(MCRegister Register);
+ void reportErrorNewValue(MCRegister Register);
void reportError(SMLoc Loc, Twine const &Msg);
void reportNote(SMLoc Loc, Twine const &Msg);
void reportError(Twine const &Msg);
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
index c5e57d0..712bdbe 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -21,7 +21,6 @@
#include "llvm/TargetParser/SubtargetFeature.h"
#include <cstddef>
#include <cstdint>
-#include <memory>
namespace llvm {
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index 2f59b7c..10c350e 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -67,6 +67,11 @@ void HexagonMCELFStreamer::emitInstruction(const MCInst &MCB,
assert(MCB.getOpcode() == Hexagon::BUNDLE);
assert(HexagonMCInstrInfo::bundleSize(MCB) <= HEXAGON_PACKET_SIZE);
assert(HexagonMCInstrInfo::bundleSize(MCB) > 0);
+ const MCRegisterInfo *RI = getContext().getRegisterInfo();
+ HexagonMCChecker Check(getContext(), *MCII, STI, const_cast<MCInst &>(MCB),
+ *RI);
+ [[maybe_unused]] bool CheckOk = Check.check(false);
+ assert(CheckOk);
// At this point, MCB is a bundle
// Iterate through the bundle and assign addends for the instructions
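
The added lines make the ELF streamer run the packet checker over every bundle just before emission. The result is captured in a [[maybe_unused]] local so that the call itself is still evaluated in release builds, while the assert is only diagnosed in builds with assertions enabled. A generic sketch of that pattern (validate() is a hypothetical stand-in):

    [[maybe_unused]] bool Ok = validate();      // call runs in all build modes
    assert(Ok && "bundle failed MC checking");  // fires only with asserts on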
diff --git a/llvm/lib/Target/Hexagon/RDFCopy.cpp b/llvm/lib/Target/Hexagon/RDFCopy.cpp
index 3b1d3bd..4d0df66 100644
--- a/llvm/lib/Target/Hexagon/RDFCopy.cpp
+++ b/llvm/lib/Target/Hexagon/RDFCopy.cpp
@@ -26,7 +26,6 @@
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
-#include <utility>
using namespace llvm;
using namespace rdf;
@@ -44,11 +43,11 @@ bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) {
const MachineOperand &Src = MI->getOperand(1);
RegisterRef DstR = DFG.makeRegRef(Dst.getReg(), Dst.getSubReg());
RegisterRef SrcR = DFG.makeRegRef(Src.getReg(), Src.getSubReg());
- assert(Register::isPhysicalRegister(DstR.Reg));
- assert(Register::isPhysicalRegister(SrcR.Reg));
+ assert(DstR.asMCReg().isPhysical());
+ assert(SrcR.asMCReg().isPhysical());
const TargetRegisterInfo &TRI = DFG.getTRI();
- if (TRI.getMinimalPhysRegClass(DstR.Reg) !=
- TRI.getMinimalPhysRegClass(SrcR.Reg))
+ if (TRI.getMinimalPhysRegClass(DstR.asMCReg()) !=
+ TRI.getMinimalPhysRegClass(SrcR.asMCReg()))
return false;
if (!DFG.isTracked(SrcR) || !DFG.isTracked(DstR))
return false;
@@ -66,7 +65,7 @@ void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, EqualityMap &EM) {
Copies.push_back(SA.Id);
for (auto I : EM) {
- auto FS = DefM.find(I.second.Reg);
+ auto FS = DefM.find(I.second.Id);
if (FS == DefM.end() || FS->second.empty())
continue; // Undefined source
RDefMap[I.second][SA.Id] = FS->second.top()->Id;
@@ -93,7 +92,7 @@ void CopyPropagation::updateMap(NodeAddr<InstrNode*> IA) {
for (auto &R : RDefMap) {
if (!RRs.count(R.first))
continue;
- auto F = DefM.find(R.first.Reg);
+ auto F = DefM.find(R.first.Id);
if (F == DefM.end() || F->second.empty())
continue;
R.second[IA.Id] = F->second.top()->Id;
@@ -155,16 +154,16 @@ bool CopyPropagation::run() {
bool HasLimit = CpLimit.getNumOccurrences() > 0;
#endif
- auto MinPhysReg = [this] (RegisterRef RR) -> unsigned {
+ auto MinPhysReg = [this](RegisterRef RR) -> MCRegister {
const TargetRegisterInfo &TRI = DFG.getTRI();
- const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(RR.Reg);
+ const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(RR.asMCReg());
if ((RC.LaneMask & RR.Mask) == RC.LaneMask)
- return RR.Reg;
- for (MCSubRegIndexIterator S(RR.Reg, &TRI); S.isValid(); ++S)
+ return RR.asMCReg();
+ for (MCSubRegIndexIterator S(RR.asMCReg(), &TRI); S.isValid(); ++S)
if (RR.Mask == TRI.getSubRegIndexLaneMask(S.getSubRegIndex()))
return S.getSubReg();
llvm_unreachable("Should have found a register");
- return 0;
+ return MCRegister();
};
const PhysicalRegisterInfo &PRI = DFG.getPRI();
@@ -215,7 +214,7 @@ bool CopyPropagation::run() {
<< *NodeAddr<StmtNode*>(IA).Addr->getCode();
}
- unsigned NewReg = MinPhysReg(SR);
+ MCRegister NewReg = MinPhysReg(SR);
Op.setReg(NewReg);
Op.setSubReg(0);
DFG.unlinkUse(UA, false);
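
For reference, MinPhysReg (updated above to return MCRegister) resolves a lane-masked RegisterRef to a concrete physical register: the register itself when the lane mask covers its whole minimal register class, otherwise the sub-register whose lane mask matches exactly; the final hunk then installs that register at the use site via Op.setReg(NewReg) and clears the sub-register index. An illustrative example with assumed Hexagon register names (not taken from the patch):

    // Hypothetical inputs and results for MinPhysReg:
    //   RR = { D0, full lane mask of the pair }  ->  D0  (whole double register)
    //   RR = { D0, lane mask of the low half }   ->  R0  (matching sub-register)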